Merge tag 'drm-next-2022-06-03-1' of git://anongit.freedesktop.org/drm/drm (ab18b7b3) · Commits · jan.koester / Linux

drivers/gpu/Makefile

+1 −2

Original line number	Diff line number	Diff line
		@@ -2,7 +2,6 @@
		# drm/tegra depends on host1x, so if both drivers are built-in care must be
		# taken to initialize them in the correct order. Link order is the only way
		# to ensure this currently.
		obj-$(CONFIG_TEGRA_HOST1X) += host1x/
		obj-y += drm/ vga/
		obj-y += host1x/ drm/ vga/
		obj-$(CONFIG_IMX_IPUV3_CORE) += ipu-v3/
		obj-$(CONFIG_TRACE_GPU_MEM) += trace/

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -1621,7 +1621,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(

		mutex_lock(&mem->lock);

		/* Unpin MMIO/DOORBELL BO's that were pinnned during allocation */
		/* Unpin MMIO/DOORBELL BO's that were pinned during allocation */
		if (mem->alloc_flags &
		(KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
		KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {

drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c

+6 −2

Original line number	Diff line number	Diff line
		@@ -188,13 +188,17 @@ static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
		vram_type = AMDGPU_VRAM_TYPE_DDR3;
		break;
		case Ddr4MemType:
		case LpDdr4MemType:
		vram_type = AMDGPU_VRAM_TYPE_DDR4;
		break;
		case LpDdr4MemType:
		vram_type = AMDGPU_VRAM_TYPE_LPDDR4;
		break;
		case Ddr5MemType:
		case LpDdr5MemType:
		vram_type = AMDGPU_VRAM_TYPE_DDR5;
		break;
		case LpDdr5MemType:
		vram_type = AMDGPU_VRAM_TYPE_LPDDR5;
		break;
		default:
		vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
		break;

drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

+2 −2

Original line number	Diff line number	Diff line
		@@ -116,7 +116,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
		int ret;

		if (cs->in.num_chunks == 0)
		return 0;
		return -EINVAL;

		chunk_array = kvmalloc_array(cs->in.num_chunks, sizeof(uint64_t), GFP_KERNEL);
		if (!chunk_array)
		@@ -1252,7 +1252,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,

		p->fence = dma_fence_get(&job->base.s_fence->finished);

		amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
		seq = amdgpu_ctx_add_fence(p->ctx, entity, p->fence);
		amdgpu_cs_post_dependencies(p);

		if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&

drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c

+113 −110

Original line number	Diff line number	Diff line
		@@ -135,9 +135,9 @@ static enum amdgpu_ring_priority_level amdgpu_ctx_sched_prio_to_ring_prio(int32_

		static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
		{
		struct amdgpu_device *adev = ctx->adev;
		int32_t ctx_prio;
		struct amdgpu_device *adev = ctx->mgr->adev;
		unsigned int hw_prio;
		int32_t ctx_prio;

		ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
		ctx->init_priority : ctx->override_priority;
		@@ -162,17 +162,50 @@ static unsigned int amdgpu_ctx_get_hw_prio(struct amdgpu_ctx *ctx, u32 hw_ip)
		return hw_prio;
		}

		/*
		 * Work out how long a fence has spent on the hw.
		 *
		 * Returns zero for a NULL fence and for a fence whose scheduled part has
		 * no timestamp flag set yet. While the finished part has no timestamp flag
		 * the interval runs from the scheduled timestamp up to the current time,
		 * otherwise it runs from the scheduled to the finished timestamp.
		 */
		static ktime_t amdgpu_ctx_fence_time(struct dma_fence *fence)
		{
			struct drm_sched_fence *s_fence;
			ktime_t end;

			if (!fence)
				return ns_to_ktime(0);

			s_fence = to_drm_sched_fence(fence);

			/* Not scheduled yet, so nothing to account */
			if (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->scheduled.flags))
				return ns_to_ktime(0);

			/* Still running: measure up to now, otherwise up to completion */
			if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &s_fence->finished.flags))
				end = s_fence->finished.timestamp;
			else
				end = ktime_get();

			return ktime_sub(end, s_fence->scheduled.timestamp);
		}

		/*
		 * Add up amdgpu_ctx_fence_time() over the first amdgpu_sched_jobs fence
		 * slots of @centity. The whole walk is done with ctx->ring_lock held.
		 */
		static ktime_t amdgpu_ctx_entity_time(struct amdgpu_ctx *ctx,
				struct amdgpu_ctx_entity *centity)
		{
			ktime_t total = ns_to_ktime(0);
			uint32_t slot = 0;

			spin_lock(&ctx->ring_lock);
			while (slot < amdgpu_sched_jobs) {
				total = ktime_add(total,
						  amdgpu_ctx_fence_time(centity->fences[slot]));
				slot++;
			}
			spin_unlock(&ctx->ring_lock);

			return total;
		}

		static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
		const u32 ring)
		{
		struct amdgpu_device *adev = ctx->adev;
		struct amdgpu_ctx_entity *entity;
		struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
		unsigned num_scheds = 0;
		int32_t ctx_prio;
		unsigned int hw_prio;
		struct amdgpu_device *adev = ctx->mgr->adev;
		struct amdgpu_ctx_entity *entity;
		enum drm_sched_priority drm_prio;
		unsigned int hw_prio, num_scheds;
		int32_t ctx_prio;
		int r;

		entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
		@@ -182,6 +215,7 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,

		ctx_prio = (ctx->override_priority == AMDGPU_CTX_PRIORITY_UNSET) ?
		ctx->init_priority : ctx->override_priority;
		entity->hw_ip = hw_ip;
		entity->sequence = 1;
		hw_prio = amdgpu_ctx_get_hw_prio(ctx, hw_ip);
		drm_prio = amdgpu_ctx_to_drm_sched_prio(ctx_prio);
		@@ -220,10 +254,25 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
		return r;
		}

		static int amdgpu_ctx_init(struct amdgpu_device *adev,
		int32_t priority,
		struct drm_file *filp,
		struct amdgpu_ctx *ctx)
		/*
		 * Tear down a context entity: drop the reference on each of its
		 * amdgpu_sched_jobs fence slots and free the entity itself.
		 *
		 * Returns the sum of amdgpu_ctx_fence_time() over those fences, or zero
		 * when @entity is NULL.
		 */
		static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
		{
			ktime_t total = ns_to_ktime(0);
			int slot;

			if (entity) {
				for (slot = 0; slot < amdgpu_sched_jobs; slot++) {
					/* Account the fence before its reference is dropped */
					total = ktime_add(total,
							  amdgpu_ctx_fence_time(entity->fences[slot]));
					dma_fence_put(entity->fences[slot]);
				}

				kfree(entity);
			}

			return total;
		}

		static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
		struct drm_file *filp, struct amdgpu_ctx *ctx)
		{
		int r;

		@@ -233,15 +282,14 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,

		memset(ctx, 0, sizeof(*ctx));

		ctx->adev = adev;

		kref_init(&ctx->refcount);
		ctx->mgr = mgr;
		spin_lock_init(&ctx->ring_lock);
		mutex_init(&ctx->lock);

		ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
		ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
		ctx->reset_counter_query = ctx->reset_counter;
		ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
		ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
		ctx->init_priority = priority;
		ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
		ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
		@@ -249,24 +297,10 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
		return 0;
		}

		static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
		{

		int i;

		if (!entity)
		return;

		for (i = 0; i < amdgpu_sched_jobs; ++i)
		dma_fence_put(entity->fences[i]);

		kfree(entity);
		}

		static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
		u32 *stable_pstate)
		{
		struct amdgpu_device *adev = ctx->adev;
		struct amdgpu_device *adev = ctx->mgr->adev;
		enum amd_dpm_forced_level current_level;

		current_level = amdgpu_dpm_get_performance_level(adev);
		@@ -294,7 +328,7 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
		static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
		u32 stable_pstate)
		{
		struct amdgpu_device *adev = ctx->adev;
		struct amdgpu_device *adev = ctx->mgr->adev;
		enum amd_dpm_forced_level level;
		u32 current_stable_pstate;
		int r;
		@@ -345,7 +379,8 @@ static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
		static void amdgpu_ctx_fini(struct kref *ref)
		{
		struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
		struct amdgpu_device *adev = ctx->adev;
		struct amdgpu_ctx_mgr *mgr = ctx->mgr;
		struct amdgpu_device *adev = mgr->adev;
		unsigned i, j, idx;

		if (!adev)
		@@ -353,8 +388,10 @@ static void amdgpu_ctx_fini(struct kref *ref)

		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
		amdgpu_ctx_fini_entity(ctx->entities[i][j]);
		ctx->entities[i][j] = NULL;
		ktime_t spend;

		spend = amdgpu_ctx_fini_entity(ctx->entities[i][j]);
		atomic64_add(ktime_to_ns(spend), &mgr->time_spend[i]);
		}
		}

		@@ -421,7 +458,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
		}

		*id = (uint32_t)r;
		r = amdgpu_ctx_init(adev, priority, filp, ctx);
		r = amdgpu_ctx_init(mgr, priority, filp, ctx);
		if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		@@ -671,9 +708,9 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
		return 0;
		}

		void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
		uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
		struct drm_sched_entity *entity,
		struct dma_fence *fence, uint64_t *handle)
		struct dma_fence *fence)
		{
		struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
		uint64_t seq = centity->sequence;
		@@ -682,8 +719,7 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,

		idx = seq & (amdgpu_sched_jobs - 1);
		other = centity->fences[idx];
		if (other)
		BUG_ON(!dma_fence_is_signaled(other));
		WARN_ON(other && !dma_fence_is_signaled(other));

		dma_fence_get(fence);

		@@ -692,9 +728,11 @@ void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
		centity->sequence++;
		spin_unlock(&ctx->ring_lock);

		atomic64_add(ktime_to_ns(amdgpu_ctx_fence_time(other)),
		&ctx->mgr->time_spend[centity->hw_ip]);

		dma_fence_put(other);
		if (handle)
		*handle = seq;
		return seq;
		}

		struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
		@@ -731,7 +769,7 @@ static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
		int hw_ip,
		int32_t priority)
		{
		struct amdgpu_device *adev = ctx->adev;
		struct amdgpu_device *adev = ctx->mgr->adev;
		unsigned int hw_prio;
		struct drm_gpu_scheduler **scheds = NULL;
		unsigned num_scheds;
		@@ -796,10 +834,17 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
		return r;
		}

		void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
		void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr,
		struct amdgpu_device *adev)
		{
		unsigned int i;

		mgr->adev = adev;
		mutex_init(&mgr->lock);
		idr_init(&mgr->ctx_handles);

		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i)
		atomic64_set(&mgr->time_spend[i], 0);
		}

		long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
		@@ -875,80 +920,38 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
		mutex_destroy(&mgr->lock);
		}

		/*
		 * Accumulate the time the fences of one context entity have been running.
		 *
		 * @ctx:     context owning the entity; its ring_lock guards the fence slots
		 * @centity: entity whose first amdgpu_sched_jobs fence slots are examined
		 * @total:   out, sum of the run time of every counted fence
		 * @max:     out, largest "now - scheduled timestamp" among counted fences
		 *
		 * Both outputs are reset to zero first. A fence is skipped when its slot is
		 * empty, when its scheduled part has not signaled, or when its scheduled
		 * timestamp is not before the sampling time.
		 */
		static void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx,
				struct amdgpu_ctx_entity *centity, ktime_t *total, ktime_t *max)
		{
			ktime_t now, t1;
			uint32_t i;

			*total = *max = 0;

			/* One sampling time for the whole walk */
			now = ktime_get();
			for (i = 0; i < amdgpu_sched_jobs; i++) {
				struct dma_fence *fence;
				struct drm_sched_fence *s_fence;

				/* Take our own reference while holding the lock */
				spin_lock(&ctx->ring_lock);
				fence = dma_fence_get(centity->fences[i]);
				spin_unlock(&ctx->ring_lock);
				if (!fence)
					continue;
				s_fence = to_drm_sched_fence(fence);
				if (!dma_fence_is_signaled(&s_fence->scheduled)) {
					dma_fence_put(fence);
					continue;
				}
				t1 = s_fence->scheduled.timestamp;
				if (!ktime_before(t1, now)) {
					dma_fence_put(fence);
					continue;
				}
				/* Finished before the sample: count the full run, else up to now */
				if (dma_fence_is_signaled(&s_fence->finished) &&
				    s_fence->finished.timestamp < now)
					*total += ktime_sub(s_fence->finished.timestamp, t1);
				else
					*total += ktime_sub(now, t1);
				t1 = ktime_sub(now, t1);
				dma_fence_put(fence);
				*max = max(t1, *max);
			}
		}

		ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
		uint32_t idx, uint64_t *elapsed)
		void amdgpu_ctx_mgr_usage(struct amdgpu_ctx_mgr *mgr,
		ktime_t usage[AMDGPU_HW_IP_NUM])
		{
		struct idr *idp;
		struct amdgpu_ctx *ctx;
		unsigned int hw_ip, i;
		uint32_t id;
		struct amdgpu_ctx_entity *centity;
		ktime_t total = 0, max = 0;

		if (idx >= AMDGPU_MAX_ENTITY_NUM)
		return 0;
		idp = &mgr->ctx_handles;
		/*
		* This is a little bit racy because it can be that a ctx or a fence are
		* destroyed just in the moment we try to account them. But that is ok
		* since exactly that case is explicitely allowed by the interface.
		*/
		mutex_lock(&mgr->lock);
		idr_for_each_entry(idp, ctx, id) {
		ktime_t ttotal, tmax;
		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
		uint64_t ns = atomic64_read(&mgr->time_spend[hw_ip]);

		if (!ctx->entities[hwip][idx])
		continue;
		usage[hw_ip] = ns_to_ktime(ns);
		}

		centity = ctx->entities[hwip][idx];
		amdgpu_ctx_fence_time(ctx, centity, &ttotal, &tmax);
		idr_for_each_entry(&mgr->ctx_handles, ctx, id) {
		for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
		for (i = 0; i < amdgpu_ctx_num_entities[hw_ip]; ++i) {
		struct amdgpu_ctx_entity *centity;
		ktime_t spend;

		/* Harmonic mean approximation diverges for very small
		* values. If ratio < 0.01% ignore
		*/
		if (AMDGPU_CTX_FENCE_USAGE_MIN_RATIO(tmax, ttotal))
		centity = ctx->entities[hw_ip][i];
		if (!centity)
		continue;

		total = ktime_add(total, ttotal);
		max = ktime_after(tmax, max) ? tmax : max;
		spend = amdgpu_ctx_entity_time(ctx, centity);
		usage[hw_ip] = ktime_add(usage[hw_ip], spend);
		}
		}
		}

		mutex_unlock(&mgr->lock);
		if (elapsed)
		*elapsed = max;

		return total;
		}