Commit 5fb34bd9 authored by Alex Sierra's avatar Alex Sierra Committed by Alex Deucher
Browse files

drm/amdkfd: pass kfd_node ref to svm migration api



This work is required for GC 9.4.3, previous to support memory
partitions per node at SVM. When multiple partition is configured,
every BO should be allocated inside one specific partition which
corresponds to the current amdgpu_device and kfd_node.

v2: squash in compilation fix (Alex)
v3: squash in fix for pre-gfx 9.4.3 (Alex)
v4: squash in best_loc fix (Alex)

Signed-off-by: default avatarAlex Sierra <alex.sierra@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 0b02364e
Loading
Loading
Loading
Loading
+4 −3
Original line number Diff line number Diff line
@@ -2441,7 +2441,8 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
 * shouldn't be reported any more.
 */
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
			    uint64_t addr, bool write_fault)
			    u32 client_id, u32 node_id, uint64_t addr,
			    bool write_fault)
{
	bool is_compute_context = false;
	struct amdgpu_bo *root;
@@ -2465,8 +2466,8 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,

	addr /= AMDGPU_GPU_PAGE_SIZE;

	if (is_compute_context &&
	    !svm_range_restore_pages(adev, pasid, addr, write_fault)) {
	if (is_compute_context && !svm_range_restore_pages(adev, pasid, client_id,
	    node_id, addr, write_fault)) {
		amdgpu_bo_unref(&root);
		return true;
	}
+2 −1
Original line number Diff line number Diff line
@@ -455,7 +455,8 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid,
			     struct amdgpu_task_info *task_info);
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
			    uint64_t addr, bool write_fault);
			    u32 client_id, u32 node_id, uint64_t addr,
			    bool write_fault);

void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);

+1 −1
Original line number Diff line number Diff line
@@ -139,7 +139,7 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev,
		/* Try to handle the recoverable page faults by filling page
		 * tables
		 */
		if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault))
		if (amdgpu_vm_handle_fault(adev, entry->pasid, 0, 0, addr, write_fault))
			return 1;
	}

+18 −15
Original line number Diff line number Diff line
@@ -557,11 +557,24 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
	u64 addr;
	uint32_t cam_index = 0;
	int ret;
	uint32_t node_id;
	uint32_t node_id = 0;

	addr = (u64)entry->src_data[0] << 12;
	addr |= ((u64)entry->src_data[1] & 0xf) << 44;

	if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
		hub_name = "mmhub0";
		hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
		hub_name = "mmhub1";
		hub = &adev->vmhub[AMDGPU_MMHUB1(0)];
	} else {
		hub_name = "gfxhub0";
		node_id = (adev->ip_versions[GC_HWIP][0] ==
			   IP_VERSION(9, 4, 3)) ? entry->node_id : 0;
		hub = &adev->vmhub[node_id/2];
	}

	if (retry_fault) {
		if (adev->irq.retry_cam_enabled) {
			/* Delegate it to a different ring if the hardware hasn't
@@ -574,7 +587,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,

			cam_index = entry->src_data[2] & 0x3ff;

			ret = amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault);
			ret = amdgpu_vm_handle_fault(adev, entry->pasid, entry->client_id, node_id,
						     addr, write_fault);
			WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index);
			if (ret)
				return 1;
@@ -596,7 +610,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
			/* Try to handle the recoverable page faults by filling page
			 * tables
			 */
			if (amdgpu_vm_handle_fault(adev, entry->pasid, addr, write_fault))
			if (amdgpu_vm_handle_fault(adev, entry->pasid, entry->client_id, node_id,
						   addr, write_fault))
				return 1;
		}
	}
@@ -604,18 +619,6 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
	if (!printk_ratelimit())
		return 0;

	if (entry->client_id == SOC15_IH_CLIENTID_VMC) {
		hub_name = "mmhub0";
		hub = &adev->vmhub[AMDGPU_MMHUB0(0)];
	} else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) {
		hub_name = "mmhub1";
		hub = &adev->vmhub[AMDGPU_MMHUB1(0)];
	} else {
		hub_name = "gfxhub0";
		node_id = (adev->ip_versions[GC_HWIP][0] ==
			   IP_VERSION(9, 4, 3)) ? entry->node_id : 0;
		hub = &adev->vmhub[node_id/2];
	}

	memset(&task_info, 0, sizeof(struct amdgpu_task_info));
	amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
+23 −22
Original line number Diff line number Diff line
@@ -287,11 +287,12 @@ static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
}

static int
svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange,
			 struct migrate_vma *migrate, struct dma_fence **mfence,
			 dma_addr_t *scratch, uint64_t ttm_res_offset)
{
	uint64_t npages = migrate->npages;
	uint64_t npages = migrate->cpages;
	struct amdgpu_device *adev = node->adev;
	struct device *dev = adev->dev;
	struct amdgpu_res_cursor cursor;
	dma_addr_t *src;
@@ -321,7 +322,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
					      DMA_TO_DEVICE);
			r = dma_mapping_error(dev, src[i]);
			if (r) {
				dev_err(adev->dev, "%s: fail %d dma_map_page\n",
				dev_err(dev, "%s: fail %d dma_map_page\n",
					__func__, r);
				goto out_free_vram_pages;
			}
@@ -390,12 +391,13 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
}

static long
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
svm_migrate_vma_to_vram(struct kfd_node *node, struct svm_range *prange,
			struct vm_area_struct *vma, uint64_t start,
			uint64_t end, uint32_t trigger, uint64_t ttm_res_offset)
{
	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
	uint64_t npages = (end - start) >> PAGE_SHIFT;
	struct amdgpu_device *adev = node->adev;
	struct kfd_process_device *pdd;
	struct dma_fence *mfence = NULL;
	struct migrate_vma migrate = { 0 };
@@ -445,7 +447,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
	else
		pr_debug("0x%lx pages migrated\n", cpages);

	r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch, ttm_res_offset);
	r = svm_migrate_copy_to_vram(node, prange, &migrate, &mfence, scratch, ttm_res_offset);
	migrate_vma_pages(&migrate);

	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
@@ -465,7 +467,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
	kvfree(buf);
out:
	if (!r && cpages) {
		pdd = svm_range_get_pdd_by_adev(prange, adev);
		pdd = svm_range_get_pdd_by_node(prange, node);
		if (pdd)
			WRITE_ONCE(pdd->page_in, pdd->page_in + cpages);

@@ -492,8 +494,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
{
	unsigned long addr, start, end;
	struct vm_area_struct *vma;
	struct amdgpu_device *adev;
	uint64_t ttm_res_offset;
	struct kfd_node *node;
	unsigned long cpages = 0;
	long r = 0;

@@ -503,9 +505,9 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
		return 0;
	}

	adev = svm_range_get_adev_by_id(prange, best_loc);
	if (!adev) {
		pr_debug("failed to get device by id 0x%x\n", best_loc);
	node = svm_range_get_node_by_id(prange, best_loc);
	if (!node) {
		pr_debug("failed to get kfd node by id 0x%x\n", best_loc);
		return -ENODEV;
	}

@@ -515,9 +517,9 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
	start = prange->start << PAGE_SHIFT;
	end = (prange->last + 1) << PAGE_SHIFT;

	r = svm_range_vram_node_new(adev, prange, true);
	r = svm_range_vram_node_new(node, prange, true);
	if (r) {
		dev_dbg(adev->dev, "fail %ld to alloc vram\n", r);
		dev_dbg(node->adev->dev, "fail %ld to alloc vram\n", r);
		return r;
	}
	ttm_res_offset = prange->offset << PAGE_SHIFT;
@@ -530,7 +532,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
			break;

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger, ttm_res_offset);
		r = svm_migrate_vma_to_vram(node, prange, vma, addr, next, trigger, ttm_res_offset);
		if (r < 0) {
			pr_debug("failed %ld to migrate\n", r);
			break;
@@ -663,7 +665,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
 *   positive values - partial migration, number of pages not migrated
 */
static long
svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
svm_migrate_vma_to_ram(struct kfd_node *node, struct svm_range *prange,
		       struct vm_area_struct *vma, uint64_t start, uint64_t end,
		       uint32_t trigger, struct page *fault_page)
{
@@ -671,6 +673,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
	uint64_t npages = (end - start) >> PAGE_SHIFT;
	unsigned long upages = npages;
	unsigned long cpages = 0;
	struct amdgpu_device *adev = node->adev;
	struct kfd_process_device *pdd;
	struct dma_fence *mfence = NULL;
	struct migrate_vma migrate = { 0 };
@@ -745,7 +748,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
	kvfree(buf);
out:
	if (!r && cpages) {
		pdd = svm_range_get_pdd_by_adev(prange, adev);
		pdd = svm_range_get_pdd_by_node(prange, node);
		if (pdd)
			WRITE_ONCE(pdd->page_out, pdd->page_out + cpages);
	}
@@ -766,7 +769,7 @@ svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
			    uint32_t trigger, struct page *fault_page)
{
	struct amdgpu_device *adev;
	struct kfd_node *node;
	struct vm_area_struct *vma;
	unsigned long addr;
	unsigned long start;
@@ -780,13 +783,11 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
		return 0;
	}

	adev = svm_range_get_adev_by_id(prange, prange->actual_loc);
	if (!adev) {
		pr_debug("failed to get device by id 0x%x\n",
			 prange->actual_loc);
	node = svm_range_get_node_by_id(prange, prange->actual_loc);
	if (!node) {
		pr_debug("failed to get kfd node by id 0x%x\n", prange->actual_loc);
		return -ENODEV;
	}

	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n",
		 prange->svms, prange, prange->start, prange->last,
		 prange->actual_loc);
@@ -805,7 +806,7 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
		}

		next = min(vma->vm_end, end);
		r = svm_migrate_vma_to_ram(adev, prange, vma, addr, next, trigger,
		r = svm_migrate_vma_to_ram(node, prange, vma, addr, next, trigger,
			fault_page);
		if (r < 0) {
			pr_debug("failed %ld to migrate prange %p\n", r, prange);
Loading