Commit 1e4a0033 authored by Felix Kuehling, committed by Alex Deucher
Browse files

drm/amdgpu: Fix per-BO MTYPE selection for GFXv9.4.3



Treat system memory on NUMA systems as remote by default. Overriding with
a more efficient MTYPE per page will be implemented in the next patch.

No need for a special case for APP APUs. System memory is handled the same
for carve-out and native mode. And VRAM doesn't exist in native mode.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-and-tested-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 895797d9
Loading
Loading
Loading
Loading
+16 −24
Original line number Diff line number Diff line
@@ -1186,9 +1186,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
	bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
	bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
	bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
	unsigned int mtype;
	unsigned int mtype_default;
	/* TODO: memory partitions struct amdgpu_vm *vm = mapping->bo_va->base.vm;*/
	unsigned int mtype_local, mtype;
	bool snoop = false;
	bool is_local;

	switch (adev->ip_versions[GC_HWIP][0]) {
	case IP_VERSION(9, 4, 1):
@@ -1228,35 +1229,26 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
		}
		break;
	case IP_VERSION(9, 4, 3):
		/* FIXME: Needs more work for handling multiple memory
		 * partitions (> NPS1 mode) e.g. NPS4 for both APU and dGPU
		 * modes.
		 * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable.
		 * To force use of MTYPE_RW, set use_mtype_cc_wa=0
		/* Only local VRAM BOs or system memory on non-NUMA APUs
		 * can be assumed to be local in their entirety. Choose
		 * MTYPE_NC as safe fallback for all system memory BOs on
		 * NUMA systems. Their MTYPE can be overridden per-page in
		 * gmc_v9_0_override_vm_pte_flags.
		 */
		mtype_default = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW;
		mtype_local = amdgpu_use_mtype_cc_wa ? MTYPE_CC : MTYPE_RW;
		is_local = (!is_vram && (adev->flags & AMD_IS_APU) &&
			    num_possible_nodes() <= 1) ||
			   (is_vram && adev == bo_adev /* TODO: memory partitions &&
			    bo->mem_id == vm->mem_id*/);
		snoop = true;
		if (uncached) {
			mtype = MTYPE_UC;
		} else if (adev->gmc.is_app_apu) {
			/* FIXME: APU in native mode, NPS1 single socket only
			 *
			 * For suporting NUMA partitioned APU e.g. in NPS4 mode,
			 * this need to look at the NUMA node on which the
			 * system memory allocation was done.
			 *
			 * Memory access by a different partition within same
			 * socket should be treated as remote access so MTYPE_RW
			 * cannot be used always.
			 */
			mtype = mtype_default;
		} else if (adev->flags & AMD_IS_APU) {
			/* APU on carve out mode */
			mtype = mtype_default;
			mtype = is_local ? mtype_local : MTYPE_NC;
		} else {
			/* dGPU */
			if (is_vram && bo_adev == adev)
				mtype = mtype_default;
			if (is_local)
				mtype = mtype_local;
			else if (is_vram)
				mtype = MTYPE_NC;
			else
+14 −10
Original line number Diff line number Diff line
@@ -1151,6 +1151,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
	bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
	bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
	bool uncached = flags & KFD_IOCTL_SVM_FLAG_UNCACHED;
	unsigned int mtype_local;

	if (domain == SVM_RANGE_VRAM_DOMAIN)
		bo_node = prange->svm_bo->node;
@@ -1191,19 +1192,16 @@ svm_range_get_pte_flags(struct kfd_node *node,
		}
		break;
	case IP_VERSION(9, 4, 3):
		//TODO: Need more work for handling multiple memory partitions
		//e.g. NPS4. Current approch is only applicable without memory
		//partitions.
		mtype_local = amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC :
						       AMDGPU_VM_MTYPE_RW;
		snoop = true;
		if (uncached) {
			mapping_flags |= AMDGPU_VM_MTYPE_UC;
		} else if (domain == SVM_RANGE_VRAM_DOMAIN) {
			/* local HBM region close to partition
			 * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable.
			 * To force use of MTYPE_RW, set use_mtype_cc_wa=0
			 */
			if (bo_node == node)
				mapping_flags |= amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
			/* local HBM region close to partition */
			if (bo_node->adev == node->adev /* TODO: memory partitions &&
			    bo_node->mem_id == node->mem_id*/)
				mapping_flags |= mtype_local;
			/* local HBM region far from partition or remote XGMI GPU */
			else if (svm_nodes_in_same_hive(bo_node, node))
				mapping_flags |= AMDGPU_VM_MTYPE_NC;
@@ -1212,6 +1210,12 @@ svm_range_get_pte_flags(struct kfd_node *node,
				mapping_flags |= AMDGPU_VM_MTYPE_UC;
		/* system memory accessed by the APU */
		} else if (node->adev->flags & AMD_IS_APU) {
			/* On NUMA systems, locality is determined per-page
			 * in amdgpu_gmc_override_vm_pte_flags
			 */
			if (num_possible_nodes() <= 1)
				mapping_flags |= mtype_local;
			else
				mapping_flags |= AMDGPU_VM_MTYPE_NC;
		/* system memory accessed by the dGPU */
		} else {