Commit 8f8c80f4 authored by Jingwen Chen's avatar Jingwen Chen Committed by Alex Deucher
Browse files

drm/amd/amdgpu: move inc gpu_reset_counter after drm_sched_stop



Move gpu_reset_counter after drm_sched_stop to avoid race
condition caused by job submitted between reset_count +1 and
drm_sched_stop.

Signed-off-by: default avatarJingwen Chen <Jingwen.Chen2@amd.com>
Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 8f4828d0
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -4449,7 +4449,6 @@ static bool amdgpu_device_lock_adev(struct amdgpu_device *adev,
		down_write(&adev->reset_sem);
	}

	atomic_inc(&adev->gpu_reset_counter);
	switch (amdgpu_asic_reset_method(adev)) {
	case AMD_RESET_METHOD_MODE1:
		adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
@@ -4710,6 +4709,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			if (need_emergency_restart)
				amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
		}
		atomic_inc(&tmp_adev->gpu_reset_counter);
	}

	if (need_emergency_restart)
@@ -5052,6 +5052,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta

			drm_sched_stop(&ring->sched, NULL);
		}
		atomic_inc(&adev->gpu_reset_counter);
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		/* Permanent error, prepare for device removal */