Commit 9b337b7d authored by Hawking Zhang's avatar Hawking Zhang Committed by Alex Deucher
Browse files

drm/amdgpu: Adjust the sequence to query ras error info



It turns out STATUS_VALID_FLAG needs to be checked
ahead of any other fields. ADDRESS_VALID_FLAG and
ERR_INFO_VALID_FLAG only manages ADDRESS and ERR_INFO
field respectively. driver should continue poll
ERR CNT field even ERR_INFO_VALD_FLAG is not set.

Signed-off-by: default avatarHawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: default avatarTao Zhou <tao.zhou1@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 35d54e21
Loading
Loading
Loading
Loading
+7 −6
Original line number Diff line number Diff line
@@ -3164,7 +3164,8 @@ bool amdgpu_ras_inst_get_err_cnt_field(struct amdgpu_device *adev,

	if ((reg_entry->flags & AMDGPU_RAS_ERR_INFO_VALID) &&
	    !REG_GET_FIELD(err_status_hi_data, ERR_STATUS_HI, ERR_INFO_VALID_FLAG))
		return false;
		/* keep the check here in case we need to refer to the result later */
		dev_dbg(adev->dev, "Invalid err_info field\n");

	/* read err count */
	*err_cnt = REG_GET_FIELD(err_status_hi_data, ERR_STATUS, ERR_CNT);
@@ -3187,17 +3188,17 @@ void amdgpu_ras_inst_query_ras_error_count(struct amdgpu_device *adev,
	uint32_t i, j;

	for (i = 0; i < reg_list_size; i++) {
		/* query memory_id from err_status_lo */
		if (!amdgpu_ras_inst_get_memory_id_field(adev, &reg_list[i],
							 instance, &memory_id))
			continue;

		/* query err_cnt from err_status_hi */
		if (!amdgpu_ras_inst_get_err_cnt_field(adev, &reg_list[i],
						       instance, &err_cnt) ||
		    !err_cnt)
			continue;

		/* query memory_id from err_status_lo */
		if (!amdgpu_ras_inst_get_memory_id_field(adev, &reg_list[i],
							 instance, &memory_id))
			continue;

		*err_count += err_cnt;

		/* log the errors */