Commit efe17d5a authored by yipechai, committed by Alex Deucher
Browse files

drm/amdgpu: Modify umc block to fit for the unified ras block data and ops



1.Modify umc block to fit for the unified ras block data and ops.
2.Rename amdgpu_umc_ras_funcs to amdgpu_umc_ras, and remove the _funcs suffix from the corresponding variable names.
3.Remove the const qualifier from the umc ras variables so that the umc ras block can be inserted into the amdgpu device ras block linked list.
4.Invoke the amdgpu_ras_register_ras_block function to register the umc ras block into the amdgpu device ras block linked list.
5.Remove the redundant code about umc in amdgpu_ras.c after using the unified ras block.
6.Fill the unified ras block fields .name, .block, .ras_late_init and .ras_fini for all umc versions. If .ras_late_init and .ras_fini are already defined by the selected umc version, those functions take effect; otherwise they default to amdgpu_umc_ras_late_init and amdgpu_umc_ras_fini.

Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 2e54fe5d
Loading
Loading
Loading
Loading
+4 −6
Original line number Diff line number Diff line
@@ -440,9 +440,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
{
	int r;

	if (adev->umc.ras_funcs &&
	    adev->umc.ras_funcs->ras_late_init) {
		r = adev->umc.ras_funcs->ras_late_init(adev);
	if (adev->umc.ras && adev->umc.ras->ras_block.ras_late_init) {
		r = adev->umc.ras->ras_block.ras_late_init(adev, NULL);
		if (r)
			return r;
	}
@@ -496,9 +495,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)

void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{
	if (adev->umc.ras_funcs &&
	    adev->umc.ras_funcs->ras_fini)
		adev->umc.ras_funcs->ras_fini(adev);
	if (adev->umc.ras && adev->umc.ras->ras_block.ras_fini)
		adev->umc.ras->ras_block.ras_fini(adev);

	if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini)
		adev->mmhub.ras->ras_block.ras_fini(adev);
+15 −15
Original line number Diff line number Diff line
@@ -939,24 +939,24 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d
	 */
	ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc));
	if (ret == -EOPNOTSUPP) {
		if (adev->umc.ras_funcs &&
			adev->umc.ras_funcs->query_ras_error_count)
			adev->umc.ras_funcs->query_ras_error_count(adev, err_data);
		if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
			adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
			adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data);

		/* umc query_ras_error_address is also responsible for clearing
		 * error status
		 */
		if (adev->umc.ras_funcs &&
		    adev->umc.ras_funcs->query_ras_error_address)
			adev->umc.ras_funcs->query_ras_error_address(adev, err_data);
		if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
		    adev->umc.ras->ras_block.hw_ops->query_ras_error_address)
			adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data);
	} else if (!ret) {
		if (adev->umc.ras_funcs &&
			adev->umc.ras_funcs->ecc_info_query_ras_error_count)
			adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, err_data);
		if (adev->umc.ras &&
			adev->umc.ras->ecc_info_query_ras_error_count)
			adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data);

		if (adev->umc.ras_funcs &&
			adev->umc.ras_funcs->ecc_info_query_ras_error_address)
			adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, err_data);
		if (adev->umc.ras &&
			adev->umc.ras->ecc_info_query_ras_error_address)
			adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data);
	}
}

@@ -2412,12 +2412,12 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
	}
	else if (adev->df.funcs &&
	    adev->df.funcs->query_ras_poison_mode &&
	    adev->umc.ras_funcs &&
	    adev->umc.ras_funcs->query_ras_poison_mode) {
	    adev->umc.ras &&
	    adev->umc.ras->query_ras_poison_mode) {
		df_poison =
			adev->df.funcs->query_ras_poison_mode(adev);
		umc_poison =
			adev->umc.ras_funcs->query_ras_poison_mode(adev);
			adev->umc.ras->query_ras_poison_mode(adev);
		/* Only poison is set in both DF and UMC, we can support it */
		if (df_poison && umc_poison)
			con->poison_supported = true;
+16 −16
Original line number Diff line number Diff line
@@ -35,12 +35,12 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
	ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
	if (ret == -EOPNOTSUPP) {
		if (adev->umc.ras_funcs &&
		    adev->umc.ras_funcs->query_ras_error_count)
		    adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status);
		if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
		    adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
		    adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status);

		if (adev->umc.ras_funcs &&
		    adev->umc.ras_funcs->query_ras_error_address &&
		if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
		    adev->umc.ras->ras_block.hw_ops->query_ras_error_address &&
		    adev->umc.max_ras_err_cnt_per_query) {
			err_data->err_addr =
				kcalloc(adev->umc.max_ras_err_cnt_per_query,
@@ -56,15 +56,15 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
			/* umc query_ras_error_address is also responsible for clearing
			 * error status
			 */
			adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status);
			adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status);
		}
	} else if (!ret) {
		if (adev->umc.ras_funcs &&
		    adev->umc.ras_funcs->ecc_info_query_ras_error_count)
		    adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, ras_error_status);
		if (adev->umc.ras &&
		    adev->umc.ras->ecc_info_query_ras_error_count)
		    adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status);

		if (adev->umc.ras_funcs &&
		    adev->umc.ras_funcs->ecc_info_query_ras_error_address &&
		if (adev->umc.ras &&
		    adev->umc.ras->ecc_info_query_ras_error_address &&
		    adev->umc.max_ras_err_cnt_per_query) {
			err_data->err_addr =
				kcalloc(adev->umc.max_ras_err_cnt_per_query,
@@ -80,7 +80,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
			/* umc query_ras_error_address is also responsible for clearing
			 * error status
			 */
			adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, ras_error_status);
			adev->umc.ras->ecc_info_query_ras_error_address(adev, ras_error_status);
		}
	}

@@ -136,7 +136,7 @@ static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
	return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true);
}

int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info)
{
	int r;
	struct ras_fs_if fs_info = {
@@ -172,9 +172,9 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
	}

	/* ras init of specific umc version */
	if (adev->umc.ras_funcs &&
	    adev->umc.ras_funcs->err_cnt_init)
		adev->umc.ras_funcs->err_cnt_init(adev);
	if (adev->umc.ras &&
	    adev->umc.ras->err_cnt_init)
		adev->umc.ras->err_cnt_init(adev);

	return 0;

+5 −9
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@
 */
#ifndef __AMDGPU_UMC_H__
#define __AMDGPU_UMC_H__
#include "amdgpu_ras.h"

/*
 * (addr / 256) * 4096, the higher 26 bits in ErrorAddr
@@ -40,14 +41,9 @@
#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))

struct amdgpu_umc_ras_funcs {
struct amdgpu_umc_ras {
	struct amdgpu_ras_block_object ras_block;
	void (*err_cnt_init)(struct amdgpu_device *adev);
	int (*ras_late_init)(struct amdgpu_device *adev);
	void (*ras_fini)(struct amdgpu_device *adev);
	void (*query_ras_error_count)(struct amdgpu_device *adev,
				      void *ras_error_status);
	void (*query_ras_error_address)(struct amdgpu_device *adev,
					void *ras_error_status);
	bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
	void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev,
				      void *ras_error_status);
@@ -73,10 +69,10 @@ struct amdgpu_umc {
	struct ras_common_if *ras_if;

	const struct amdgpu_umc_funcs *funcs;
	const struct amdgpu_umc_ras_funcs *ras_funcs;
	struct amdgpu_umc_ras *ras;
};

int amdgpu_umc_ras_late_init(struct amdgpu_device *adev);
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info);
void amdgpu_umc_ras_fini(struct amdgpu_device *adev);
int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
		void *ras_error_status,
+15 −1
Original line number Diff line number Diff line
@@ -664,11 +664,25 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev)
		adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM;
		adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA;
		adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0];
		adev->umc.ras_funcs = &umc_v8_7_ras_funcs;
		adev->umc.ras = &umc_v8_7_ras;
		break;
	default:
		break;
	}
	if (adev->umc.ras) {
		amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);

		strcpy(adev->umc.ras->ras_block.name,"umc");
		adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC;

		/* If don't define special ras_late_init function, use default ras_late_init */
		if (!adev->umc.ras->ras_block.ras_late_init)
				adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;

		/* If don't define special ras_fini function, use default ras_fini */
		if (!adev->umc.ras->ras_block.ras_fini)
				adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini;
	}
}


Loading