Commit ed27b5df authored by Shuai Xue's avatar Shuai Xue Committed by Borislav Petkov
Browse files

EDAC/ghes: Unify CPER memory error location reporting



Switch the GHES EDAC memory error reporting functions to use the common
CPER ones and get rid of code duplication.

  [ bp:
      - rewrite commit message, remove useless text
      - rip out useless reformatting
      - align function params on the opening brace
      - rename function to a more descriptive name
      - drop useless function exports
      - handle buffer lengths properly when printing other detail
      - remove useless casting
  ]

Signed-off-by: default avatarShuai Xue <xueshuai@linux.alibaba.com>
Signed-off-by: default avatarBorislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20220308144053.49090-3-xueshuai@linux.alibaba.com
parent bdae7965
Loading
Loading
Loading
Loading
+1 −0
Original line number Original line Diff line number Diff line
@@ -55,6 +55,7 @@ config EDAC_DECODE_MCE
config EDAC_GHES
config EDAC_GHES
	bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
	bool "Output ACPI APEI/GHES BIOS detected errors via EDAC"
	depends on ACPI_APEI_GHES && (EDAC=y)
	depends on ACPI_APEI_GHES && (EDAC=y)
	select UEFI_CPER
	help
	help
	  Not all machines support hardware-driven error report. Some of those
	  Not all machines support hardware-driven error report. Some of those
	  provide a BIOS-driven error report mechanism via ACPI, using the
	  provide a BIOS-driven error report mechanism via ACPI, using the
+37 −163
Original line number Original line Diff line number Diff line
@@ -15,11 +15,13 @@
#include "edac_module.h"
#include "edac_module.h"
#include <ras/ras_event.h>
#include <ras/ras_event.h>


#define OTHER_DETAIL_LEN	400

struct ghes_pvt {
struct ghes_pvt {
	struct mem_ctl_info *mci;
	struct mem_ctl_info *mci;


	/* Buffers for the error handling routine */
	/* Buffers for the error handling routine */
	char other_detail[400];
	char other_detail[OTHER_DETAIL_LEN];
	char msg[80];
	char msg[80];
};
};


@@ -235,8 +237,34 @@ static void ghes_scan_system(void)
	system_scanned = true;
	system_scanned = true;
}
}


static int print_mem_error_other_detail(const struct cper_sec_mem_err *mem, char *msg,
					const char *location, unsigned int len)
{
	u32 n;

	if (!msg)
		return 0;

	n = 0;
	len -= 1;

	n += scnprintf(msg + n, len - n, "APEI location: %s ", location);

	if (!(mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS))
		goto out;

	n += scnprintf(msg + n, len - n, "status(0x%016llx): ", mem->error_status);
	n += scnprintf(msg + n, len - n, "%s ", cper_mem_err_status_str(mem->error_status));

out:
	msg[n] = '\0';

	return n;
}

void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
{
{
	struct cper_mem_err_compact cmem;
	struct edac_raw_error_desc *e;
	struct edac_raw_error_desc *e;
	struct mem_ctl_info *mci;
	struct mem_ctl_info *mci;
	struct ghes_pvt *pvt;
	struct ghes_pvt *pvt;
@@ -292,60 +320,10 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)


	/* Error type, mapped on e->msg */
	/* Error type, mapped on e->msg */
	if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
	if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
		u8 etype = mem_err->error_type;

		p = pvt->msg;
		p = pvt->msg;
		switch (mem_err->error_type) {
		p += snprintf(p, sizeof(pvt->msg), "%s", cper_mem_err_type_str(etype));
		case 0:
			p += sprintf(p, "Unknown");
			break;
		case 1:
			p += sprintf(p, "No error");
			break;
		case 2:
			p += sprintf(p, "Single-bit ECC");
			break;
		case 3:
			p += sprintf(p, "Multi-bit ECC");
			break;
		case 4:
			p += sprintf(p, "Single-symbol ChipKill ECC");
			break;
		case 5:
			p += sprintf(p, "Multi-symbol ChipKill ECC");
			break;
		case 6:
			p += sprintf(p, "Master abort");
			break;
		case 7:
			p += sprintf(p, "Target abort");
			break;
		case 8:
			p += sprintf(p, "Parity Error");
			break;
		case 9:
			p += sprintf(p, "Watchdog timeout");
			break;
		case 10:
			p += sprintf(p, "Invalid address");
			break;
		case 11:
			p += sprintf(p, "Mirror Broken");
			break;
		case 12:
			p += sprintf(p, "Memory Sparing");
			break;
		case 13:
			p += sprintf(p, "Scrub corrected error");
			break;
		case 14:
			p += sprintf(p, "Scrub uncorrected error");
			break;
		case 15:
			p += sprintf(p, "Physical Memory Map-out event");
			break;
		default:
			p += sprintf(p, "reserved error (%d)",
				     mem_err->error_type);
		}
	} else {
	} else {
		strcpy(pvt->msg, "unknown error");
		strcpy(pvt->msg, "unknown error");
	}
	}
@@ -362,52 +340,19 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)


	/* Memory error location, mapped on e->location */
	/* Memory error location, mapped on e->location */
	p = e->location;
	p = e->location;
	if (mem_err->validation_bits & CPER_MEM_VALID_NODE)
	cper_mem_err_pack(mem_err, &cmem);
		p += sprintf(p, "node:%d ", mem_err->node);
	p += cper_mem_err_location(&cmem, p);
	if (mem_err->validation_bits & CPER_MEM_VALID_CARD)

		p += sprintf(p, "card:%d ", mem_err->card);
	if (mem_err->validation_bits & CPER_MEM_VALID_MODULE)
		p += sprintf(p, "module:%d ", mem_err->module);
	if (mem_err->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
		p += sprintf(p, "rank:%d ", mem_err->rank);
	if (mem_err->validation_bits & CPER_MEM_VALID_BANK)
		p += sprintf(p, "bank:%d ", mem_err->bank);
	if (mem_err->validation_bits & CPER_MEM_VALID_BANK_GROUP)
		p += sprintf(p, "bank_group:%d ",
			     mem_err->bank >> CPER_MEM_BANK_GROUP_SHIFT);
	if (mem_err->validation_bits & CPER_MEM_VALID_BANK_ADDRESS)
		p += sprintf(p, "bank_address:%d ",
			     mem_err->bank & CPER_MEM_BANK_ADDRESS_MASK);
	if (mem_err->validation_bits & (CPER_MEM_VALID_ROW | CPER_MEM_VALID_ROW_EXT)) {
		u32 row = mem_err->row;

		row |= cper_get_mem_extension(mem_err->validation_bits, mem_err->extended);
		p += sprintf(p, "row:%d ", row);
	}
	if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN)
		p += sprintf(p, "col:%d ", mem_err->column);
	if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION)
		p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
	if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
	if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
		const char *bank = NULL, *device = NULL;
		struct dimm_info *dimm;
		struct dimm_info *dimm;


		dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device);
		p += cper_dimm_err_location(&cmem, p);
		if (bank != NULL && device != NULL)
			p += sprintf(p, "DIMM location:%s %s ", bank, device);
		else
			p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
				     mem_err->mem_dev_handle);

		dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle);
		dimm = find_dimm_by_handle(mci, mem_err->mem_dev_handle);
		if (dimm) {
		if (dimm) {
			e->top_layer = dimm->idx;
			e->top_layer = dimm->idx;
			strcpy(e->label, dimm->label);
			strcpy(e->label, dimm->label);
		}
		}
	}
	}
	if (mem_err->validation_bits & CPER_MEM_VALID_CHIP_ID)
		p += sprintf(p, "chipID: %d ",
			     mem_err->extended >> CPER_MEM_CHIP_ID_SHIFT);
	if (p > e->location)
	if (p > e->location)
		*(p - 1) = '\0';
		*(p - 1) = '\0';


@@ -416,78 +361,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)


	/* All other fields are mapped on e->other_detail */
	/* All other fields are mapped on e->other_detail */
	p = pvt->other_detail;
	p = pvt->other_detail;
	p += snprintf(p, sizeof(pvt->other_detail),
	p += print_mem_error_other_detail(mem_err, p, e->location, OTHER_DETAIL_LEN);
		"APEI location: %s ", e->location);
	if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) {
		u64 status = mem_err->error_status;

		p += sprintf(p, "status(0x%016llx): ", (long long)status);
		switch ((status >> 8) & 0xff) {
		case 1:
			p += sprintf(p, "Error detected internal to the component ");
			break;
		case 16:
			p += sprintf(p, "Error detected in the bus ");
			break;
		case 4:
			p += sprintf(p, "Storage error in DRAM memory ");
			break;
		case 5:
			p += sprintf(p, "Storage error in TLB ");
			break;
		case 6:
			p += sprintf(p, "Storage error in cache ");
			break;
		case 7:
			p += sprintf(p, "Error in one or more functional units ");
			break;
		case 8:
			p += sprintf(p, "component failed self test ");
			break;
		case 9:
			p += sprintf(p, "Overflow or undervalue of internal queue ");
			break;
		case 17:
			p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR ");
			break;
		case 18:
			p += sprintf(p, "Improper access error ");
			break;
		case 19:
			p += sprintf(p, "Access to a memory address which is not mapped to any component ");
			break;
		case 20:
			p += sprintf(p, "Loss of Lockstep ");
			break;
		case 21:
			p += sprintf(p, "Response not associated with a request ");
			break;
		case 22:
			p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits ");
			break;
		case 23:
			p += sprintf(p, "Detection of a PATH_ERROR ");
			break;
		case 25:
			p += sprintf(p, "Bus operation timeout ");
			break;
		case 26:
			p += sprintf(p, "A read was issued to data that has been poisoned ");
			break;
		default:
			p += sprintf(p, "reserved ");
			break;
		}
	}
	if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
		p += sprintf(p, "requestorID: 0x%016llx ",
			     (long long)mem_err->requestor_id);
	if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
		p += sprintf(p, "responderID: 0x%016llx ",
			     (long long)mem_err->responder_id);
	if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID)
		p += sprintf(p, "targetID: 0x%016llx ",
			     (long long)mem_err->responder_id);
	if (p > pvt->other_detail)
	if (p > pvt->other_detail)
		*(p - 1) = '\0';
		*(p - 1) = '\0';


+2 −2
Original line number Original line Diff line number Diff line
@@ -237,7 +237,7 @@ const char *cper_mem_err_status_str(u64 status)
}
}
EXPORT_SYMBOL_GPL(cper_mem_err_status_str);
EXPORT_SYMBOL_GPL(cper_mem_err_status_str);


static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
{
{
	u32 len, n;
	u32 len, n;


@@ -291,7 +291,7 @@ static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
	return n;
	return n;
}
}


static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg)
{
{
	u32 len, n;
	u32 len, n;
	const char *bank = NULL, *device = NULL;
	const char *bank = NULL, *device = NULL;
+2 −0
Original line number Original line Diff line number Diff line
@@ -569,5 +569,7 @@ void cper_print_proc_arm(const char *pfx,
			 const struct cper_sec_proc_arm *proc);
			 const struct cper_sec_proc_arm *proc);
void cper_print_proc_ia(const char *pfx,
void cper_print_proc_ia(const char *pfx,
			const struct cper_sec_proc_ia *proc);
			const struct cper_sec_proc_ia *proc);
int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg);
int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg);


#endif
#endif