Commit 3871d93b authored by Linus Torvalds
Browse files

Merge tag 'perf-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf events updates from Ingo Molnar:
 "PMU driver updates:

   - Add AMD Last Branch Record Extension Version 2 (LbrExtV2) feature
     support for Zen 4 processors.

   - Extend the perf ABI to provide branch speculation information, if
     available, and use this on CPUs that have it (e.g. LbrExtV2).

   - Improve Intel PEBS TSC timestamp handling & integration.

   - Add Intel Raptor Lake S CPU support.

   - Add 'perf mem' and 'perf c2c' memory profiling support on AMD CPUs
     by utilizing IBS tagged load/store samples.

   - Clean up & optimize various x86 PMU details.

  HW breakpoints:

   - Big rework to optimize the code for systems with hundreds of CPUs
     and thousands of breakpoints:

      - Replace the nr_bp_mutex global mutex with the bp_cpuinfo_sem
        per-CPU rwsem that is read-locked during most of the key
        operations.

      - Improve the O(#cpus * #tasks) logic in toggle_bp_slot() and
        fetch_bp_busy_slots().

      - Apply micro-optimizations & cleanups.

  - Misc cleanups & enhancements"

* tag 'perf-core-2022-10-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (75 commits)
  perf/hw_breakpoint: Annotate tsk->perf_event_mutex vs ctx->mutex
  perf: Fix pmu_filter_match()
  perf: Fix lockdep_assert_event_ctx()
  perf/x86/amd/lbr: Adjust LBR regardless of filtering
  perf/x86/utils: Fix uninitialized var in get_branch_type()
  perf/uapi: Define PERF_MEM_SNOOPX_PEER in kernel header file
  perf/x86/amd: Support PERF_SAMPLE_PHY_ADDR
  perf/x86/amd: Support PERF_SAMPLE_ADDR
  perf/x86/amd: Support PERF_SAMPLE_{WEIGHT|WEIGHT_STRUCT}
  perf/x86/amd: Support PERF_SAMPLE_DATA_SRC
  perf/x86/amd: Add IBS OP_DATA2 DataSrc bit definitions
  perf/mem: Introduce PERF_MEM_LVLNUM_{EXTN_MEM|IO}
  perf/x86/uncore: Add new Raptor Lake S support
  perf/x86/cstate: Add new Raptor Lake S support
  perf/x86/msr: Add new Raptor Lake S support
  perf/x86: Add new Raptor Lake S support
  bpf: Check flags for branch stack in bpf_read_branch_records helper
  perf, hw_breakpoint: Fix use-after-free if perf_event_open() fails
  perf: Use sample_flags for raw_data
  perf: Use sample_flags for addr
  ...
parents 30c99993 82aad7ff
Loading
Loading
Loading
Loading
+40 −13
Original line number Diff line number Diff line
@@ -15,6 +15,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/init.h>

@@ -129,7 +130,14 @@ struct breakpoint {
	bool ptrace_bp;
};

/*
 * While kernel/events/hw_breakpoint.c does its own synchronization, we cannot
 * rely on it safely synchronizing internals here; however, we can rely on it
 * not requesting more breakpoints than available.
 */
static DEFINE_SPINLOCK(cpu_bps_lock);
static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]);
static DEFINE_SPINLOCK(task_bps_lock);
static LIST_HEAD(task_bps);

static struct breakpoint *alloc_breakpoint(struct perf_event *bp)
@@ -174,7 +182,9 @@ static int task_bps_add(struct perf_event *bp)
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	spin_lock(&task_bps_lock);
	list_add(&tmp->list, &task_bps);
	spin_unlock(&task_bps_lock);
	return 0;
}

@@ -182,6 +192,7 @@ static void task_bps_remove(struct perf_event *bp)
{
	struct list_head *pos, *q;

	spin_lock(&task_bps_lock);
	list_for_each_safe(pos, q, &task_bps) {
		struct breakpoint *tmp = list_entry(pos, struct breakpoint, list);

@@ -191,6 +202,7 @@ static void task_bps_remove(struct perf_event *bp)
			break;
		}
	}
	spin_unlock(&task_bps_lock);
}

/*
@@ -200,12 +212,17 @@ static void task_bps_remove(struct perf_event *bp)
static bool all_task_bps_check(struct perf_event *bp)
{
	struct breakpoint *tmp;
	bool ret = false;

	/*
	 * Walk every entry on task_bps and report true if can_co_exist()
	 * rejects @bp against any of them; false if the whole list passes.
	 *
	 * There is exactly one exit path: returning from inside the loop
	 * would leave task_bps_lock held, and a "return false" in the loop
	 * body would stop the scan after the first entry.
	 */
	spin_lock(&task_bps_lock);
	list_for_each_entry(tmp, &task_bps, list) {
		if (!can_co_exist(tmp, bp)) {
			ret = true;
			break;
		}
	}
	spin_unlock(&task_bps_lock);
	return ret;
}

/*
@@ -215,13 +232,18 @@ static bool all_task_bps_check(struct perf_event *bp)
static bool same_task_bps_check(struct perf_event *bp)
{
	struct breakpoint *tmp;
	bool ret = false;

	/*
	 * Like a full task_bps scan, but only entries whose hw.target equals
	 * @bp's hw.target are tested with can_co_exist(). Returns true on the
	 * first such entry that is rejected, false otherwise.
	 *
	 * The result is carried in 'ret' so that task_bps_lock is always
	 * released before returning.
	 */
	spin_lock(&task_bps_lock);
	list_for_each_entry(tmp, &task_bps, list) {
		if (tmp->bp->hw.target == bp->hw.target &&
		    !can_co_exist(tmp, bp)) {
			ret = true;
			break;
		}
	}
	spin_unlock(&task_bps_lock);
	return ret;
}

static int cpu_bps_add(struct perf_event *bp)
@@ -234,6 +256,7 @@ static int cpu_bps_add(struct perf_event *bp)
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	spin_lock(&cpu_bps_lock);
	cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
	for (i = 0; i < nr_wp_slots(); i++) {
		if (!cpu_bp[i]) {
@@ -241,6 +264,7 @@ static int cpu_bps_add(struct perf_event *bp)
			break;
		}
	}
	spin_unlock(&cpu_bps_lock);
	return 0;
}

@@ -249,6 +273,7 @@ static void cpu_bps_remove(struct perf_event *bp)
	struct breakpoint **cpu_bp;
	int i = 0;

	spin_lock(&cpu_bps_lock);
	cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu);
	for (i = 0; i < nr_wp_slots(); i++) {
		if (!cpu_bp[i])
@@ -260,19 +285,25 @@ static void cpu_bps_remove(struct perf_event *bp)
			break;
		}
	}
	spin_unlock(&cpu_bps_lock);
}

static bool cpu_bps_check(int cpu, struct perf_event *bp)
{
	struct breakpoint **cpu_bp;
	bool ret = false;
	int i;

	/*
	 * Check @bp against each occupied slot of @cpu's cpu_bps array
	 * (nr_wp_slots() entries). Returns true if can_co_exist() rejects
	 * @bp against any installed breakpoint, false otherwise.
	 *
	 * All exits go through the unlock below; an early return from the
	 * loop would leave cpu_bps_lock held and, for "return false", would
	 * skip the remaining slots.
	 */
	spin_lock(&cpu_bps_lock);
	cpu_bp = per_cpu_ptr(cpu_bps, cpu);
	for (i = 0; i < nr_wp_slots(); i++) {
		if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) {
			ret = true;
			break;
		}
	}
	spin_unlock(&cpu_bps_lock);
	return ret;
}

static bool all_cpu_bps_check(struct perf_event *bp)
@@ -286,10 +317,6 @@ static bool all_cpu_bps_check(struct perf_event *bp)
	return false;
}

/*
 * We don't use any locks to serialize accesses to cpu_bps or task_bps
 * because we are already inside nr_bp_mutex.
 */
int arch_reserve_bp_slot(struct perf_event *bp)
{
	int ret;
+7 −3
Original line number Diff line number Diff line
@@ -2314,16 +2314,20 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
			cpuhw = this_cpu_ptr(&cpu_hw_events);
			power_pmu_bhrb_read(event, cpuhw);
			data.br_stack = &cpuhw->bhrb_stack;
			data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
		}

		if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
						ppmu->get_mem_data_src)
						ppmu->get_mem_data_src) {
			ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
			data.sample_flags |= PERF_SAMPLE_DATA_SRC;
		}

		if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
						ppmu->get_mem_weight)
						ppmu->get_mem_weight) {
			ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);

			data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
		}
		if (perf_event_overflow(event, &data, regs))
			power_pmu_stop(event, 0);
	} else if (period) {
+1 −0
Original line number Diff line number Diff line
@@ -664,6 +664,7 @@ static int cfdiag_push_sample(struct perf_event *event,
		raw.frag.data = cpuhw->stop;
		raw.size = raw.frag.size;
		data.raw = &raw;
		data.sample_flags |= PERF_SAMPLE_RAW;
	}

	overflow = perf_event_overflow(event, &data, &regs);
+1 −0
Original line number Diff line number Diff line
@@ -366,6 +366,7 @@ static int paicrypt_push_sample(void)
		raw.frag.data = cpump->save;
		raw.size = raw.frag.size;
		data.raw = &raw;
		data.sample_flags |= PERF_SAMPLE_RAW;
	}

	overflow = perf_event_overflow(event, &data, &regs);
+1 −4
Original line number Diff line number Diff line
@@ -48,10 +48,7 @@ struct pmu;
/* Maximum number of UBC channels */
#define HBP_NUM		2

/*
 * Number of hardware breakpoint slots: always HBP_NUM; the type argument
 * is not consulted.
 */
static inline int hw_breakpoint_slots(int type)
{
	return HBP_NUM;
}
/*
 * Macro of the same name, also expanding to HBP_NUM. Function-like uses of
 * hw_breakpoint_slots() after this point resolve to the macro rather than
 * the inline function above.
 * NOTE(review): this looks like the before/after pair of a diff hunk --
 * confirm which of the two definitions is meant to remain.
 */
#define hw_breakpoint_slots(type) (HBP_NUM)

/* arch/sh/kernel/hw_breakpoint.c */
extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
Loading