Loading arch/x86/oprofile/nmi_int.c +51 −50 Original line number Diff line number Diff line Loading @@ -31,6 +31,26 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc); /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; /* common functions */ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, struct op_counter_config *counter_config) { u64 val = 0; u16 event = (u16)counter_config->event; val |= ARCH_PERFMON_EVENTSEL_INT; val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0; val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0; val |= (counter_config->unit_mask & 0xFF) << 8; event &= model->event_mask ? model->event_mask : 0xFF; val |= event & 0xFF; val |= (event & 0x0F00) << 24; return val; } static int profile_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { Loading @@ -52,26 +72,18 @@ static int profile_exceptions_notify(struct notifier_block *self, static void nmi_cpu_save_registers(struct op_msrs *msrs) { unsigned int const nr_ctrs = model->num_counters; unsigned int const nr_ctrls = model->num_controls; struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; unsigned int i; for (i = 0; i < nr_ctrs; ++i) { if (counters[i].addr) { rdmsr(counters[i].addr, counters[i].saved.low, counters[i].saved.high); } for (i = 0; i < model->num_counters; ++i) { if (counters[i].addr) rdmsrl(counters[i].addr, counters[i].saved); } for (i = 0; i < nr_ctrls; ++i) { if (controls[i].addr) { rdmsr(controls[i].addr, controls[i].saved.low, controls[i].saved.high); } for (i = 0; i < model->num_controls; ++i) { if (controls[i].addr) rdmsrl(controls[i].addr, controls[i].saved); } } Loading Loading @@ -126,7 +138,7 @@ static void nmi_cpu_setup(void *dummy) int cpu = smp_processor_id(); struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); spin_lock(&oprofilefs_lock); model->setup_ctrs(msrs); model->setup_ctrs(model, msrs); spin_unlock(&oprofilefs_lock); per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); Loading Loading @@ -178,26 +190,18 @@ static int nmi_setup(void) static void nmi_restore_registers(struct op_msrs *msrs) { unsigned int const nr_ctrs = model->num_counters; unsigned int const nr_ctrls = model->num_controls; struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; unsigned int i; for (i = 0; i < nr_ctrls; ++i) { if (controls[i].addr) { wrmsr(controls[i].addr, controls[i].saved.low, controls[i].saved.high); } for (i = 0; i < model->num_controls; ++i) { if (controls[i].addr) wrmsrl(controls[i].addr, controls[i].saved); } for (i = 0; i < nr_ctrs; ++i) { if (counters[i].addr) { wrmsr(counters[i].addr, counters[i].saved.low, counters[i].saved.high); } for (i = 0; i < model->num_counters; ++i) { if (counters[i].addr) wrmsrl(counters[i].addr, counters[i].saved); } } Loading Loading @@ -402,6 +406,7 @@ module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); static int __init ppro_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; struct op_x86_model_spec const *spec = &op_ppro_spec; /* default */ if (force_arch_perfmon && cpu_has_arch_perfmon) return 0; Loading @@ -428,7 +433,7 @@ static int __init ppro_init(char **cpu_type) *cpu_type = "i386/core_2"; break; case 26: arch_perfmon_setup_counters(); spec = &op_arch_perfmon_spec; *cpu_type = "i386/core_i7"; break; case 28: Loading @@ -439,17 +444,7 @@ static int __init ppro_init(char **cpu_type) return 0; } model = &op_ppro_spec; return 1; } static int __init arch_perfmon_init(char **cpu_type) { if (!cpu_has_arch_perfmon) return 0; *cpu_type = "i386/arch_perfmon"; model = &op_arch_perfmon_spec; arch_perfmon_setup_counters(); model = spec; return 1; } Loading @@ -471,27 +466,26 @@ int __init op_nmi_init(struct oprofile_operations *ops) /* Needs to be at least an Athlon (or hammer in 32bit mode) */ switch (family) { default: return -ENODEV; case 6: model = &op_amd_spec; cpu_type = "i386/athlon"; break; case 0xf: model = &op_amd_spec; /* Actually it could be i386/hammer too, but give user space an consistent name. */ /* * Actually it could be i386/hammer too, but * give user space an consistent name. */ cpu_type = "x86-64/hammer"; break; case 0x10: model = &op_amd_spec; cpu_type = "x86-64/family10"; break; case 0x11: model = &op_amd_spec; cpu_type = "x86-64/family11h"; break; default: return -ENODEV; } model = &op_amd_spec; break; case X86_VENDOR_INTEL: Loading @@ -510,8 +504,15 @@ int __init op_nmi_init(struct oprofile_operations *ops) break; } if (!cpu_type && !arch_perfmon_init(&cpu_type)) if (cpu_type) break; if (!cpu_has_arch_perfmon) return -ENODEV; /* use arch perfmon as fallback */ cpu_type = "i386/arch_perfmon"; model = &op_arch_perfmon_spec; break; default: Loading arch/x86/oprofile/op_model_amd.c +118 −149 Original line number Diff line number Diff line Loading @@ -25,40 +25,25 @@ #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 #define OP_EVENT_MASK 0x0FFF #define OP_CTR_OVERFLOW (1ULL<<31) #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) #define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0) #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) #define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) #define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) #define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) #define CTRL_CLEAR_LO(x) (x &= (1<<21)) #define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0) #define CTRL_SET_ENABLE(val) (val |= 1<<20) #define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) #define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) #define CTRL_SET_UM(val, m) (val |= (m << 8)) #define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff)) #define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf)) #define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) static unsigned long reset_value[NUM_COUNTERS]; #ifdef CONFIG_OPROFILE_IBS /* IbsFetchCtl bits/masks */ #define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */ #define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */ #define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */ #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) #define IBS_FETCH_ENABLE (1ULL<<48) #define IBS_FETCH_CNT_MASK 0xFFFF0000ULL /*IbsOpCtl bits */ #define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ #define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ #define IBS_OP_CNT_CTL (1ULL<<19) #define IBS_OP_VAL (1ULL<<18) #define IBS_OP_ENABLE (1ULL<<17) #define IBS_FETCH_SIZE 6 #define IBS_OP_SIZE 12 Loading Loading @@ -99,49 +84,38 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) } } static void op_amd_setup_ctrs(struct op_msrs const * const msrs) static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, struct op_msrs const * const msrs) { unsigned int low, high; u64 val; int i; /* clear all counters */ for (i = 0 ; i < NUM_CONTROLS; ++i) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) if (unlikely(!msrs->controls[i].addr)) continue; CTRL_READ(low, high, msrs, i); CTRL_CLEAR_LO(low); CTRL_CLEAR_HI(high); CTRL_WRITE(low, high, msrs, i); rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; wrmsrl(msrs->controls[i].addr, val); } /* avoid a false detection of ctr overflows in NMI handler */ for (i = 0; i < NUM_COUNTERS; ++i) { if (unlikely(!CTR_IS_RESERVED(msrs, i))) if (unlikely(!msrs->counters[i].addr)) continue; CTR_WRITE(1, msrs, i); wrmsrl(msrs->counters[i].addr, -1LL); } /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { if (counter_config[i].enabled && msrs->counters[i].addr) { reset_value[i] = counter_config[i].count; CTR_WRITE(counter_config[i].count, msrs, i); CTRL_READ(low, high, msrs, i); CTRL_CLEAR_LO(low); CTRL_CLEAR_HI(high); CTRL_SET_ENABLE(low); CTRL_SET_USR(low, counter_config[i].user); CTRL_SET_KERN(low, counter_config[i].kernel); CTRL_SET_UM(low, counter_config[i].unit_mask); CTRL_SET_EVENT_LOW(low, counter_config[i].event); CTRL_SET_EVENT_HIGH(high, counter_config[i].event); CTRL_SET_HOST_ONLY(high, 0); CTRL_SET_GUEST_ONLY(high, 0); CTRL_WRITE(low, high, msrs, i); wrmsrl(msrs->counters[i].addr, -(s64)counter_config[i].count); rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; val |= op_x86_get_ctrl(model, &counter_config[i]); wrmsrl(msrs->controls[i].addr, val); } else { reset_value[i] = 0; } Loading @@ -154,93 +128,116 @@ static inline int op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { u32 low, high; u64 msr; u64 val, ctl; struct op_entry entry; if (!has_ibs) return 1; if (ibs_config.fetch_enabled) { rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); if (high & IBS_FETCH_HIGH_VALID_BIT) { rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr); oprofile_write_reserve(&entry, regs, msr, rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); if (ctl & IBS_FETCH_VAL) { rdmsrl(MSR_AMD64_IBSFETCHLINAD, val); oprofile_write_reserve(&entry, regs, val, IBS_FETCH_CODE, IBS_FETCH_SIZE); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); oprofile_add_data(&entry, low); oprofile_add_data(&entry, high); rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); oprofile_add_data64(&entry, val); oprofile_add_data64(&entry, ctl); rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val); oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ high &= ~IBS_FETCH_HIGH_VALID_BIT; high |= IBS_FETCH_HIGH_ENABLE; low &= IBS_FETCH_LOW_MAX_CNT_MASK; wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK); ctl |= IBS_FETCH_ENABLE; wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); } } if (ibs_config.op_enabled) { rdmsr(MSR_AMD64_IBSOPCTL, low, high); if (low & IBS_OP_LOW_VALID_BIT) { rdmsrl(MSR_AMD64_IBSOPRIP, msr); oprofile_write_reserve(&entry, regs, msr, rdmsrl(MSR_AMD64_IBSOPCTL, ctl); if (ctl & IBS_OP_VAL) { rdmsrl(MSR_AMD64_IBSOPRIP, val); oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE, IBS_OP_SIZE); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSOPDATA, msr); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSOPDATA2, msr); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSOPDATA3, msr); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSDCLINAD, msr); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA, val); oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA2, val); oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA3, val); oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSDCLINAD, val); oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ high = 0; low &= ~IBS_OP_LOW_VALID_BIT; low |= IBS_OP_LOW_ENABLE; wrmsr(MSR_AMD64_IBSOPCTL, low, high); ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; ctl |= IBS_OP_ENABLE; wrmsrl(MSR_AMD64_IBSOPCTL, ctl); } } return 1; } static inline void op_amd_start_ibs(void) { u64 val; if (has_ibs && ibs_config.fetch_enabled) { val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; val |= IBS_FETCH_ENABLE; wrmsrl(MSR_AMD64_IBSFETCHCTL, val); } if (has_ibs && ibs_config.op_enabled) { val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; val |= IBS_OP_ENABLE; wrmsrl(MSR_AMD64_IBSOPCTL, val); } } static void op_amd_stop_ibs(void) { if (has_ibs && ibs_config.fetch_enabled) /* clear max count and enable */ wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); if (has_ibs && ibs_config.op_enabled) /* clear max count and enable */ wrmsrl(MSR_AMD64_IBSOPCTL, 0); } #else static inline int op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { } static inline void op_amd_start_ibs(void) { } static inline void op_amd_stop_ibs(void) { } #endif static int op_amd_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { unsigned int low, high; u64 val; int i; for (i = 0 ; i < NUM_COUNTERS; ++i) { if (!reset_value[i]) continue; CTR_READ(low, high, msrs, i); if (CTR_OVERFLOWED(low)) { rdmsrl(msrs->counters[i].addr, val); /* bit is clear if overflowed: */ if (val & OP_CTR_OVERFLOW) continue; oprofile_add_sample(regs, i); CTR_WRITE(reset_value[i], msrs, i); } wrmsrl(msrs->counters[i].addr, -(s64)reset_value[i]); } #ifdef CONFIG_OPROFILE_IBS op_amd_handle_ibs(regs, msrs); #endif /* See op_model_ppro.c */ return 1; Loading @@ -248,38 +245,22 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, static void op_amd_start(struct op_msrs const * const msrs) { unsigned int low, high; u64 val; int i; for (i = 0 ; i < NUM_COUNTERS ; ++i) { if (reset_value[i]) { CTRL_READ(low, high, msrs, i); CTRL_SET_ACTIVE(low); CTRL_WRITE(low, high, msrs, i); rdmsrl(msrs->controls[i].addr, val); val |= ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(msrs->controls[i].addr, val); } } #ifdef CONFIG_OPROFILE_IBS if (has_ibs && ibs_config.fetch_enabled) { low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ + IBS_FETCH_HIGH_ENABLE; wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); op_amd_start_ibs(); } if (has_ibs && ibs_config.op_enabled) { low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ + IBS_OP_LOW_ENABLE; high = 0; wrmsr(MSR_AMD64_IBSOPCTL, low, high); } #endif } static void op_amd_stop(struct op_msrs const * const msrs) { unsigned int low, high; u64 val; int i; /* Loading @@ -289,26 +270,12 @@ static void op_amd_stop(struct op_msrs const * const msrs) for (i = 0 ; i < NUM_COUNTERS ; ++i) { if (!reset_value[i]) continue; CTRL_READ(low, high, msrs, i); CTRL_SET_INACTIVE(low); CTRL_WRITE(low, high, msrs, i); } #ifdef CONFIG_OPROFILE_IBS if (has_ibs && ibs_config.fetch_enabled) { /* clear max count and enable */ low = 0; high = 0; wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); rdmsrl(msrs->controls[i].addr, val); val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(msrs->controls[i].addr, val); } if (has_ibs && ibs_config.op_enabled) { /* clear max count and enable */ low = 0; high = 0; wrmsr(MSR_AMD64_IBSOPCTL, low, high); } #endif op_amd_stop_ibs(); } static void op_amd_shutdown(struct op_msrs const * const msrs) Loading @@ -316,11 +283,11 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) int i; for (i = 0 ; i < NUM_COUNTERS ; ++i) { if (CTR_IS_RESERVED(msrs, i)) if (msrs->counters[i].addr) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } for (i = 0 ; i < NUM_CONTROLS ; ++i) { if (CTRL_IS_RESERVED(msrs, i)) if (msrs->controls[i].addr) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } } Loading Loading @@ -491,14 +458,16 @@ static void op_amd_exit(void) {} #endif /* CONFIG_OPROFILE_IBS */ struct op_x86_model_spec const op_amd_spec = { .init = op_amd_init, .exit = op_amd_exit, .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, .reserved = MSR_AMD_EVENTSEL_RESERVED, .event_mask = OP_EVENT_MASK, .init = op_amd_init, .exit = op_amd_exit, .fill_in_addresses = &op_amd_fill_in_addresses, .setup_ctrs = &op_amd_setup_ctrs, .check_ctrs = &op_amd_check_ctrs, .start = &op_amd_start, .stop = &op_amd_stop, .shutdown = &op_amd_shutdown .shutdown = &op_amd_shutdown, }; arch/x86/oprofile/op_model_p4.c +29 −31 Original line number Diff line number Diff line Loading @@ -32,6 +32,8 @@ #define NUM_CCCRS_HT2 9 #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) #define OP_CTR_OVERFLOW (1ULL<<31) static unsigned int num_counters = NUM_COUNTERS_NON_HT; static unsigned int num_controls = NUM_CONTROLS_NON_HT; Loading Loading @@ -350,8 +352,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) #define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) #define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) #define CCCR_RESERVED_BITS 0x38030FFF #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) Loading @@ -361,17 +361,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) #define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) #define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0) #define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0) #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) /* this assigns a "stagger" to the current CPU, which is used throughout the code in this module as an extra array offset, to select the "even" Loading Loading @@ -515,7 +507,7 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) if (ev->bindings[i].virt_counter & counter_bit) { /* modify ESCR */ ESCR_READ(escr, high, ev, i); rdmsr(ev->bindings[i].escr_address, escr, high); ESCR_CLEAR(escr); if (stag == 0) { ESCR_SET_USR_0(escr, counter_config[ctr].user); Loading @@ -526,10 +518,11 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) } ESCR_SET_EVENT_SELECT(escr, ev->event_select); ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); ESCR_WRITE(escr, high, ev, i); wrmsr(ev->bindings[i].escr_address, escr, high); /* modify CCCR */ CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, cccr, high); CCCR_CLEAR(cccr); CCCR_SET_REQUIRED_BITS(cccr); CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); Loading @@ -537,7 +530,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) CCCR_SET_PMI_OVF_0(cccr); else CCCR_SET_PMI_OVF_1(cccr); CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, cccr, high); return; } } Loading @@ -548,7 +542,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) } static void p4_setup_ctrs(struct op_msrs const * const msrs) static void p4_setup_ctrs(struct op_x86_model_spec const *model, struct op_msrs const * const msrs) { unsigned int i; unsigned int low, high; Loading @@ -564,7 +559,7 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) /* clear the cccrs we will use */ for (i = 0 ; i < num_counters ; i++) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) if (unlikely(!msrs->controls[i].addr)) continue; rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_CLEAR(low); Loading @@ -574,17 +569,18 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) /* clear all escrs (including those outside our concern) */ for (i = num_counters; i < num_controls; i++) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) if (unlikely(!msrs->controls[i].addr)) continue; wrmsr(msrs->controls[i].addr, 0, 0); } /* setup all counters */ for (i = 0 ; i < num_counters ; ++i) { if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) { if (counter_config[i].enabled && msrs->controls[i].addr) { reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address, -(s64)counter_config[i].count); } else { reset_value[i] = 0; } Loading Loading @@ -624,14 +620,16 @@ static int p4_check_ctrs(struct pt_regs * const regs, real = VIRT_CTR(stag, i); CCCR_READ(low, high, real); CTR_READ(ctr, high, real); if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { rdmsr(p4_counters[real].cccr_address, low, high); rdmsr(p4_counters[real].counter_address, ctr, high); if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) { oprofile_add_sample(regs, i); CTR_WRITE(reset_value[i], real); wrmsrl(p4_counters[real].counter_address, -(s64)reset_value[i]); CCCR_CLEAR_OVF(low); CCCR_WRITE(low, high, real); CTR_WRITE(reset_value[i], real); wrmsr(p4_counters[real].cccr_address, low, high); wrmsrl(p4_counters[real].counter_address, -(s64)reset_value[i]); } } Loading @@ -653,9 +651,9 @@ static void p4_start(struct op_msrs const * const msrs) for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; CCCR_READ(low, high, VIRT_CTR(stag, i)); rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_SET_ENABLE(low); CCCR_WRITE(low, high, VIRT_CTR(stag, i)); wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); } } Loading @@ -670,9 +668,9 @@ static void p4_stop(struct op_msrs const * const msrs) for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; CCCR_READ(low, high, VIRT_CTR(stag, i)); rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_SET_DISABLE(low); CCCR_WRITE(low, high, VIRT_CTR(stag, i)); wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); } } Loading @@ -681,7 +679,7 @@ static void p4_shutdown(struct op_msrs const * const msrs) int i; for (i = 0 ; i < num_counters ; ++i) { if (CTR_IS_RESERVED(msrs, i)) if (msrs->counters[i].addr) release_perfctr_nmi(msrs->counters[i].addr); } /* Loading @@ -690,7 +688,7 @@ static void p4_shutdown(struct op_msrs const * const msrs) * This saves a few bits. */ for (i = num_counters ; i < num_controls ; ++i) { if (CTRL_IS_RESERVED(msrs, i)) if (msrs->controls[i].addr) release_evntsel_nmi(msrs->controls[i].addr); } } Loading arch/x86/oprofile/op_model_ppro.c +42 −53 File changed.Preview size limit exceeded, changes collapsed. Show changes arch/x86/oprofile/op_x86_model.h +27 −20 Original line number Diff line number Diff line Loading @@ -6,19 +6,18 @@ * @remark Read the file COPYING * * @author Graydon Hoare * @author Robert Richter <robert.richter@amd.com> */ #ifndef OP_X86_MODEL_H #define OP_X86_MODEL_H struct op_saved_msr { unsigned int high; unsigned int low; }; #include <asm/types.h> #include <asm/perf_counter.h> struct op_msr { unsigned long addr; struct op_saved_msr saved; u64 saved; }; struct op_msrs { Loading @@ -28,16 +27,21 @@ struct op_msrs { struct pt_regs; struct oprofile_operations; /* The model vtable abstracts the differences between * various x86 CPU models' perfctr support. */ struct op_x86_model_spec { int (*init)(struct oprofile_operations *ops); void (*exit)(void); unsigned int num_counters; unsigned int num_controls; u64 reserved; u16 event_mask; int (*init)(struct oprofile_operations *ops); void (*exit)(void); void (*fill_in_addresses)(struct op_msrs * const msrs); void (*setup_ctrs)(struct op_msrs const * const msrs); void (*setup_ctrs)(struct op_x86_model_spec const *model, struct op_msrs const * const msrs); int (*check_ctrs)(struct pt_regs * const regs, struct op_msrs const * const msrs); void (*start)(struct op_msrs const * const msrs); Loading @@ -45,12 +49,15 @@ struct op_x86_model_spec { void (*shutdown)(struct op_msrs const * const msrs); }; extern struct op_x86_model_spec op_ppro_spec; struct op_counter_config; extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, struct op_counter_config *counter_config); extern struct op_x86_model_spec const op_ppro_spec; extern struct op_x86_model_spec const op_p4_spec; extern struct op_x86_model_spec const op_p4_ht2_spec; extern struct op_x86_model_spec const op_amd_spec; extern struct op_x86_model_spec op_arch_perfmon_spec; extern void arch_perfmon_setup_counters(void); #endif /* OP_X86_MODEL_H */ Loading
arch/x86/oprofile/nmi_int.c +51 −50 Original line number Diff line number Diff line Loading @@ -31,6 +31,26 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc); /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; /* common functions */ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, struct op_counter_config *counter_config) { u64 val = 0; u16 event = (u16)counter_config->event; val |= ARCH_PERFMON_EVENTSEL_INT; val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0; val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0; val |= (counter_config->unit_mask & 0xFF) << 8; event &= model->event_mask ? model->event_mask : 0xFF; val |= event & 0xFF; val |= (event & 0x0F00) << 24; return val; } static int profile_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { Loading @@ -52,26 +72,18 @@ static int profile_exceptions_notify(struct notifier_block *self, static void nmi_cpu_save_registers(struct op_msrs *msrs) { unsigned int const nr_ctrs = model->num_counters; unsigned int const nr_ctrls = model->num_controls; struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; unsigned int i; for (i = 0; i < nr_ctrs; ++i) { if (counters[i].addr) { rdmsr(counters[i].addr, counters[i].saved.low, counters[i].saved.high); } for (i = 0; i < model->num_counters; ++i) { if (counters[i].addr) rdmsrl(counters[i].addr, counters[i].saved); } for (i = 0; i < nr_ctrls; ++i) { if (controls[i].addr) { rdmsr(controls[i].addr, controls[i].saved.low, controls[i].saved.high); } for (i = 0; i < model->num_controls; ++i) { if (controls[i].addr) rdmsrl(controls[i].addr, controls[i].saved); } } Loading Loading @@ -126,7 +138,7 @@ static void nmi_cpu_setup(void *dummy) int cpu = smp_processor_id(); struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); spin_lock(&oprofilefs_lock); model->setup_ctrs(msrs); model->setup_ctrs(model, msrs); spin_unlock(&oprofilefs_lock); per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); Loading Loading @@ -178,26 +190,18 @@ static int nmi_setup(void) static void nmi_restore_registers(struct op_msrs *msrs) { unsigned int const nr_ctrs = model->num_counters; unsigned int const nr_ctrls = model->num_controls; struct op_msr *counters = msrs->counters; struct op_msr *controls = msrs->controls; unsigned int i; for (i = 0; i < nr_ctrls; ++i) { if (controls[i].addr) { wrmsr(controls[i].addr, controls[i].saved.low, controls[i].saved.high); } for (i = 0; i < model->num_controls; ++i) { if (controls[i].addr) wrmsrl(controls[i].addr, controls[i].saved); } for (i = 0; i < nr_ctrs; ++i) { if (counters[i].addr) { wrmsr(counters[i].addr, counters[i].saved.low, counters[i].saved.high); } for (i = 0; i < model->num_counters; ++i) { if (counters[i].addr) wrmsrl(counters[i].addr, counters[i].saved); } } Loading Loading @@ -402,6 +406,7 @@ module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); static int __init ppro_init(char **cpu_type) { __u8 cpu_model = boot_cpu_data.x86_model; struct op_x86_model_spec const *spec = &op_ppro_spec; /* default */ if (force_arch_perfmon && cpu_has_arch_perfmon) return 0; Loading @@ -428,7 +433,7 @@ static int __init ppro_init(char **cpu_type) *cpu_type = "i386/core_2"; break; case 26: arch_perfmon_setup_counters(); spec = &op_arch_perfmon_spec; *cpu_type = "i386/core_i7"; break; case 28: Loading @@ -439,17 +444,7 @@ static int __init ppro_init(char **cpu_type) return 0; } model = &op_ppro_spec; return 1; } static int __init arch_perfmon_init(char **cpu_type) { if (!cpu_has_arch_perfmon) return 0; *cpu_type = "i386/arch_perfmon"; model = &op_arch_perfmon_spec; arch_perfmon_setup_counters(); model = spec; return 1; } Loading @@ -471,27 +466,26 @@ int __init op_nmi_init(struct oprofile_operations *ops) /* Needs to be at least an Athlon (or hammer in 32bit mode) */ switch (family) { default: return -ENODEV; case 6: model = &op_amd_spec; cpu_type = "i386/athlon"; break; case 0xf: model = &op_amd_spec; /* Actually it could be i386/hammer too, but give user space an consistent name. */ /* * Actually it could be i386/hammer too, but * give user space an consistent name. */ cpu_type = "x86-64/hammer"; break; case 0x10: model = &op_amd_spec; cpu_type = "x86-64/family10"; break; case 0x11: model = &op_amd_spec; cpu_type = "x86-64/family11h"; break; default: return -ENODEV; } model = &op_amd_spec; break; case X86_VENDOR_INTEL: Loading @@ -510,8 +504,15 @@ int __init op_nmi_init(struct oprofile_operations *ops) break; } if (!cpu_type && !arch_perfmon_init(&cpu_type)) if (cpu_type) break; if (!cpu_has_arch_perfmon) return -ENODEV; /* use arch perfmon as fallback */ cpu_type = "i386/arch_perfmon"; model = &op_arch_perfmon_spec; break; default: Loading
arch/x86/oprofile/op_model_amd.c +118 −149 Original line number Diff line number Diff line Loading @@ -25,40 +25,25 @@ #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 #define OP_EVENT_MASK 0x0FFF #define OP_CTR_OVERFLOW (1ULL<<31) #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) #define CTR_WRITE(l, msrs, c) do {wrmsr(msrs->counters[(c)].addr, -(unsigned int)(l), -1); } while (0) #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) #define CTRL_READ(l, h, msrs, c) do {rdmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) #define CTRL_WRITE(l, h, msrs, c) do {wrmsr(msrs->controls[(c)].addr, (l), (h)); } while (0) #define CTRL_SET_ACTIVE(n) (n |= (1<<22)) #define CTRL_SET_INACTIVE(n) (n &= ~(1<<22)) #define CTRL_CLEAR_LO(x) (x &= (1<<21)) #define CTRL_CLEAR_HI(x) (x &= 0xfffffcf0) #define CTRL_SET_ENABLE(val) (val |= 1<<20) #define CTRL_SET_USR(val, u) (val |= ((u & 1) << 16)) #define CTRL_SET_KERN(val, k) (val |= ((k & 1) << 17)) #define CTRL_SET_UM(val, m) (val |= (m << 8)) #define CTRL_SET_EVENT_LOW(val, e) (val |= (e & 0xff)) #define CTRL_SET_EVENT_HIGH(val, e) (val |= ((e >> 8) & 0xf)) #define CTRL_SET_HOST_ONLY(val, h) (val |= ((h & 1) << 9)) #define CTRL_SET_GUEST_ONLY(val, h) (val |= ((h & 1) << 8)) #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) static unsigned long reset_value[NUM_COUNTERS]; #ifdef CONFIG_OPROFILE_IBS /* IbsFetchCtl bits/masks */ #define IBS_FETCH_HIGH_VALID_BIT (1UL << 17) /* bit 49 */ #define IBS_FETCH_HIGH_ENABLE (1UL << 16) /* bit 48 */ #define IBS_FETCH_LOW_MAX_CNT_MASK 0x0000FFFFUL /* MaxCnt mask */ #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) #define IBS_FETCH_ENABLE (1ULL<<48) #define IBS_FETCH_CNT_MASK 0xFFFF0000ULL /*IbsOpCtl bits */ #define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ #define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ #define IBS_OP_CNT_CTL (1ULL<<19) #define IBS_OP_VAL (1ULL<<18) #define IBS_OP_ENABLE (1ULL<<17) #define IBS_FETCH_SIZE 6 #define IBS_OP_SIZE 12 Loading Loading @@ -99,49 +84,38 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) } } static void op_amd_setup_ctrs(struct op_msrs const * const msrs) static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, struct op_msrs const * const msrs) { unsigned int low, high; u64 val; int i; /* clear all counters */ for (i = 0 ; i < NUM_CONTROLS; ++i) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) if (unlikely(!msrs->controls[i].addr)) continue; CTRL_READ(low, high, msrs, i); CTRL_CLEAR_LO(low); CTRL_CLEAR_HI(high); CTRL_WRITE(low, high, msrs, i); rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; wrmsrl(msrs->controls[i].addr, val); } /* avoid a false detection of ctr overflows in NMI handler */ for (i = 0; i < NUM_COUNTERS; ++i) { if (unlikely(!CTR_IS_RESERVED(msrs, i))) if (unlikely(!msrs->counters[i].addr)) continue; CTR_WRITE(1, msrs, i); wrmsrl(msrs->counters[i].addr, -1LL); } /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs, i))) { if (counter_config[i].enabled && msrs->counters[i].addr) { reset_value[i] = counter_config[i].count; CTR_WRITE(counter_config[i].count, msrs, i); CTRL_READ(low, high, msrs, i); CTRL_CLEAR_LO(low); CTRL_CLEAR_HI(high); CTRL_SET_ENABLE(low); CTRL_SET_USR(low, counter_config[i].user); CTRL_SET_KERN(low, counter_config[i].kernel); CTRL_SET_UM(low, counter_config[i].unit_mask); CTRL_SET_EVENT_LOW(low, counter_config[i].event); CTRL_SET_EVENT_HIGH(high, counter_config[i].event); CTRL_SET_HOST_ONLY(high, 0); CTRL_SET_GUEST_ONLY(high, 0); CTRL_WRITE(low, high, msrs, i); wrmsrl(msrs->counters[i].addr, -(s64)counter_config[i].count); rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; val |= op_x86_get_ctrl(model, &counter_config[i]); wrmsrl(msrs->controls[i].addr, val); } else { reset_value[i] = 0; } Loading @@ -154,93 +128,116 @@ static inline int op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { u32 low, high; u64 msr; u64 val, ctl; struct op_entry entry; if (!has_ibs) return 1; if (ibs_config.fetch_enabled) { rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); if (high & IBS_FETCH_HIGH_VALID_BIT) { rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr); oprofile_write_reserve(&entry, regs, msr, rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl); if (ctl & IBS_FETCH_VAL) { rdmsrl(MSR_AMD64_IBSFETCHLINAD, val); oprofile_write_reserve(&entry, regs, val, IBS_FETCH_CODE, IBS_FETCH_SIZE); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); oprofile_add_data(&entry, low); oprofile_add_data(&entry, high); rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); oprofile_add_data64(&entry, val); oprofile_add_data64(&entry, ctl); rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val); oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ high &= ~IBS_FETCH_HIGH_VALID_BIT; high |= IBS_FETCH_HIGH_ENABLE; low &= IBS_FETCH_LOW_MAX_CNT_MASK; wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK); ctl |= IBS_FETCH_ENABLE; wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); } } if (ibs_config.op_enabled) { rdmsr(MSR_AMD64_IBSOPCTL, low, high); if (low & IBS_OP_LOW_VALID_BIT) { rdmsrl(MSR_AMD64_IBSOPRIP, msr); oprofile_write_reserve(&entry, regs, msr, rdmsrl(MSR_AMD64_IBSOPCTL, ctl); if (ctl & IBS_OP_VAL) { rdmsrl(MSR_AMD64_IBSOPRIP, val); oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE, IBS_OP_SIZE); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSOPDATA, msr); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSOPDATA2, msr); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSOPDATA3, msr); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSDCLINAD, msr); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr); oprofile_add_data(&entry, (u32)msr); oprofile_add_data(&entry, (u32)(msr >> 32)); oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA, val); oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA2, val); oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA3, val); oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSDCLINAD, val); oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ high = 0; low &= ~IBS_OP_LOW_VALID_BIT; low |= IBS_OP_LOW_ENABLE; wrmsr(MSR_AMD64_IBSOPCTL, low, high); ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; ctl |= IBS_OP_ENABLE; wrmsrl(MSR_AMD64_IBSOPCTL, ctl); } } return 1; } static inline void op_amd_start_ibs(void) { u64 val; if (has_ibs && ibs_config.fetch_enabled) { val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; val |= IBS_FETCH_ENABLE; wrmsrl(MSR_AMD64_IBSFETCHCTL, val); } if (has_ibs && ibs_config.op_enabled) { val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; val |= IBS_OP_ENABLE; wrmsrl(MSR_AMD64_IBSOPCTL, val); } } static void op_amd_stop_ibs(void) { if (has_ibs && ibs_config.fetch_enabled) /* clear max count and enable */ wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); if (has_ibs && ibs_config.op_enabled) /* clear max count and enable */ wrmsrl(MSR_AMD64_IBSOPCTL, 0); } #else static inline int op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) { } static inline void op_amd_start_ibs(void) { } static inline void op_amd_stop_ibs(void) { } #endif static int op_amd_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { unsigned int low, high; u64 val; int i; for (i = 0 ; i < NUM_COUNTERS; ++i) { if (!reset_value[i]) continue; CTR_READ(low, high, msrs, i); if (CTR_OVERFLOWED(low)) { rdmsrl(msrs->counters[i].addr, val); /* bit is clear if overflowed: */ if (val & OP_CTR_OVERFLOW) continue; oprofile_add_sample(regs, i); CTR_WRITE(reset_value[i], msrs, i); } wrmsrl(msrs->counters[i].addr, -(s64)reset_value[i]); } #ifdef CONFIG_OPROFILE_IBS op_amd_handle_ibs(regs, msrs); #endif /* See op_model_ppro.c */ return 1; Loading @@ -248,38 +245,22 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, static void op_amd_start(struct op_msrs const * const msrs) { unsigned int low, high; u64 val; int i; for (i = 0 ; i < NUM_COUNTERS ; ++i) { if (reset_value[i]) { CTRL_READ(low, high, msrs, i); CTRL_SET_ACTIVE(low); CTRL_WRITE(low, high, msrs, i); rdmsrl(msrs->controls[i].addr, val); val |= ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(msrs->controls[i].addr, val); } } #ifdef CONFIG_OPROFILE_IBS if (has_ibs && ibs_config.fetch_enabled) { low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ + IBS_FETCH_HIGH_ENABLE; wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); op_amd_start_ibs(); } if (has_ibs && ibs_config.op_enabled) { low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ + IBS_OP_LOW_ENABLE; high = 0; wrmsr(MSR_AMD64_IBSOPCTL, low, high); } #endif } static void op_amd_stop(struct op_msrs const * const msrs) { unsigned int low, high; u64 val; int i; /* Loading @@ -289,26 +270,12 @@ static void op_amd_stop(struct op_msrs const * const msrs) for (i = 0 ; i < NUM_COUNTERS ; ++i) { if (!reset_value[i]) continue; CTRL_READ(low, high, msrs, i); CTRL_SET_INACTIVE(low); CTRL_WRITE(low, high, msrs, i); } #ifdef CONFIG_OPROFILE_IBS if (has_ibs && ibs_config.fetch_enabled) { /* clear max count and enable */ low = 0; high = 0; wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); rdmsrl(msrs->controls[i].addr, val); val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(msrs->controls[i].addr, val); } if (has_ibs && ibs_config.op_enabled) { /* clear max count and enable */ low = 0; high = 0; wrmsr(MSR_AMD64_IBSOPCTL, low, high); } #endif op_amd_stop_ibs(); } static void op_amd_shutdown(struct op_msrs const * const msrs) Loading @@ -316,11 +283,11 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) int i; for (i = 0 ; i < NUM_COUNTERS ; ++i) { if (CTR_IS_RESERVED(msrs, i)) if (msrs->counters[i].addr) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } for (i = 0 ; i < NUM_CONTROLS ; ++i) { if (CTRL_IS_RESERVED(msrs, i)) if (msrs->controls[i].addr) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } } Loading Loading @@ -491,14 +458,16 @@ static void op_amd_exit(void) {} #endif /* CONFIG_OPROFILE_IBS */ struct op_x86_model_spec const op_amd_spec = { .init = op_amd_init, .exit = op_amd_exit, .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, .reserved = MSR_AMD_EVENTSEL_RESERVED, .event_mask = OP_EVENT_MASK, .init = op_amd_init, .exit = op_amd_exit, .fill_in_addresses = &op_amd_fill_in_addresses, .setup_ctrs = &op_amd_setup_ctrs, .check_ctrs = &op_amd_check_ctrs, .start = &op_amd_start, .stop = &op_amd_stop, .shutdown = &op_amd_shutdown .shutdown = &op_amd_shutdown, };
arch/x86/oprofile/op_model_p4.c +29 −31 Original line number Diff line number Diff line Loading @@ -32,6 +32,8 @@ #define NUM_CCCRS_HT2 9 #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2) #define OP_CTR_OVERFLOW (1ULL<<31) static unsigned int num_counters = NUM_COUNTERS_NON_HT; static unsigned int num_controls = NUM_CONTROLS_NON_HT; Loading Loading @@ -350,8 +352,6 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) #define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) #define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) #define CCCR_RESERVED_BITS 0x38030FFF #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) Loading @@ -361,17 +361,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) #define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) #define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) #define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0) #define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0) #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) /* this assigns a "stagger" to the current CPU, which is used throughout the code in this module as an extra array offset, to select the "even" Loading Loading @@ -515,7 +507,7 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) if (ev->bindings[i].virt_counter & counter_bit) { /* modify ESCR */ ESCR_READ(escr, high, ev, i); rdmsr(ev->bindings[i].escr_address, escr, high); ESCR_CLEAR(escr); if (stag == 0) { ESCR_SET_USR_0(escr, counter_config[ctr].user); Loading @@ -526,10 +518,11 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) } ESCR_SET_EVENT_SELECT(escr, ev->event_select); ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); ESCR_WRITE(escr, high, ev, i); wrmsr(ev->bindings[i].escr_address, escr, high); /* modify CCCR */ CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, cccr, high); CCCR_CLEAR(cccr); CCCR_SET_REQUIRED_BITS(cccr); CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); Loading @@ -537,7 +530,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) CCCR_SET_PMI_OVF_0(cccr); else CCCR_SET_PMI_OVF_1(cccr); CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address, cccr, high); return; } } Loading @@ -548,7 +542,8 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) } static void p4_setup_ctrs(struct op_msrs const * const msrs) static void p4_setup_ctrs(struct op_x86_model_spec const *model, struct op_msrs const * const msrs) { unsigned int i; unsigned int low, high; Loading @@ -564,7 +559,7 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) /* clear the cccrs we will use */ for (i = 0 ; i < num_counters ; i++) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) if (unlikely(!msrs->controls[i].addr)) continue; rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_CLEAR(low); Loading @@ -574,17 +569,18 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) /* clear all escrs (including those outside our concern) */ for (i = num_counters; i < num_controls; i++) { if (unlikely(!CTRL_IS_RESERVED(msrs, i))) if (unlikely(!msrs->controls[i].addr)) continue; wrmsr(msrs->controls[i].addr, 0, 0); } /* setup all counters */ for (i = 0 ; i < num_counters ; ++i) { if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) { if (counter_config[i].enabled && msrs->controls[i].addr) { reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address, -(s64)counter_config[i].count); } else { reset_value[i] = 0; } Loading Loading @@ -624,14 +620,16 @@ static int p4_check_ctrs(struct pt_regs * const regs, real = VIRT_CTR(stag, i); CCCR_READ(low, high, real); CTR_READ(ctr, high, real); if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { rdmsr(p4_counters[real].cccr_address, low, high); rdmsr(p4_counters[real].counter_address, ctr, high); if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) { oprofile_add_sample(regs, i); CTR_WRITE(reset_value[i], real); wrmsrl(p4_counters[real].counter_address, -(s64)reset_value[i]); CCCR_CLEAR_OVF(low); CCCR_WRITE(low, high, real); CTR_WRITE(reset_value[i], real); wrmsr(p4_counters[real].cccr_address, low, high); wrmsrl(p4_counters[real].counter_address, -(s64)reset_value[i]); } } Loading @@ -653,9 +651,9 @@ static void p4_start(struct op_msrs const * const msrs) for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; CCCR_READ(low, high, VIRT_CTR(stag, i)); rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_SET_ENABLE(low); CCCR_WRITE(low, high, VIRT_CTR(stag, i)); wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); } } Loading @@ -670,9 +668,9 @@ static void p4_stop(struct op_msrs const * const msrs) for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; CCCR_READ(low, high, VIRT_CTR(stag, i)); rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_SET_DISABLE(low); CCCR_WRITE(low, high, VIRT_CTR(stag, i)); wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); } } Loading @@ -681,7 +679,7 @@ static void p4_shutdown(struct op_msrs const * const msrs) int i; for (i = 0 ; i < num_counters ; ++i) { if (CTR_IS_RESERVED(msrs, i)) if (msrs->counters[i].addr) release_perfctr_nmi(msrs->counters[i].addr); } /* Loading @@ -690,7 +688,7 @@ static void p4_shutdown(struct op_msrs const * const msrs) * This saves a few bits. */ for (i = num_counters ; i < num_controls ; ++i) { if (CTRL_IS_RESERVED(msrs, i)) if (msrs->controls[i].addr) release_evntsel_nmi(msrs->controls[i].addr); } } Loading
arch/x86/oprofile/op_model_ppro.c +42 −53 File changed.Preview size limit exceeded, changes collapsed. Show changes
arch/x86/oprofile/op_x86_model.h +27 −20 Original line number Diff line number Diff line Loading @@ -6,19 +6,18 @@ * @remark Read the file COPYING * * @author Graydon Hoare * @author Robert Richter <robert.richter@amd.com> */ #ifndef OP_X86_MODEL_H #define OP_X86_MODEL_H struct op_saved_msr { unsigned int high; unsigned int low; }; #include <asm/types.h> #include <asm/perf_counter.h> struct op_msr { unsigned long addr; struct op_saved_msr saved; u64 saved; }; struct op_msrs { Loading @@ -28,16 +27,21 @@ struct op_msrs { struct pt_regs; struct oprofile_operations; /* The model vtable abstracts the differences between * various x86 CPU models' perfctr support. */ struct op_x86_model_spec { int (*init)(struct oprofile_operations *ops); void (*exit)(void); unsigned int num_counters; unsigned int num_controls; u64 reserved; u16 event_mask; int (*init)(struct oprofile_operations *ops); void (*exit)(void); void (*fill_in_addresses)(struct op_msrs * const msrs); void (*setup_ctrs)(struct op_msrs const * const msrs); void (*setup_ctrs)(struct op_x86_model_spec const *model, struct op_msrs const * const msrs); int (*check_ctrs)(struct pt_regs * const regs, struct op_msrs const * const msrs); void (*start)(struct op_msrs const * const msrs); Loading @@ -45,12 +49,15 @@ struct op_x86_model_spec { void (*shutdown)(struct op_msrs const * const msrs); }; extern struct op_x86_model_spec op_ppro_spec; struct op_counter_config; extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, struct op_counter_config *counter_config); extern struct op_x86_model_spec const op_ppro_spec; extern struct op_x86_model_spec const op_p4_spec; extern struct op_x86_model_spec const op_p4_ht2_spec; extern struct op_x86_model_spec const op_amd_spec; extern struct op_x86_model_spec op_arch_perfmon_spec; extern void arch_perfmon_setup_counters(void); #endif /* OP_X86_MODEL_H */