Performance events updates for v6.16

Merge tag 'perf-core-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf events updates from Ingo Molnar:

 "Core & generic-arch updates:

   - Add support for dynamic constraints and propagate it to the Intel driver (Kan Liang)
   - Fix & enhance driver-specific throttling support (Kan Liang)
   - Record sample last_period before updating on the x86 and PowerPC platforms (Mark Barnett)
   - Make perf_pmu_unregister() usable (Peter Zijlstra)
   - Unify perf_event_free_task() / perf_event_exit_task_context() (Peter Zijlstra)
   - Simplify perf_event_release_kernel() and perf_event_free_task() (Peter Zijlstra)
   - Allocate non-contiguous AUX pages by default (Yabin Cui)

  Uprobes updates:

   - Add support to emulate NOP instructions (Jiri Olsa)
   - selftests/bpf: Add 5-byte NOP uprobe trigger benchmark (Jiri Olsa)

  x86 Intel PMU enhancements:

   - Support Intel Auto Counter Reload [ACR] (Kan Liang)
   - Add PMU support for Clearwater Forest (Dapeng Mi)
   - Arch-PEBS preparatory changes: (Dapeng Mi)
      - Parse CPUID archPerfmonExt leaves for non-hybrid CPUs
      - Decouple BTS initialization from PEBS initialization
      - Introduce pairs of PEBS static calls

  x86 AMD PMU enhancements:

   - Use hrtimer for handling overflows in the AMD uncore driver (Sandipan Das)
   - Prevent UMC counters from saturating (Sandipan Das)

  Fixes and cleanups:

   - Fix put_ctx() ordering (Frederic Weisbecker)
   - Fix irq work dereferencing garbage (Frederic Weisbecker)
   - Misc fixes and cleanups (Changbin Du, Frederic Weisbecker, Ian Rogers, Ingo Molnar, Kan Liang, Peter Zijlstra, Qing Wang, Sandipan Das, Thorsten Blum)"

Signed-off-by: Ingo Molnar <mingo@kernel.org>

* tag 'perf-core-2025-05-25' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (60 commits)
  perf/headers: Clean up <linux/perf_event.h> a bit
  perf/uapi: Clean up <uapi/linux/perf_event.h> a bit
  perf/uapi: Fix PERF_RECORD_SAMPLE comments in <uapi/linux/perf_event.h>
  mips/perf: Remove driver-specific throttle support
  xtensa/perf: Remove driver-specific throttle support
  sparc/perf: Remove driver-specific throttle support
  loongarch/perf: Remove driver-specific throttle support
  csky/perf: Remove driver-specific throttle support
  arc/perf: Remove driver-specific throttle support
  alpha/perf: Remove driver-specific throttle support
  perf/apple_m1: Remove driver-specific throttle support
  perf/arm: Remove driver-specific throttle support
  s390/perf: Remove driver-specific throttle support
  powerpc/perf: Remove driver-specific throttle support
  perf/x86/zhaoxin: Remove driver-specific throttle support
  perf/x86/amd: Remove driver-specific throttle support
  perf/x86/intel: Remove driver-specific throttle support
  perf: Only dump the throttle log for the leader
  perf: Fix the throttle logic for a group
  perf/core: Add the is_event_in_freq_mode() helper to simplify the code
  ...
This commit is contained in: commit ddddf9d64f
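Most of the architecture hunks below share one shape: the driver-local throttle handling is deleted because the generic perf core now throttles a sampling event itself when overflows arrive too quickly, so drivers no longer need to act on the return value of perf_event_overflow(). A minimal sketch of the before/after pattern (illustrative only; <pmu>_stop() stands in for each driver's own stop helper):

	/* before: each PMU driver throttled the event itself */
	if (perf_event_overflow(event, &data, regs))
		<pmu>_stop(event, 0);

	/* after: the return value is ignored; the core handles throttling */
	perf_event_overflow(event, &data, regs);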
@@ -852,14 +852,9 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
 	alpha_perf_event_update(event, hwc, idx, alpha_pmu->pmc_max_period[idx]+1);
 	perf_sample_data_init(&data, 0, hwc->last_period);
 
-	if (alpha_perf_event_set_period(event, hwc, idx)) {
-		if (perf_event_overflow(event, &data, regs)) {
-			/* Interrupts coming too quickly; "throttle" the
-			 * counter, i.e., disable it for a little while.
-			 */
-			alpha_pmu_stop(event, 0);
-		}
-	}
+	if (alpha_perf_event_set_period(event, hwc, idx))
+		perf_event_overflow(event, &data, regs);
 	wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
 
 	return;
@@ -599,10 +599,8 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
 
 		arc_perf_event_update(event, &event->hw, event->hw.idx);
 		perf_sample_data_init(&data, 0, hwc->last_period);
-		if (arc_pmu_event_set_period(event)) {
-			if (perf_event_overflow(event, &data, regs))
-				arc_pmu_stop(event, 0);
-		}
+		if (arc_pmu_event_set_period(event))
+			perf_event_overflow(event, &data, regs);
 
 		active_ints &= ~BIT(idx);
 	} while (active_ints);
@@ -1139,8 +1139,7 @@ static irqreturn_t csky_pmu_handle_irq(int irq_num, void *dev)
 		perf_sample_data_init(&data, 0, hwc->last_period);
 		csky_pmu_event_set_period(event);
 
-		if (perf_event_overflow(event, &data, regs))
-			csky_pmu_stop_event(event);
+		perf_event_overflow(event, &data, regs);
 	}
 
 	csky_pmu_enable(&csky_pmu.pmu);
@@ -479,8 +479,7 @@ static void handle_associated_event(struct cpu_hw_events *cpuc, int idx,
 	if (!loongarch_pmu_event_set_period(event, hwc, idx))
 		return;
 
-	if (perf_event_overflow(event, data, regs))
-		loongarch_pmu_disable_event(idx);
+	perf_event_overflow(event, data, regs);
 }
 
 static irqreturn_t pmu_handle_irq(int irq, void *dev)
@@ -791,8 +791,7 @@ static void handle_associated_event(struct cpu_hw_events *cpuc,
 	if (!mipspmu_event_set_period(event, hwc, idx))
 		return;
 
-	if (perf_event_overflow(event, data, regs))
-		mipsxx_pmu_disable_event(idx);
+	perf_event_overflow(event, data, regs);
 }
 
 
@@ -2239,6 +2239,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 			       struct pt_regs *regs)
 {
 	u64 period = event->hw.sample_period;
+	const u64 last_period = event->hw.last_period;
 	s64 prev, delta, left;
 	int record = 0;
 
@@ -2320,7 +2321,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	if (record) {
 		struct perf_sample_data data;
 
-		perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
+		perf_sample_data_init(&data, ~0ULL, last_period);
 
 		if (event->attr.sample_type & PERF_SAMPLE_ADDR_TYPE)
 			perf_get_data_addr(event, regs, &data.addr);
@@ -2343,12 +2344,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 			ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
 			data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
 		}
-		if (perf_event_overflow(event, &data, regs))
-			power_pmu_stop(event, 0);
+		perf_event_overflow(event, &data, regs);
 	} else if (period) {
 		/* Account for interrupt in case of invalid SIAR */
-		if (perf_event_account_interrupt(event))
-			power_pmu_stop(event, 0);
+		perf_event_account_interrupt(event);
 	}
 }
 
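The last_period changes in record_and_restart() above (and in the x86 interrupt handlers further down) snapshot the period that actually produced the overflow before the counter is reprogrammed, so the value handed to perf_sample_data_init() matches the sample being reported rather than a freshly updated period. In sketch form:

	const u64 last_period = event->hw.last_period;	/* period that produced this overflow */
	/* ... the counter is reprogrammed; event->hw.last_period may change here ... */
	perf_sample_data_init(&data, 0, last_period);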
@@ -590,6 +590,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 			       struct pt_regs *regs)
 {
 	u64 period = event->hw.sample_period;
+	const u64 last_period = event->hw.last_period;
 	s64 prev, delta, left;
 	int record = 0;
 
@@ -632,10 +633,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
 	if (record) {
 		struct perf_sample_data data;
 
-		perf_sample_data_init(&data, 0, event->hw.last_period);
+		perf_sample_data_init(&data, 0, last_period);
 
-		if (perf_event_overflow(event, &data, regs))
-			fsl_emb_pmu_stop(event, 0);
+		perf_event_overflow(event, &data, regs);
 	}
 }
 
@@ -980,8 +980,6 @@ static int cfdiag_push_sample(struct perf_event *event,
 	}
 
 	overflow = perf_event_overflow(event, &data, &regs);
-	if (overflow)
-		event->pmu->stop(event, 0);
 
 	perf_event_update_userpage(event);
 	return overflow;
@@ -1072,10 +1072,7 @@ static int perf_push_sample(struct perf_event *event,
 	overflow = 0;
 	if (perf_event_exclude(event, &regs, sde_regs))
 		goto out;
-	if (perf_event_overflow(event, &data, &regs)) {
-		overflow = 1;
-		event->pmu->stop(event, 0);
-	}
+	overflow = perf_event_overflow(event, &data, &regs);
 	perf_event_update_userpage(event);
 out:
 	return overflow;
@@ -1668,8 +1668,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self,
 		if (!sparc_perf_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_event_overflow(event, &data, regs))
-			sparc_pmu_stop(event, 0);
+		perf_event_overflow(event, &data, regs);
 	}
 
 	finish_clock = sched_clock();
@@ -1003,8 +1003,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
 
 			perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
 
-		if (perf_event_overflow(event, &data, regs))
-			x86_pmu_stop(event, 0);
+		perf_event_overflow(event, &data, regs);
 	}
 
 	/*
@@ -1373,9 +1373,7 @@ fail:
 		hwc->sample_period = perf_ibs->min_period;
 
 out:
-	if (throttle) {
-		perf_ibs_stop(event, 0);
-	} else {
+	if (!throttle) {
 		if (perf_ibs == &perf_ibs_op) {
 			if (ibs_caps & IBS_CAPS_OPCNTEXT) {
 				new_config = period & IBS_OP_MAX_CNT_EXT_MASK;
@@ -21,6 +21,7 @@
 #define NUM_COUNTERS_NB		4
 #define NUM_COUNTERS_L2		4
 #define NUM_COUNTERS_L3		6
+#define NUM_COUNTERS_MAX	64
 
 #define RDPMC_BASE_NB		6
 #define RDPMC_BASE_LLC		10
@@ -38,7 +39,10 @@ struct amd_uncore_ctx {
 	int refcnt;
 	int cpu;
 	struct perf_event **events;
-	struct hlist_node node;
+	unsigned long active_mask[BITS_TO_LONGS(NUM_COUNTERS_MAX)];
+	int nr_active;
+	struct hrtimer hrtimer;
+	u64 hrtimer_duration;
 };
 
 struct amd_uncore_pmu {
@@ -83,11 +87,51 @@ struct amd_uncore {
 
 static struct amd_uncore uncores[UNCORE_TYPE_MAX];
 
+/* Interval for hrtimer, defaults to 60000 milliseconds */
+static unsigned int update_interval = 60 * MSEC_PER_SEC;
+module_param(update_interval, uint, 0444);
+
 static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
 {
 	return container_of(event->pmu, struct amd_uncore_pmu, pmu);
 }
 
+static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer *hrtimer)
+{
+	struct amd_uncore_ctx *ctx;
+	struct perf_event *event;
+	int bit;
+
+	ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer);
+
+	if (!ctx->nr_active || ctx->cpu != smp_processor_id())
+		return HRTIMER_NORESTART;
+
+	for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) {
+		event = ctx->events[bit];
+		event->pmu->read(event);
+	}
+
+	hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration));
+	return HRTIMER_RESTART;
+}
+
+static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx)
+{
+	hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration),
+		      HRTIMER_MODE_REL_PINNED_HARD);
+}
+
+static void amd_uncore_cancel_hrtimer(struct amd_uncore_ctx *ctx)
+{
+	hrtimer_cancel(&ctx->hrtimer);
+}
+
+static void amd_uncore_init_hrtimer(struct amd_uncore_ctx *ctx)
+{
+	hrtimer_setup(&ctx->hrtimer, amd_uncore_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
+}
+
 static void amd_uncore_read(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -118,18 +162,26 @@ static void amd_uncore_read(struct perf_event *event)
 
 static void amd_uncore_start(struct perf_event *event, int flags)
 {
+	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
 	struct hw_perf_event *hwc = &event->hw;
 
+	if (!ctx->nr_active++)
+		amd_uncore_start_hrtimer(ctx);
+
 	if (flags & PERF_EF_RELOAD)
 		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
 
 	hwc->state = 0;
+	__set_bit(hwc->idx, ctx->active_mask);
 	wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
 	perf_event_update_userpage(event);
 }
 
 static void amd_uncore_stop(struct perf_event *event, int flags)
 {
+	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
 	struct hw_perf_event *hwc = &event->hw;
 
 	wrmsrl(hwc->config_base, hwc->config);
@@ -139,6 +191,11 @@ static void amd_uncore_stop(struct perf_event *event, int flags)
 		event->pmu->read(event);
 		hwc->state |= PERF_HES_UPTODATE;
 	}
 
+	if (!--ctx->nr_active)
+		amd_uncore_cancel_hrtimer(ctx);
+
+	__clear_bit(hwc->idx, ctx->active_mask);
 }
 
 static int amd_uncore_add(struct perf_event *event, int flags)
@@ -491,6 +548,9 @@ static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 				goto fail;
 		}
 
+		amd_uncore_init_hrtimer(curr);
+		curr->hrtimer_duration = (u64)update_interval * NSEC_PER_MSEC;
+
 		cpumask_set_cpu(cpu, &pmu->active_mask);
 	}
 
@@ -880,16 +940,55 @@ static int amd_uncore_umc_event_init(struct perf_event *event)
 
 static void amd_uncore_umc_start(struct perf_event *event, int flags)
 {
+	struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
+	struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
 	struct hw_perf_event *hwc = &event->hw;
 
+	if (!ctx->nr_active++)
+		amd_uncore_start_hrtimer(ctx);
+
 	if (flags & PERF_EF_RELOAD)
 		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
 
 	hwc->state = 0;
+	__set_bit(hwc->idx, ctx->active_mask);
 	wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
 	perf_event_update_userpage(event);
 }
 
+static void amd_uncore_umc_read(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	u64 prev, new, shift;
+	s64 delta;
+
+	shift = COUNTER_SHIFT + 1;
+	prev = local64_read(&hwc->prev_count);
+
+	/*
+	 * UMC counters do not have RDPMC assignments. Read counts directly
+	 * from the corresponding PERF_CTR.
+	 */
+	rdmsrl(hwc->event_base, new);
+
+	/*
+	 * Unlike the other uncore counters, UMC counters saturate and set the
+	 * Overflow bit (bit 48) on overflow. Since they do not roll over,
+	 * proactively reset the corresponding PERF_CTR when bit 47 is set so
+	 * that the counter never gets a chance to saturate.
+	 */
+	if (new & BIT_ULL(63 - COUNTER_SHIFT)) {
+		wrmsrl(hwc->event_base, 0);
+		local64_set(&hwc->prev_count, 0);
+	} else {
+		local64_set(&hwc->prev_count, new);
+	}
+
+	delta = (new << shift) - (prev << shift);
+	delta >>= shift;
+	local64_add(delta, &event->count);
+}
+
 static
 void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
 {
@@ -968,7 +1067,7 @@ int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
 			.del		= amd_uncore_del,
 			.start		= amd_uncore_umc_start,
 			.stop		= amd_uncore_stop,
-			.read		= amd_uncore_read,
+			.read		= amd_uncore_umc_read,
 			.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
 			.module		= THIS_MODULE,
 		};
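amd_uncore_umc_read() above discards the UMC Overflow flag by shifting both the previous and the current reading left by COUNTER_SHIFT + 1 before subtracting, then shifting the signed difference back down. A small stand-alone sketch of that masking/sign-extension trick (user-space C; the shift and the sample values are illustrative, not taken from the driver):

	#include <stdint.h>
	#include <stdio.h>

	#define COUNTER_SHIFT 16	/* illustrative; top bits hold status flags */

	int main(void)
	{
		uint64_t prev = 0x0000700000001000ULL;	/* last snapshot */
		uint64_t cur  = 0x0000700000005000ULL;	/* fresh counter read */
		unsigned int shift = COUNTER_SHIFT + 1;
		int64_t delta;

		/*
		 * Shift out the top bits (including the overflow flag) so only
		 * the count bits are compared, then shift back so the delta is
		 * sign-extended the same way the driver's s64 math behaves.
		 */
		delta = (int64_t)((cur << shift) - (prev << shift));
		delta >>= shift;

		printf("delta = %lld\n", (long long)delta);
		return 0;
	}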
@@ -95,6 +95,11 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter);
 
 DEFINE_STATIC_CALL_NULL(x86_pmu_late_setup, *x86_pmu.late_setup);
 
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all);
+DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all);
+
 /*
  * This one is magic, it will get called even when PMU init fails (because
  * there is no PMU), in which case it should simply return NULL.
@@ -674,6 +679,7 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	event->hw.idx = -1;
 	event->hw.last_cpu = -1;
 	event->hw.last_tag = ~0ULL;
+	event->hw.dyn_constraint = ~0ULL;
 
 	/* mark unused */
 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
@@ -756,15 +762,16 @@ void x86_pmu_enable_all(int added)
 
 int is_x86_event(struct perf_event *event)
 {
-	int i;
-
-	if (!is_hybrid())
-		return event->pmu == &pmu;
-
-	for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
-		if (event->pmu == &x86_pmu.hybrid_pmu[i].pmu)
-			return true;
-	}
+	/*
+	 * For a non-hybrid platforms, the type of X86 pmu is
+	 * always PERF_TYPE_RAW.
+	 * For a hybrid platform, the PERF_PMU_CAP_EXTENDED_HW_TYPE
+	 * is a unique capability for the X86 PMU.
+	 * Use them to detect a X86 event.
+	 */
+	if (event->pmu->type == PERF_TYPE_RAW ||
+	    event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_HW_TYPE)
+		return true;
 
 	return false;
 }
@@ -1683,6 +1690,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 	struct cpu_hw_events *cpuc;
 	struct perf_event *event;
 	int idx, handled = 0;
+	u64 last_period;
 	u64 val;
 
 	cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -1702,6 +1710,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 			continue;
 
 		event = cpuc->events[idx];
+		last_period = event->hw.last_period;
 
 		val = static_call(x86_pmu_update)(event);
 		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1715,12 +1724,11 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
 		if (!static_call(x86_pmu_set_period)(event))
 			continue;
 
-		perf_sample_data_init(&data, 0, event->hw.last_period);
+		perf_sample_data_init(&data, 0, last_period);
 
 		perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL);
 
-		if (perf_event_overflow(event, &data, regs))
-			x86_pmu_stop(event, 0);
+		perf_event_overflow(event, &data, regs);
 	}
 
 	if (handled)
@@ -2046,6 +2054,11 @@ static void x86_pmu_static_call_update(void)
 	static_call_update(x86_pmu_filter, x86_pmu.filter);
 
 	static_call_update(x86_pmu_late_setup, x86_pmu.late_setup);
+
+	static_call_update(x86_pmu_pebs_enable, x86_pmu.pebs_enable);
+	static_call_update(x86_pmu_pebs_disable, x86_pmu.pebs_disable);
+	static_call_update(x86_pmu_pebs_enable_all, x86_pmu.pebs_enable_all);
+	static_call_update(x86_pmu_pebs_disable_all, x86_pmu.pebs_disable_all);
 }
 
 static void _x86_pmu_read(struct perf_event *event)
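The PEBS enable/disable hooks above move from direct intel_pmu_pebs_*() calls to the same static-call plumbing this file already uses for the other x86_pmu callbacks. The general kernel pattern, sketched with made-up hook names purely for orientation (kernel-only API):

	#include <linux/static_call.h>
	#include <linux/perf_event.h>

	static void my_hook_default(struct perf_event *event) { }
	static void my_hook_fast(struct perf_event *event) { /* ... */ }

	/* A patchable call site, initially targeting the default implementation. */
	DEFINE_STATIC_CALL(my_hook, my_hook_default);

	static void pick_implementation(void)
	{
		/* Done once, e.g. at PMU init, when the real callback is known. */
		static_call_update(my_hook, my_hook_fast);
	}

	static void hot_path(struct perf_event *event)
	{
		/* Compiles to a direct call; no indirect branch on the hot path. */
		static_call(my_hook)(event);
	}

The DEFINE_STATIC_CALL_NULL()/static_call_cond() variants used in the diff additionally allow the hook to be absent, in which case the call is skipped.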
@@ -80,54 +80,54 @@ static void *
 bts_buffer_setup_aux(struct perf_event *event, void **pages,
 		     int nr_pages, bool overwrite)
 {
-	struct bts_buffer *buf;
+	struct bts_buffer *bb;
 	struct page *page;
 	int cpu = event->cpu;
 	int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
 	unsigned long offset;
 	size_t size = nr_pages << PAGE_SHIFT;
-	int pg, nbuf, pad;
+	int pg, nr_buf, pad;
 
 	/* count all the high order buffers */
-	for (pg = 0, nbuf = 0; pg < nr_pages;) {
+	for (pg = 0, nr_buf = 0; pg < nr_pages;) {
 		page = virt_to_page(pages[pg]);
 		pg += buf_nr_pages(page);
-		nbuf++;
+		nr_buf++;
 	}
 
 	/*
	 * to avoid interrupts in overwrite mode, only allow one physical
	 */
-	if (overwrite && nbuf > 1)
+	if (overwrite && nr_buf > 1)
 		return NULL;
 
-	buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
-	if (!buf)
+	bb = kzalloc_node(struct_size(bb, buf, nr_buf), GFP_KERNEL, node);
+	if (!bb)
 		return NULL;
 
-	buf->nr_pages = nr_pages;
-	buf->nr_bufs = nbuf;
-	buf->snapshot = overwrite;
-	buf->data_pages = pages;
-	buf->real_size = size - size % BTS_RECORD_SIZE;
+	bb->nr_pages = nr_pages;
+	bb->nr_bufs = nr_buf;
+	bb->snapshot = overwrite;
+	bb->data_pages = pages;
+	bb->real_size = size - size % BTS_RECORD_SIZE;
 
-	for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
+	for (pg = 0, nr_buf = 0, offset = 0, pad = 0; nr_buf < bb->nr_bufs; nr_buf++) {
 		unsigned int __nr_pages;
 
 		page = virt_to_page(pages[pg]);
 		__nr_pages = buf_nr_pages(page);
-		buf->buf[nbuf].page = page;
-		buf->buf[nbuf].offset = offset;
-		buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
-		buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
-		pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
-		buf->buf[nbuf].size -= pad;
+		bb->buf[nr_buf].page = page;
+		bb->buf[nr_buf].offset = offset;
+		bb->buf[nr_buf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
+		bb->buf[nr_buf].size = buf_size(page) - bb->buf[nr_buf].displacement;
+		pad = bb->buf[nr_buf].size % BTS_RECORD_SIZE;
+		bb->buf[nr_buf].size -= pad;
 
 		pg += __nr_pages;
 		offset += __nr_pages << PAGE_SHIFT;
 	}
 
-	return buf;
+	return bb;
 }
 
 static void bts_buffer_free_aux(void *data)
@@ -135,25 +135,25 @@ static void bts_buffer_free_aux(void *data)
 	kfree(data);
 }
 
-static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
+static unsigned long bts_buffer_offset(struct bts_buffer *bb, unsigned int idx)
 {
-	return buf->buf[idx].offset + buf->buf[idx].displacement;
+	return bb->buf[idx].offset + bb->buf[idx].displacement;
 }
 
 static void
-bts_config_buffer(struct bts_buffer *buf)
+bts_config_buffer(struct bts_buffer *bb)
 {
 	int cpu = raw_smp_processor_id();
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-	struct bts_phys *phys = &buf->buf[buf->cur_buf];
+	struct bts_phys *phys = &bb->buf[bb->cur_buf];
 	unsigned long index, thresh = 0, end = phys->size;
 	struct page *page = phys->page;
 
-	index = local_read(&buf->head);
+	index = local_read(&bb->head);
 
-	if (!buf->snapshot) {
-		if (buf->end < phys->offset + buf_size(page))
-			end = buf->end - phys->offset - phys->displacement;
+	if (!bb->snapshot) {
+		if (bb->end < phys->offset + buf_size(page))
+			end = bb->end - phys->offset - phys->displacement;
 
 		index -= phys->offset + phys->displacement;
 
@@ -168,7 +168,7 @@ bts_config_buffer(struct bts_buffer *buf)
 	ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
 	ds->bts_index = ds->bts_buffer_base + index;
 	ds->bts_absolute_maximum = ds->bts_buffer_base + end;
-	ds->bts_interrupt_threshold = !buf->snapshot
+	ds->bts_interrupt_threshold = !bb->snapshot
 		? ds->bts_buffer_base + thresh
 		: ds->bts_absolute_maximum + BTS_RECORD_SIZE;
 }
@@ -184,16 +184,16 @@ static void bts_update(struct bts_ctx *bts)
 {
 	int cpu = raw_smp_processor_id();
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-	struct bts_buffer *buf = perf_get_aux(&bts->handle);
+	struct bts_buffer *bb = perf_get_aux(&bts->handle);
 	unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;
 
-	if (!buf)
+	if (!bb)
 		return;
 
-	head = index + bts_buffer_offset(buf, buf->cur_buf);
-	old = local_xchg(&buf->head, head);
+	head = index + bts_buffer_offset(bb, bb->cur_buf);
+	old = local_xchg(&bb->head, head);
 
-	if (!buf->snapshot) {
+	if (!bb->snapshot) {
 		if (old == head)
 			return;
 
@@ -205,9 +205,9 @@ static void bts_update(struct bts_ctx *bts)
 		 * old and head are always in the same physical buffer, so we
 		 * can subtract them to get the data size.
 		 */
-		local_add(head - old, &buf->data_size);
+		local_add(head - old, &bb->data_size);
 	} else {
-		local_set(&buf->data_size, head);
+		local_set(&bb->data_size, head);
 	}
 
 	/*
@@ -218,7 +218,7 @@ static void bts_update(struct bts_ctx *bts)
 }
 
 static int
-bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
+bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle);
 
 /*
  * Ordering PMU callbacks wrt themselves and the PMI is done by means
@@ -232,17 +232,17 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
 static void __bts_event_start(struct perf_event *event)
 {
 	struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
-	struct bts_buffer *buf = perf_get_aux(&bts->handle);
+	struct bts_buffer *bb = perf_get_aux(&bts->handle);
 	u64 config = 0;
 
-	if (!buf->snapshot)
+	if (!bb->snapshot)
 		config |= ARCH_PERFMON_EVENTSEL_INT;
 	if (!event->attr.exclude_kernel)
 		config |= ARCH_PERFMON_EVENTSEL_OS;
 	if (!event->attr.exclude_user)
 		config |= ARCH_PERFMON_EVENTSEL_USR;
 
-	bts_config_buffer(buf);
+	bts_config_buffer(bb);
 
 	/*
	 * local barrier to make sure that ds configuration made it
@@ -261,13 +261,13 @@ static void bts_event_start(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
-	struct bts_buffer *buf;
+	struct bts_buffer *bb;
 
-	buf = perf_aux_output_begin(&bts->handle, event);
-	if (!buf)
+	bb = perf_aux_output_begin(&bts->handle, event);
+	if (!bb)
 		goto fail_stop;
 
-	if (bts_buffer_reset(buf, &bts->handle))
+	if (bts_buffer_reset(bb, &bts->handle))
 		goto fail_end_stop;
 
 	bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
@@ -306,27 +306,27 @@ static void bts_event_stop(struct perf_event *event, int flags)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct bts_ctx *bts = this_cpu_ptr(bts_ctx);
-	struct bts_buffer *buf = NULL;
+	struct bts_buffer *bb = NULL;
 	int state = READ_ONCE(bts->state);
 
 	if (state == BTS_STATE_ACTIVE)
 		__bts_event_stop(event, BTS_STATE_STOPPED);
 
 	if (state != BTS_STATE_STOPPED)
-		buf = perf_get_aux(&bts->handle);
+		bb = perf_get_aux(&bts->handle);
 
 	event->hw.state |= PERF_HES_STOPPED;
 
 	if (flags & PERF_EF_UPDATE) {
 		bts_update(bts);
 
-		if (buf) {
-			if (buf->snapshot)
+		if (bb) {
+			if (bb->snapshot)
 				bts->handle.head =
-					local_xchg(&buf->data_size,
-						   buf->nr_pages << PAGE_SHIFT);
+					local_xchg(&bb->data_size,
						   bb->nr_pages << PAGE_SHIFT);
 			perf_aux_output_end(&bts->handle,
-					    local_xchg(&buf->data_size, 0));
+					    local_xchg(&bb->data_size, 0));
 		}
 
 		cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
@@ -382,19 +382,19 @@ void intel_bts_disable_local(void)
 }
 
 static int
-bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
+bts_buffer_reset(struct bts_buffer *bb, struct perf_output_handle *handle)
 {
 	unsigned long head, space, next_space, pad, gap, skip, wakeup;
 	unsigned int next_buf;
 	struct bts_phys *phys, *next_phys;
 	int ret;
 
-	if (buf->snapshot)
+	if (bb->snapshot)
 		return 0;
 
-	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
+	head = handle->head & ((bb->nr_pages << PAGE_SHIFT) - 1);
 
-	phys = &buf->buf[buf->cur_buf];
+	phys = &bb->buf[bb->cur_buf];
 	space = phys->offset + phys->displacement + phys->size - head;
 	pad = space;
 	if (space > handle->size) {
@@ -403,10 +403,10 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
 	}
 	if (space <= BTS_SAFETY_MARGIN) {
 		/* See if next phys buffer has more space */
-		next_buf = buf->cur_buf + 1;
-		if (next_buf >= buf->nr_bufs)
+		next_buf = bb->cur_buf + 1;
+		if (next_buf >= bb->nr_bufs)
 			next_buf = 0;
-		next_phys = &buf->buf[next_buf];
+		next_phys = &bb->buf[next_buf];
 		gap = buf_size(phys->page) - phys->displacement - phys->size +
 			next_phys->displacement;
 		skip = pad + gap;
@@ -431,8 +431,8 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
 			 * anymore, so we must not be racing with
 			 * bts_update().
 			 */
-			buf->cur_buf = next_buf;
-			local_set(&buf->head, head);
+			bb->cur_buf = next_buf;
+			local_set(&bb->head, head);
 		}
 	}
 }
@@ -445,7 +445,7 @@ bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
 		space -= space % BTS_RECORD_SIZE;
 	}
 
-	buf->end = head + space;
+	bb->end = head + space;
 
 	/*
	 * If we have no space, the lost notification would have been sent when
@@ -462,7 +462,7 @@ int intel_bts_interrupt(void)
 	struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds;
 	struct bts_ctx *bts;
 	struct perf_event *event;
-	struct bts_buffer *buf;
+	struct bts_buffer *bb;
 	s64 old_head;
 	int err = -ENOSPC, handled = 0;
 
@@ -485,8 +485,8 @@ int intel_bts_interrupt(void)
 	if (READ_ONCE(bts->state) == BTS_STATE_STOPPED)
 		return handled;
 
-	buf = perf_get_aux(&bts->handle);
-	if (!buf)
+	bb = perf_get_aux(&bts->handle);
+	if (!bb)
 		return handled;
 
 	/*
@@ -494,26 +494,26 @@ int intel_bts_interrupt(void)
	 * there's no other way of telling, because the pointer will
	 * keep moving
	 */
-	if (buf->snapshot)
+	if (bb->snapshot)
 		return 0;
 
-	old_head = local_read(&buf->head);
+	old_head = local_read(&bb->head);
 	bts_update(bts);
 
 	/* no new data */
-	if (old_head == local_read(&buf->head))
+	if (old_head == local_read(&bb->head))
 		return handled;
 
-	perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0));
+	perf_aux_output_end(&bts->handle, local_xchg(&bb->data_size, 0));
 
-	buf = perf_aux_output_begin(&bts->handle, event);
-	if (buf)
-		err = bts_buffer_reset(buf, &bts->handle);
+	bb = perf_aux_output_begin(&bts->handle, event);
+	if (bb)
+		err = bts_buffer_reset(bb, &bts->handle);
 
 	if (err) {
 		WRITE_ONCE(bts->state, BTS_STATE_STOPPED);
 
-		if (buf) {
+		if (bb) {
 			/*
			 * BTS_STATE_STOPPED should be visible before
			 * cleared handle::event
@@ -599,7 +599,11 @@ static void bts_event_read(struct perf_event *event)
 
 static __init int bts_init(void)
 {
-	if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
+	if (!boot_cpu_has(X86_FEATURE_DTES64))
+		return -ENODEV;
+
+	x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
+	if (!x86_pmu.bts)
 		return -ENODEV;
 
 	if (boot_cpu_has(X86_FEATURE_PTI)) {
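Aside from the mechanical buf -> bb / nbuf -> nr_buf rename above, the allocation in bts_buffer_setup_aux() switches from an open-coded offsetof() size to struct_size(), the helper from <linux/overflow.h> for structures that end in a flexible array member; it computes the struct size plus N trailing elements and saturates instead of wrapping if the arithmetic overflows. A hedged sketch of the idiom with a simplified, made-up structure:

	#include <linux/overflow.h>
	#include <linux/slab.h>
	#include <linux/types.h>

	struct item { u64 a, b; };

	struct container {
		unsigned int nr;
		struct item entries[];	/* flexible array member */
	};

	static struct container *alloc_container(unsigned int nr, int node)
	{
		/*
		 * struct_size(c, entries, nr) == sizeof(*c) + nr * sizeof(c->entries[0]),
		 * clamped to SIZE_MAX if the multiplication would overflow.
		 */
		struct container *c = kzalloc_node(struct_size(c, entries, nr), GFP_KERNEL, node);

		if (c)
			c->nr = nr;
		return c;
	}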
@@ -2224,6 +2224,18 @@ static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
 	EVENT_EXTRA_END
 };
 
+EVENT_ATTR_STR(topdown-fe-bound,	td_fe_bound_skt,	"event=0x9c,umask=0x01");
+EVENT_ATTR_STR(topdown-retiring,	td_retiring_skt,	"event=0xc2,umask=0x02");
+EVENT_ATTR_STR(topdown-be-bound,	td_be_bound_skt,	"event=0xa4,umask=0x02");
+
+static struct attribute *skt_events_attrs[] = {
+	EVENT_PTR(td_fe_bound_skt),
+	EVENT_PTR(td_retiring_skt),
+	EVENT_PTR(td_bad_spec_cmt),
+	EVENT_PTR(td_be_bound_skt),
+	NULL,
+};
+
 #define KNL_OT_L2_HITE		BIT_ULL(19) /* Other Tile L2 Hit */
 #define KNL_OT_L2_HITF		BIT_ULL(20) /* Other Tile L2 Hit */
 #define KNL_MCDRAM_LOCAL	BIT_ULL(21)
@@ -2294,7 +2306,7 @@ static __always_inline void __intel_pmu_disable_all(bool bts)
 static __always_inline void intel_pmu_disable_all(void)
 {
 	__intel_pmu_disable_all(true);
-	intel_pmu_pebs_disable_all();
+	static_call_cond(x86_pmu_pebs_disable_all)();
 	intel_pmu_lbr_disable_all();
 }
 
@@ -2326,7 +2338,7 @@ static void __intel_pmu_enable_all(int added, bool pmi)
 
 static void intel_pmu_enable_all(int added)
 {
-	intel_pmu_pebs_enable_all();
+	static_call_cond(x86_pmu_pebs_enable_all)();
 	__intel_pmu_enable_all(added, false);
 }
 
@@ -2583,7 +2595,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
	 * so we don't trigger the event without PEBS bit set.
	 */
 	if (unlikely(event->attr.precise_ip))
-		intel_pmu_pebs_disable(event);
+		static_call(x86_pmu_pebs_disable)(event);
 }
 
 static void intel_pmu_assign_event(struct perf_event *event, int idx)
@@ -2603,6 +2615,9 @@ static void intel_pmu_del_event(struct perf_event *event)
 		intel_pmu_lbr_del(event);
 	if (event->attr.precise_ip)
 		intel_pmu_pebs_del(event);
+	if (is_pebs_counter_event_group(event) ||
+	    is_acr_event_group(event))
+		this_cpu_ptr(&cpu_hw_events)->n_late_setup--;
 }
 
 static int icl_set_topdown_event_period(struct perf_event *event)
@@ -2880,6 +2895,52 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
 	cpuc->fixed_ctrl_val |= bits;
 }
 
+static void intel_pmu_config_acr(int idx, u64 mask, u32 reload)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	int msr_b, msr_c;
+
+	if (!mask && !cpuc->acr_cfg_b[idx])
+		return;
+
+	if (idx < INTEL_PMC_IDX_FIXED) {
+		msr_b = MSR_IA32_PMC_V6_GP0_CFG_B;
+		msr_c = MSR_IA32_PMC_V6_GP0_CFG_C;
+	} else {
+		msr_b = MSR_IA32_PMC_V6_FX0_CFG_B;
+		msr_c = MSR_IA32_PMC_V6_FX0_CFG_C;
+		idx -= INTEL_PMC_IDX_FIXED;
+	}
+
+	if (cpuc->acr_cfg_b[idx] != mask) {
+		wrmsrl(msr_b + x86_pmu.addr_offset(idx, false), mask);
+		cpuc->acr_cfg_b[idx] = mask;
+	}
+	/* Only need to update the reload value when there is a valid config value. */
+	if (mask && cpuc->acr_cfg_c[idx] != reload) {
+		wrmsrl(msr_c + x86_pmu.addr_offset(idx, false), reload);
+		cpuc->acr_cfg_c[idx] = reload;
+	}
+}
+
+static void intel_pmu_enable_acr(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (!is_acr_event_group(event) || !event->attr.config2) {
+		/*
+		 * The disable doesn't clear the ACR CFG register.
+		 * Check and clear the ACR CFG register.
+		 */
+		intel_pmu_config_acr(hwc->idx, 0, 0);
+		return;
+	}
+
+	intel_pmu_config_acr(hwc->idx, hwc->config1, -hwc->sample_period);
+}
+
+DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
+
 static void intel_pmu_enable_event(struct perf_event *event)
 {
 	u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
@@ -2887,16 +2948,19 @@ static void intel_pmu_enable_event(struct perf_event *event)
 	int idx = hwc->idx;
 
 	if (unlikely(event->attr.precise_ip))
-		intel_pmu_pebs_enable(event);
+		static_call(x86_pmu_pebs_enable)(event);
 
 	switch (idx) {
 	case 0 ... INTEL_PMC_IDX_FIXED - 1:
 		if (branch_sample_counters(event))
 			enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
 		intel_set_masks(event, idx);
+		static_call_cond(intel_pmu_enable_acr_event)(event);
 		__x86_pmu_enable_event(hwc, enable_mask);
 		break;
 	case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
+		static_call_cond(intel_pmu_enable_acr_event)(event);
+		fallthrough;
 	case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
 		intel_pmu_enable_fixed(event);
 		break;
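intel_pmu_hw_config() further down wires the new Auto Counter Reload support to perf_event_attr::config2: within a sampling group, config2 acts as a mask of group members whose overflow reloads this event's counter with its sample_period. A rough user-space sketch of opening such a two-event group (event encodings, field values and the exact config2 semantics here are illustrative assumptions, not copied from documentation):

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int perf_open(struct perf_event_attr *attr, int group_fd)
	{
		return syscall(SYS_perf_event_open, attr, 0, -1, group_fd, 0);
	}

	int open_acr_group(void)
	{
		struct perf_event_attr lead = { 0 }, member = { 0 };
		int lead_fd, member_fd;

		lead.size = sizeof(lead);
		lead.type = PERF_TYPE_RAW;
		lead.config = 0x01c2;		/* illustrative raw event encoding */
		lead.sample_period = 100000;	/* reload value used when ACR fires */
		lead.config2 = 0x2;		/* hypothetical acr_mask: reload when member 1 overflows */

		member.size = sizeof(member);
		member.type = PERF_TYPE_RAW;
		member.config = 0x00c0;		/* illustrative raw event encoding */
		member.sample_period = 200000;

		lead_fd = perf_open(&lead, -1);
		member_fd = perf_open(&member, lead_fd);
		return (lead_fd < 0 || member_fd < 0) ? -1 : lead_fd;
	}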
@ -2914,12 +2978,51 @@ static void intel_pmu_enable_event(struct perf_event *event)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void intel_pmu_acr_late_setup(struct cpu_hw_events *cpuc)
|
||||||
|
{
|
||||||
|
struct perf_event *event, *leader;
|
||||||
|
int i, j, idx;
|
||||||
|
|
||||||
|
for (i = 0; i < cpuc->n_events; i++) {
|
||||||
|
leader = cpuc->event_list[i];
|
||||||
|
if (!is_acr_event_group(leader))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* The ACR events must be contiguous. */
|
||||||
|
for (j = i; j < cpuc->n_events; j++) {
|
||||||
|
event = cpuc->event_list[j];
|
||||||
|
if (event->group_leader != leader->group_leader)
|
||||||
|
break;
|
||||||
|
for_each_set_bit(idx, (unsigned long *)&event->attr.config2, X86_PMC_IDX_MAX) {
|
||||||
|
if (WARN_ON_ONCE(i + idx > cpuc->n_events))
|
||||||
|
return;
|
||||||
|
__set_bit(cpuc->assign[i + idx], (unsigned long *)&event->hw.config1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
i = j - 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void intel_pmu_late_setup(void)
|
||||||
|
{
|
||||||
|
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||||
|
|
||||||
|
if (!cpuc->n_late_setup)
|
||||||
|
return;
|
||||||
|
|
||||||
|
intel_pmu_pebs_late_setup(cpuc);
|
||||||
|
intel_pmu_acr_late_setup(cpuc);
|
||||||
|
}
|
||||||
|
|
||||||
static void intel_pmu_add_event(struct perf_event *event)
|
static void intel_pmu_add_event(struct perf_event *event)
|
||||||
{
|
{
|
||||||
if (event->attr.precise_ip)
|
if (event->attr.precise_ip)
|
||||||
intel_pmu_pebs_add(event);
|
intel_pmu_pebs_add(event);
|
||||||
if (intel_pmu_needs_branch_stack(event))
|
if (intel_pmu_needs_branch_stack(event))
|
||||||
intel_pmu_lbr_add(event);
|
intel_pmu_lbr_add(event);
|
||||||
|
if (is_pebs_counter_event_group(event) ||
|
||||||
|
is_acr_event_group(event))
|
||||||
|
this_cpu_ptr(&cpu_hw_events)->n_late_setup++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -3035,8 +3138,7 @@ static void x86_pmu_handle_guest_pebs(struct pt_regs *regs,
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
perf_sample_data_init(data, 0, event->hw.last_period);
|
perf_sample_data_init(data, 0, event->hw.last_period);
|
||||||
if (perf_event_overflow(event, data, regs))
|
perf_event_overflow(event, data, regs);
|
||||||
x86_pmu_stop(event, 0);
|
|
||||||
|
|
||||||
/* Inject one fake event is enough. */
|
/* Inject one fake event is enough. */
|
||||||
break;
|
break;
|
||||||
|
@ -3141,6 +3243,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
|
||||||
|
|
||||||
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
|
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
|
||||||
struct perf_event *event = cpuc->events[bit];
|
struct perf_event *event = cpuc->events[bit];
|
||||||
|
u64 last_period;
|
||||||
|
|
||||||
handled++;
|
handled++;
|
||||||
|
|
||||||
|
@ -3168,16 +3271,17 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
|
||||||
if (is_pebs_counter_event_group(event))
|
if (is_pebs_counter_event_group(event))
|
||||||
x86_pmu.drain_pebs(regs, &data);
|
x86_pmu.drain_pebs(regs, &data);
|
||||||
|
|
||||||
|
last_period = event->hw.last_period;
|
||||||
|
|
||||||
if (!intel_pmu_save_and_restart(event))
|
if (!intel_pmu_save_and_restart(event))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
perf_sample_data_init(&data, 0, event->hw.last_period);
|
perf_sample_data_init(&data, 0, last_period);
|
||||||
|
|
||||||
if (has_branch_stack(event))
|
if (has_branch_stack(event))
|
||||||
intel_pmu_lbr_save_brstack(&data, cpuc, event);
|
intel_pmu_lbr_save_brstack(&data, cpuc, event);
|
||||||
|
|
||||||
if (perf_event_overflow(event, &data, regs))
|
perf_event_overflow(event, &data, regs);
|
||||||
x86_pmu_stop(event, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return handled;
|
return handled;
|
||||||
@@ -3739,10 +3843,9 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
  if (cpuc->excl_cntrs)
  return intel_get_excl_constraints(cpuc, event, idx, c2);

- /* Not all counters support the branch counter feature. */
- if (branch_sample_counters(event)) {
+ if (event->hw.dyn_constraint != ~0ULL) {
  c2 = dyn_constraint(cpuc, c2, idx);
- c2->idxmsk64 &= x86_pmu.lbr_counters;
+ c2->idxmsk64 &= event->hw.dyn_constraint;
  c2->weight = hweight64(c2->idxmsk64);
  }

@@ -4083,6 +4186,39 @@ end:
  return start;
  }

+ static inline bool intel_pmu_has_acr(struct pmu *pmu)
+ {
+ return !!hybrid(pmu, acr_cause_mask64);
+ }
+
+ static bool intel_pmu_is_acr_group(struct perf_event *event)
+ {
+ /* The group leader has the ACR flag set */
+ if (is_acr_event_group(event))
+ return true;
+
+ /* The acr_mask is set */
+ if (event->attr.config2)
+ return true;
+
+ return false;
+ }
+
+ static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event,
+ u64 *cause_mask, int *num)
+ {
+ event->hw.dyn_constraint &= hybrid(event->pmu, acr_cntr_mask64);
+ *cause_mask |= event->attr.config2;
+ *num += 1;
+ }
+
+ static inline void intel_pmu_set_acr_caused_constr(struct perf_event *event,
+ int idx, u64 cause_mask)
+ {
+ if (test_bit(idx, (unsigned long *)&cause_mask))
+ event->hw.dyn_constraint &= hybrid(event->pmu, acr_cause_mask64);
+ }
+
  static int intel_pmu_hw_config(struct perf_event *event)
  {
  int ret = x86_pmu_hw_config(event);
@@ -4144,15 +4280,19 @@ static int intel_pmu_hw_config(struct perf_event *event)
  leader = event->group_leader;
  if (branch_sample_call_stack(leader))
  return -EINVAL;
- if (branch_sample_counters(leader))
+ if (branch_sample_counters(leader)) {
  num++;
+ leader->hw.dyn_constraint &= x86_pmu.lbr_counters;
+ }
  leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;

  for_each_sibling_event(sibling, leader) {
  if (branch_sample_call_stack(sibling))
  return -EINVAL;
- if (branch_sample_counters(sibling))
+ if (branch_sample_counters(sibling)) {
  num++;
+ sibling->hw.dyn_constraint &= x86_pmu.lbr_counters;
+ }
  }

  if (num > fls(x86_pmu.lbr_counters))
@@ -4207,6 +4347,94 @@ static int intel_pmu_hw_config(struct perf_event *event)
  event->attr.precise_ip)
  event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;

+ if (intel_pmu_has_acr(event->pmu) && intel_pmu_is_acr_group(event)) {
+ struct perf_event *sibling, *leader = event->group_leader;
+ struct pmu *pmu = event->pmu;
+ bool has_sw_event = false;
+ int num = 0, idx = 0;
+ u64 cause_mask = 0;
+
+ /* Not support perf metrics */
+ if (is_metric_event(event))
+ return -EINVAL;
+
+ /* Not support freq mode */
+ if (event->attr.freq)
+ return -EINVAL;
+
+ /* PDist is not supported */
+ if (event->attr.config2 && event->attr.precise_ip > 2)
+ return -EINVAL;
+
+ /* The reload value cannot exceeds the max period */
+ if (event->attr.sample_period > x86_pmu.max_period)
+ return -EINVAL;
+ /*
+ * The counter-constraints of each event cannot be finalized
+ * unless the whole group is scanned. However, it's hard
+ * to know whether the event is the last one of the group.
+ * Recalculate the counter-constraints for each event when
+ * adding a new event.
+ *
+ * The group is traversed twice, which may be optimized later.
+ * In the first round,
+ * - Find all events which do reload when other events
+ *   overflow and set the corresponding counter-constraints
+ * - Add all events, which can cause other events reload,
+ *   in the cause_mask
+ * - Error out if the number of events exceeds the HW limit
+ * - The ACR events must be contiguous.
+ *   Error out if there are non-X86 events between ACR events.
+ *   This is not a HW limit, but a SW limit.
+ *   With the assumption, the intel_pmu_acr_late_setup() can
+ *   easily convert the event idx to counter idx without
+ *   traversing the whole event list.
+ */
+ if (!is_x86_event(leader))
+ return -EINVAL;
+
+ if (leader->attr.config2)
+ intel_pmu_set_acr_cntr_constr(leader, &cause_mask, &num);
+
+ if (leader->nr_siblings) {
+ for_each_sibling_event(sibling, leader) {
+ if (!is_x86_event(sibling)) {
+ has_sw_event = true;
+ continue;
+ }
+ if (!sibling->attr.config2)
+ continue;
+ if (has_sw_event)
+ return -EINVAL;
+ intel_pmu_set_acr_cntr_constr(sibling, &cause_mask, &num);
+ }
+ }
+ if (leader != event && event->attr.config2) {
+ if (has_sw_event)
+ return -EINVAL;
+ intel_pmu_set_acr_cntr_constr(event, &cause_mask, &num);
+ }
+
+ if (hweight64(cause_mask) > hweight64(hybrid(pmu, acr_cause_mask64)) ||
+ num > hweight64(hybrid(event->pmu, acr_cntr_mask64)))
+ return -EINVAL;
+ /*
+ * In the second round, apply the counter-constraints for
+ * the events which can cause other events reload.
+ */
+ intel_pmu_set_acr_caused_constr(leader, idx++, cause_mask);
+
+ if (leader->nr_siblings) {
+ for_each_sibling_event(sibling, leader)
+ intel_pmu_set_acr_caused_constr(sibling, idx++, cause_mask);
+ }
+
+ if (leader != event)
+ intel_pmu_set_acr_caused_constr(event, idx, cause_mask);
+
+ leader->hw.flags |= PERF_X86_EVENT_ACR;
+ }
+
  if ((event->attr.type == PERF_TYPE_HARDWARE) ||
  (event->attr.type == PERF_TYPE_HW_CACHE))
  return 0;
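For readers who want to see how an ACR group could be driven from userspace once this lands: the acr_mask format attribute added later in this diff maps to attr.config2, and the checks above require an x86-only group, a leader sample_period no larger than the PMU maximum, and no freq mode. The sketch below is a hypothetical usage example, not kernel code; the raw event encodings and the reading of config2 bit N as "reload me when the N-th group member overflows" are assumptions drawn from the validation logic above.

    /* Hedged sketch: open a two-event group where the second event is
     * reloaded when the first one overflows. Expected to fail cleanly on
     * kernels or hardware without ACR support. */
    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <sys/types.h>
    #include <unistd.h>
    #include <string.h>
    #include <stdio.h>

    static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                               int cpu, int group_fd, unsigned long flags)
    {
        return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
    }

    int main(void)
    {
        struct perf_event_attr leader, sibling;

        memset(&leader, 0, sizeof(leader));
        leader.size = sizeof(leader);
        leader.type = PERF_TYPE_RAW;
        leader.config = 0x00c0;          /* e.g. INST_RETIRED.ANY_P (assumed) */
        leader.sample_period = 1000000;  /* must not exceed the PMU max period */

        memset(&sibling, 0, sizeof(sibling));
        sibling.size = sizeof(sibling);
        sibling.type = PERF_TYPE_RAW;
        sibling.config = 0x003c;         /* e.g. CPU_CLK_UNHALTED (assumed) */
        sibling.config2 = 0x1;           /* reload when group member 0 overflows */

        int lfd = perf_event_open(&leader, 0, -1, -1, 0);
        int sfd = lfd >= 0 ? perf_event_open(&sibling, 0, -1, lfd, 0) : -1;

        if (lfd < 0 || sfd < 0)
            perror("perf_event_open");   /* expected on pre-ACR hardware */
        return 0;
    }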
@@ -4354,7 +4582,7 @@ static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr, void *data)
  .guest = intel_ctrl & ~cpuc->intel_ctrl_host_mask & ~pebs_mask,
  };

- if (!x86_pmu.pebs)
+ if (!x86_pmu.ds_pebs)
  return arr;

  /*
@@ -4952,7 +5180,7 @@ int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
  goto err;
  }

- if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) {
+ if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_DYN_CONSTRAINT)) {
  size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);

  cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
@@ -5041,7 +5269,7 @@ static inline bool intel_pmu_broken_perf_cap(void)
  return false;
  }

- static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
+ static void update_pmu_cap(struct pmu *pmu)
  {
  unsigned int cntr, fixed_cntr, ecx, edx;
  union cpuid35_eax eax;
@@ -5050,20 +5278,30 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu)
  cpuid(ARCH_PERFMON_EXT_LEAF, &eax.full, &ebx.full, &ecx, &edx);

  if (ebx.split.umask2)
- pmu->config_mask |= ARCH_PERFMON_EVENTSEL_UMASK2;
+ hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_UMASK2;
  if (ebx.split.eq)
- pmu->config_mask |= ARCH_PERFMON_EVENTSEL_EQ;
+ hybrid(pmu, config_mask) |= ARCH_PERFMON_EVENTSEL_EQ;

  if (eax.split.cntr_subleaf) {
  cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_NUM_COUNTER_LEAF,
  &cntr, &fixed_cntr, &ecx, &edx);
- pmu->cntr_mask64 = cntr;
- pmu->fixed_cntr_mask64 = fixed_cntr;
+ hybrid(pmu, cntr_mask64) = cntr;
+ hybrid(pmu, fixed_cntr_mask64) = fixed_cntr;
+ }
+
+ if (eax.split.acr_subleaf) {
+ cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF,
+ &cntr, &fixed_cntr, &ecx, &edx);
+ /* The mask of the counters which can be reloaded */
+ hybrid(pmu, acr_cntr_mask64) = cntr | ((u64)fixed_cntr << INTEL_PMC_IDX_FIXED);
+
+ /* The mask of the counters which can cause a reload of reloadable counters */
+ hybrid(pmu, acr_cause_mask64) = ecx | ((u64)edx << INTEL_PMC_IDX_FIXED);
  }

  if (!intel_pmu_broken_perf_cap()) {
  /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */
- rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities);
+ rdmsrl(MSR_IA32_PERF_CAPABILITIES, hybrid(pmu, intel_cap).capabilities);
  }
  }

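The acr_subleaf branch above composes the two ACR masks from CPUID leaf 0x23, subleaf 2: EAX/EBX enumerate the reloadable general-purpose and fixed counters, ECX/EDX the counters whose overflow can trigger a reload, with fixed counters shifted up by INTEL_PMC_IDX_FIXED. A standalone sketch of the same composition follows, assuming INTEL_PMC_IDX_FIXED is 32 and using the compiler's cpuid helper; note the kernel additionally gates this on the acr_subleaf capability bit from subleaf 0.

    /* Standalone sketch of the mask composition done by update_pmu_cap(). */
    #include <cpuid.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ARCH_PERFMON_EXT_LEAF  0x23
    #define ARCH_PERFMON_ACR_LEAF  0x2
    #define INTEL_PMC_IDX_FIXED    32   /* assumed fixed-counter offset */

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;
        uint64_t acr_cntr_mask64 = 0, acr_cause_mask64 = 0;

        if (__get_cpuid_count(ARCH_PERFMON_EXT_LEAF, ARCH_PERFMON_ACR_LEAF,
                              &eax, &ebx, &ecx, &edx)) {
            /* Counters which can be reloaded: GP mask | fixed mask. */
            acr_cntr_mask64 = eax | ((uint64_t)ebx << INTEL_PMC_IDX_FIXED);
            /* Counters whose overflow can cause a reload. */
            acr_cause_mask64 = ecx | ((uint64_t)edx << INTEL_PMC_IDX_FIXED);
        }

        printf("acr_cntr_mask64  = %#llx\n", (unsigned long long)acr_cntr_mask64);
        printf("acr_cause_mask64 = %#llx\n", (unsigned long long)acr_cause_mask64);
        return 0;
    }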
@@ -5150,7 +5388,7 @@ static bool init_hybrid_pmu(int cpu)
  goto end;

  if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
- update_pmu_cap(pmu);
+ update_pmu_cap(&pmu->pmu);

  intel_pmu_check_hybrid_pmus(pmu);

@@ -5524,7 +5762,7 @@ static __init void intel_clovertown_quirk(void)
  * these chips.
  */
  pr_warn("PEBS disabled due to CPU errata\n");
- x86_pmu.pebs = 0;
+ x86_pmu.ds_pebs = 0;
  x86_pmu.pebs_constraints = NULL;
  }

@@ -6012,7 +6250,7 @@ tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i)
  static umode_t
  pebs_is_visible(struct kobject *kobj, struct attribute *attr, int i)
  {
- return x86_pmu.pebs ? attr->mode : 0;
+ return x86_pmu.ds_pebs ? attr->mode : 0;
  }

  static umode_t
@@ -6043,6 +6281,21 @@ td_is_visible(struct kobject *kobj, struct attribute *attr, int i)
  return attr->mode;
  }

+ PMU_FORMAT_ATTR(acr_mask, "config2:0-63");
+
+ static struct attribute *format_acr_attrs[] = {
+ &format_attr_acr_mask.attr,
+ NULL
+ };
+
+ static umode_t
+ acr_is_visible(struct kobject *kobj, struct attribute *attr, int i)
+ {
+ struct device *dev = kobj_to_dev(kobj);
+
+ return intel_pmu_has_acr(dev_get_drvdata(dev)) ? attr->mode : 0;
+ }
+
  static struct attribute_group group_events_td = {
  .name = "events",
  .is_visible = td_is_visible,
@@ -6085,6 +6338,12 @@ static struct attribute_group group_format_evtsel_ext = {
  .is_visible = evtsel_ext_is_visible,
  };

+ static struct attribute_group group_format_acr = {
+ .name = "format",
+ .attrs = format_acr_attrs,
+ .is_visible = acr_is_visible,
+ };
+
  static struct attribute_group group_default = {
  .attrs = intel_pmu_attrs,
  .is_visible = default_is_visible,
@@ -6099,6 +6358,7 @@ static const struct attribute_group *attr_update[] = {
  &group_format_extra,
  &group_format_extra_skl,
  &group_format_evtsel_ext,
+ &group_format_acr,
  &group_default,
  NULL,
  };
@@ -6383,6 +6643,7 @@ static const struct attribute_group *hybrid_attr_update[] = {
  &group_caps_lbr,
  &hybrid_group_format_extra,
  &group_format_evtsel_ext,
+ &group_format_acr,
  &group_default,
  &hybrid_group_cpus,
  NULL,
@@ -6575,6 +6836,7 @@ static __always_inline void intel_pmu_init_skt(struct pmu *pmu)
  intel_pmu_init_grt(pmu);
  hybrid(pmu, event_constraints) = intel_skt_event_constraints;
  hybrid(pmu, extra_regs) = intel_cmt_extra_regs;
+ static_call_update(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
  }

  __init int intel_pmu_init(void)
@@ -6635,6 +6897,7 @@ __init int intel_pmu_init(void)

  x86_pmu.pebs_events_mask = intel_pmu_pebs_mask(x86_pmu.cntr_mask64);
  x86_pmu.pebs_capable = PEBS_COUNTER_MASK;
+ x86_pmu.config_mask = X86_RAW_EVENT_MASK;

  /*
  * Quirk: v2 perfmon does not report fixed-purpose events, so
@@ -6663,7 +6926,7 @@ __init int intel_pmu_init(void)
  if (boot_cpu_has(X86_FEATURE_ARCH_LBR))
  intel_pmu_arch_lbr_init();

- intel_ds_init();
+ intel_pebs_init();

  x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */

@@ -6673,6 +6936,12 @@ __init int intel_pmu_init(void)
  pr_cont(" AnyThread deprecated, ");
  }

+ /*
+ * Many features on and after V6 require dynamic constraint,
+ * e.g., Arch PEBS, ACR.
+ */
+ if (version >= 6)
+ x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
  /*
  * Install the hw-cache-events table:
  */
@@ -6884,6 +7153,18 @@ __init int intel_pmu_init(void)
  name = "crestmont";
  break;

+ case INTEL_ATOM_DARKMONT_X:
+ intel_pmu_init_skt(NULL);
+ intel_pmu_pebs_data_source_cmt();
+ x86_pmu.pebs_latency_data = cmt_latency_data;
+ x86_pmu.get_event_constraints = cmt_get_event_constraints;
+ td_attr = skt_events_attrs;
+ mem_attr = grt_mem_attrs;
+ extra_attr = cmt_format_attr;
+ pr_cont("Darkmont events, ");
+ name = "darkmont";
+ break;
+
  case INTEL_WESTMERE:
  case INTEL_WESTMERE_EP:
  case INTEL_WESTMERE_EX:
@@ -7433,6 +7714,18 @@ __init int intel_pmu_init(void)
  x86_pmu.attr_update = hybrid_attr_update;
  }

+ /*
+ * The archPerfmonExt (0x23) includes an enhanced enumeration of
+ * PMU architectural features with a per-core view. For non-hybrid,
+ * each core has the same PMU capabilities. It's good enough to
+ * update the x86_pmu from the booting CPU. For hybrid, the x86_pmu
+ * is used to keep the common capabilities. Still keep the values
+ * from the leaf 0xa. The core specific update will be done later
+ * when a new type is online.
+ */
+ if (!is_hybrid() && boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT))
+ update_pmu_cap(NULL);
+
  intel_pmu_check_counters_mask(&x86_pmu.cntr_mask64,
  &x86_pmu.fixed_cntr_mask64,
  &x86_pmu.intel_ctrl);
@@ -624,7 +624,7 @@ static int alloc_pebs_buffer(int cpu)
  int max, node = cpu_to_node(cpu);
  void *buffer, *insn_buff, *cea;

- if (!x86_pmu.pebs)
+ if (!x86_pmu.ds_pebs)
  return 0;

  buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
@@ -659,7 +659,7 @@ static void release_pebs_buffer(int cpu)
  struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
  void *cea;

- if (!x86_pmu.pebs)
+ if (!x86_pmu.ds_pebs)
  return;

  kfree(per_cpu(insn_buffer, cpu));
@@ -734,7 +734,7 @@ void release_ds_buffers(void)
  {
  int cpu;

- if (!x86_pmu.bts && !x86_pmu.pebs)
+ if (!x86_pmu.bts && !x86_pmu.ds_pebs)
  return;

  for_each_possible_cpu(cpu)
@@ -750,7 +750,8 @@ void release_ds_buffers(void)
  }

  for_each_possible_cpu(cpu) {
- release_pebs_buffer(cpu);
+ if (x86_pmu.ds_pebs)
+ release_pebs_buffer(cpu);
  release_bts_buffer(cpu);
  }
  }
@@ -761,15 +762,17 @@ void reserve_ds_buffers(void)
  int cpu;

  x86_pmu.bts_active = 0;
- x86_pmu.pebs_active = 0;

- if (!x86_pmu.bts && !x86_pmu.pebs)
+ if (x86_pmu.ds_pebs)
+ x86_pmu.pebs_active = 0;
+
+ if (!x86_pmu.bts && !x86_pmu.ds_pebs)
  return;

  if (!x86_pmu.bts)
  bts_err = 1;

- if (!x86_pmu.pebs)
+ if (!x86_pmu.ds_pebs)
  pebs_err = 1;

  for_each_possible_cpu(cpu) {
@@ -781,7 +784,8 @@ void reserve_ds_buffers(void)
  if (!bts_err && alloc_bts_buffer(cpu))
  bts_err = 1;

- if (!pebs_err && alloc_pebs_buffer(cpu))
+ if (x86_pmu.ds_pebs && !pebs_err &&
+ alloc_pebs_buffer(cpu))
  pebs_err = 1;

  if (bts_err && pebs_err)
@@ -793,7 +797,7 @@ void reserve_ds_buffers(void)
  release_bts_buffer(cpu);
  }

- if (pebs_err) {
+ if (x86_pmu.ds_pebs && pebs_err) {
  for_each_possible_cpu(cpu)
  release_pebs_buffer(cpu);
  }
@@ -805,7 +809,7 @@ void reserve_ds_buffers(void)
  if (x86_pmu.bts && !bts_err)
  x86_pmu.bts_active = 1;

- if (x86_pmu.pebs && !pebs_err)
+ if (x86_pmu.ds_pebs && !pebs_err)
  x86_pmu.pebs_active = 1;

  for_each_possible_cpu(cpu) {
@@ -1355,9 +1359,8 @@ static void __intel_pmu_pebs_update_cfg(struct perf_event *event,
  }


- static void intel_pmu_late_setup(void)
+ void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc)
  {
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
  struct perf_event *event;
  u64 pebs_data_cfg = 0;
  int i;
@@ -1828,8 +1831,6 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,

  perf_sample_data_init(data, 0, event->hw.last_period);

- data->period = event->hw.last_period;
-
  /*
  * Use latency for weight (only avail with PEBS-LL)
  */
@@ -2082,7 +2083,6 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event,
  sample_type = event->attr.sample_type;
  format_group = basic->format_group;
  perf_sample_data_init(data, 0, event->hw.last_period);
- data->period = event->hw.last_period;

  setup_pebs_time(event, data, basic->tsc);

@@ -2359,8 +2359,7 @@ __intel_pmu_pebs_last_event(struct perf_event *event,
  * All but the last records are processed.
  * The last one is left to be able to call the overflow handler.
  */
- if (perf_event_overflow(event, data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, data, regs);
  }

  if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
@@ -2589,8 +2588,8 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
  if (error[bit]) {
  perf_log_lost_samples(event, error[bit]);

- if (iregs && perf_event_account_interrupt(event))
- x86_pmu_stop(event, 0);
+ if (iregs)
+ perf_event_account_interrupt(event);
  }

  if (counts[bit]) {
@@ -2670,10 +2669,10 @@ static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs, struct perf_sample_d
  }

  /*
- * BTS, PEBS probe and setup
+ * PEBS probe and setup
  */

- void __init intel_ds_init(void)
+ void __init intel_pebs_init(void)
  {
  /*
  * No support for 32bit formats
@@ -2681,13 +2680,12 @@ void __init intel_ds_init(void)
  if (!boot_cpu_has(X86_FEATURE_DTES64))
  return;

- x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
- x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
+ x86_pmu.ds_pebs = boot_cpu_has(X86_FEATURE_PEBS);
  x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
  if (x86_pmu.version <= 4)
  x86_pmu.pebs_no_isolation = 1;

- if (x86_pmu.pebs) {
+ if (x86_pmu.ds_pebs) {
  char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
  char *pebs_qual = "";
  int format = x86_pmu.intel_cap.pebs_format;
@@ -2695,6 +2693,11 @@ void __init intel_ds_init(void)
  if (format < 4)
  x86_pmu.intel_cap.pebs_baseline = 0;

+ x86_pmu.pebs_enable = intel_pmu_pebs_enable;
+ x86_pmu.pebs_disable = intel_pmu_pebs_disable;
+ x86_pmu.pebs_enable_all = intel_pmu_pebs_enable_all;
+ x86_pmu.pebs_disable_all = intel_pmu_pebs_disable_all;
+
  switch (format) {
  case 0:
  pr_cont("PEBS fmt0%c, ", pebs_type);
@@ -2779,7 +2782,7 @@ void __init intel_ds_init(void)

  default:
  pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
- x86_pmu.pebs = 0;
+ x86_pmu.ds_pebs = 0;
  }
  }
  }
@@ -2788,7 +2791,7 @@ void perf_restore_debug_store(void)
  {
  struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);

- if (!x86_pmu.bts && !x86_pmu.pebs)
+ if (!x86_pmu.bts && !x86_pmu.ds_pebs)
  return;

  wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
@@ -241,19 +241,20 @@ again:

  for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
  struct perf_event *event = cpuc->events[bit];
+ u64 last_period;

  handled++;

  if (!test_bit(bit, cpuc->active_mask))
  continue;

+ last_period = event->hw.last_period;
  if (!intel_pmu_save_and_restart(event))
  continue;

- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);

- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
  }

  /*
@@ -1618,7 +1618,7 @@ void __init intel_pmu_arch_lbr_init(void)
  x86_pmu.lbr_nr = lbr_nr;

  if (!!x86_pmu.lbr_counters)
- x86_pmu.flags |= PMU_FL_BR_CNTR;
+ x86_pmu.flags |= PMU_FL_BR_CNTR | PMU_FL_DYN_CONSTRAINT;

  if (x86_pmu.lbr_mispred)
  static_branch_enable(&x86_lbr_mispred);
@@ -1072,8 +1072,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs)
  continue;


- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
  }

  if (handled)
@@ -1863,6 +1863,8 @@ static __init int pt_init(void)

  if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
  pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
+ else
+ pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_PREFER_LARGE;

  pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE |
  PERF_PMU_CAP_ITRACE |
@@ -305,17 +305,11 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
  {
  struct intel_uncore_box *box;
  struct perf_event *event;
- unsigned long flags;
  int bit;

  box = container_of(hrtimer, struct intel_uncore_box, hrtimer);
  if (!box->n_active || box->cpu != smp_processor_id())
  return HRTIMER_NORESTART;
- /*
- * disable local interrupt to prevent uncore_pmu_event_start/stop
- * to interrupt the update process
- */
- local_irq_save(flags);

  /*
  * handle boxes with an active event list as opposed to active
@@ -328,8 +322,6 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
  for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX)
  uncore_perf_event_update(box, box->events[bit]);

- local_irq_restore(flags);
-
  hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration));
  return HRTIMER_RESTART;
  }
@@ -337,7 +329,7 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer)
  void uncore_pmu_start_hrtimer(struct intel_uncore_box *box)
  {
  hrtimer_start(&box->hrtimer, ns_to_ktime(box->hrtimer_duration),
- HRTIMER_MODE_REL_PINNED);
+ HRTIMER_MODE_REL_PINNED_HARD);
  }

  void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)
@@ -347,7 +339,7 @@ void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box)

  static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
  {
- hrtimer_setup(&box->hrtimer, uncore_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_setup(&box->hrtimer, uncore_pmu_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
  }

  static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type,
@@ -127,6 +127,11 @@ static inline bool is_pebs_counter_event_group(struct perf_event *event)
  return check_leader_group(event->group_leader, PERF_X86_EVENT_PEBS_CNTR);
  }

+ static inline bool is_acr_event_group(struct perf_event *event)
+ {
+ return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR);
+ }
+
  struct amd_nb {
  int nb_id; /* NorthBridge id */
  int refcnt; /* reference count */
@@ -268,6 +273,7 @@ struct cpu_hw_events {
  struct event_constraint *event_constraint[X86_PMC_IDX_MAX];

  int n_excl; /* the number of exclusive events */
+ int n_late_setup; /* the num of events needs late setup */

  unsigned int txn_flags;
  int is_fake;
@@ -293,6 +299,10 @@ struct cpu_hw_events {
  u64 fixed_ctrl_val;
  u64 active_fixed_ctrl_val;

+ /* Intel ACR configuration */
+ u64 acr_cfg_b[X86_PMC_IDX_MAX];
+ u64 acr_cfg_c[X86_PMC_IDX_MAX];
+
  /*
  * Intel LBR bits
  */
@@ -714,6 +724,15 @@ struct x86_hybrid_pmu {
  u64 fixed_cntr_mask64;
  unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
  };
+
+ union {
+ u64 acr_cntr_mask64;
+ unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
+ union {
+ u64 acr_cause_mask64;
+ unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
  struct event_constraint unconstrained;

  u64 hw_cache_event_ids
@@ -796,6 +815,10 @@ struct x86_pmu {
  int (*hw_config)(struct perf_event *event);
  int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
  void (*late_setup)(void);
+ void (*pebs_enable)(struct perf_event *event);
+ void (*pebs_disable)(struct perf_event *event);
+ void (*pebs_enable_all)(void);
+ void (*pebs_disable_all)(void);
  unsigned eventsel;
  unsigned perfctr;
  unsigned fixedctr;
@@ -812,6 +835,14 @@ struct x86_pmu {
  u64 fixed_cntr_mask64;
  unsigned long fixed_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
  };
+ union {
+ u64 acr_cntr_mask64;
+ unsigned long acr_cntr_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
+ union {
+ u64 acr_cause_mask64;
+ unsigned long acr_cause_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
  int cntval_bits;
  u64 cntval_mask;
  union {
@@ -878,7 +909,7 @@ struct x86_pmu {
  */
  unsigned int bts :1,
  bts_active :1,
- pebs :1,
+ ds_pebs :1,
  pebs_active :1,
  pebs_broken :1,
  pebs_prec_dist :1,
@@ -1049,6 +1080,7 @@ do { \
  #define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */
  #define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */
  #define PMU_FL_BR_CNTR 0x400 /* Support branch counter logging */
+ #define PMU_FL_DYN_CONSTRAINT 0x800 /* Needs dynamic constraint */

  #define EVENT_VAR(_id) event_attr_##_id
  #define EVENT_PTR(_id) &event_attr_##_id.attr.attr
@@ -1091,6 +1123,7 @@ static struct perf_pmu_format_hybrid_attr format_attr_hybrid_##_name = {\
  .pmu_type = _pmu, \
  }

+ int is_x86_event(struct perf_event *event);
  struct pmu *x86_get_pmu(unsigned int cpu);
  extern struct x86_pmu x86_pmu __read_mostly;

@@ -1098,6 +1131,10 @@ DECLARE_STATIC_CALL(x86_pmu_set_period, *x86_pmu.set_period);
  DECLARE_STATIC_CALL(x86_pmu_update, *x86_pmu.update);
  DECLARE_STATIC_CALL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
  DECLARE_STATIC_CALL(x86_pmu_late_setup, *x86_pmu.late_setup);
+ DECLARE_STATIC_CALL(x86_pmu_pebs_enable, *x86_pmu.pebs_enable);
+ DECLARE_STATIC_CALL(x86_pmu_pebs_disable, *x86_pmu.pebs_disable);
+ DECLARE_STATIC_CALL(x86_pmu_pebs_enable_all, *x86_pmu.pebs_enable_all);
+ DECLARE_STATIC_CALL(x86_pmu_pebs_disable_all, *x86_pmu.pebs_disable_all);

  static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
  {
@@ -1587,6 +1624,8 @@ void intel_pmu_disable_bts(void);

  int intel_pmu_drain_bts_buffer(void);

+ void intel_pmu_late_setup(void);
+
  u64 grt_latency_data(struct perf_event *event, u64 status);

  u64 cmt_latency_data(struct perf_event *event, u64 status);
@@ -1643,11 +1682,13 @@ void intel_pmu_pebs_disable_all(void);

  void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in);

+ void intel_pmu_pebs_late_setup(struct cpu_hw_events *cpuc);
+
  void intel_pmu_drain_pebs_buffer(void);

  void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);

- void intel_ds_init(void);
+ void intel_pebs_init(void);

  void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
  struct cpu_hw_events *cpuc,
@@ -2,23 +2,24 @@
  /*
  * struct hw_perf_event.flags flags
  */
- PERF_ARCH(PEBS_LDLAT, 0x00001) /* ld+ldlat data address sampling */
+ PERF_ARCH(PEBS_LDLAT, 0x0000001) /* ld+ldlat data address sampling */
- PERF_ARCH(PEBS_ST, 0x00002) /* st data address sampling */
+ PERF_ARCH(PEBS_ST, 0x0000002) /* st data address sampling */
- PERF_ARCH(PEBS_ST_HSW, 0x00004) /* haswell style datala, store */
+ PERF_ARCH(PEBS_ST_HSW, 0x0000004) /* haswell style datala, store */
- PERF_ARCH(PEBS_LD_HSW, 0x00008) /* haswell style datala, load */
+ PERF_ARCH(PEBS_LD_HSW, 0x0000008) /* haswell style datala, load */
- PERF_ARCH(PEBS_NA_HSW, 0x00010) /* haswell style datala, unknown */
+ PERF_ARCH(PEBS_NA_HSW, 0x0000010) /* haswell style datala, unknown */
- PERF_ARCH(EXCL, 0x00020) /* HT exclusivity on counter */
+ PERF_ARCH(EXCL, 0x0000020) /* HT exclusivity on counter */
- PERF_ARCH(DYNAMIC, 0x00040) /* dynamic alloc'd constraint */
+ PERF_ARCH(DYNAMIC, 0x0000040) /* dynamic alloc'd constraint */
- PERF_ARCH(PEBS_CNTR, 0x00080) /* PEBS counters snapshot */
+ PERF_ARCH(PEBS_CNTR, 0x0000080) /* PEBS counters snapshot */
- PERF_ARCH(EXCL_ACCT, 0x00100) /* accounted EXCL event */
+ PERF_ARCH(EXCL_ACCT, 0x0000100) /* accounted EXCL event */
- PERF_ARCH(AUTO_RELOAD, 0x00200) /* use PEBS auto-reload */
+ PERF_ARCH(AUTO_RELOAD, 0x0000200) /* use PEBS auto-reload */
- PERF_ARCH(LARGE_PEBS, 0x00400) /* use large PEBS */
+ PERF_ARCH(LARGE_PEBS, 0x0000400) /* use large PEBS */
- PERF_ARCH(PEBS_VIA_PT, 0x00800) /* use PT buffer for PEBS */
+ PERF_ARCH(PEBS_VIA_PT, 0x0000800) /* use PT buffer for PEBS */
- PERF_ARCH(PAIR, 0x01000) /* Large Increment per Cycle */
+ PERF_ARCH(PAIR, 0x0001000) /* Large Increment per Cycle */
- PERF_ARCH(LBR_SELECT, 0x02000) /* Save/Restore MSR_LBR_SELECT */
+ PERF_ARCH(LBR_SELECT, 0x0002000) /* Save/Restore MSR_LBR_SELECT */
- PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */
+ PERF_ARCH(TOPDOWN, 0x0004000) /* Count Topdown slots/metrics events */
- PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
+ PERF_ARCH(PEBS_STLAT, 0x0008000) /* st+stlat data address sampling */
- PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */
+ PERF_ARCH(AMD_BRS, 0x0010000) /* AMD Branch Sampling */
- PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */
+ PERF_ARCH(PEBS_LAT_HYBRID, 0x0020000) /* ld and st lat for hybrid */
- PERF_ARCH(NEEDS_BRANCH_STACK, 0x40000) /* require branch stack setup */
+ PERF_ARCH(NEEDS_BRANCH_STACK, 0x0040000) /* require branch stack setup */
- PERF_ARCH(BRANCH_COUNTERS, 0x80000) /* logs the counters in the extra space of each branch */
+ PERF_ARCH(BRANCH_COUNTERS, 0x0080000) /* logs the counters in the extra space of each branch */
+ PERF_ARCH(ACR, 0x0100000) /* Auto counter reload */
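The widened flag values above pair with the PERF_EVENT_FLAG_ARCH change from 0x000fffff to 0x0fffffff elsewhere in this diff: the new ACR flag is the first one to use bit 20, which no longer fits in the old 20-bit arch-reserved mask. A compile-time sketch of the two invariants follows; the constants are copied from the hunks, while the file name in the comment is only illustrative.

    /* Compile-time sanity check of the widened flag space.
     * Build with: cc -std=c11 -c flags_check.c */
    #include <assert.h>

    #define PERF_X86_EVENT_ACR             0x0100000   /* new flag, bit 20 */
    #define PERF_EVENT_FLAG_ARCH           0x0fffffff  /* was 0x000fffff */
    #define PERF_EVENT_FLAG_USER_READ_CNT  0x80000000U

    static_assert((PERF_X86_EVENT_ACR & ~PERF_EVENT_FLAG_ARCH) == 0,
                  "ACR flag must stay within the arch-reserved bits");
    static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0,
                  "arch bits must not overlap the user-read flag");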
@@ -397,8 +397,7 @@ again:
  if (!x86_perf_event_set_period(event))
  continue;

- if (perf_event_overflow(event, &data, regs))
- x86_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
  }

  /*
@@ -602,7 +602,11 @@
  /* V6 PMON MSR range */
  #define MSR_IA32_PMC_V6_GP0_CTR 0x1900
  #define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901
+ #define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902
+ #define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903
  #define MSR_IA32_PMC_V6_FX0_CTR 0x1980
+ #define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982
+ #define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983
  #define MSR_IA32_PMC_V6_STEP 4

  /* KeyID partitioning between MKTME and TDX */
@@ -195,6 +195,7 @@ union cpuid10_edx {
  */
  #define ARCH_PERFMON_EXT_LEAF 0x00000023
  #define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1
+ #define ARCH_PERFMON_ACR_LEAF 0x2

  union cpuid35_eax {
  struct {
@@ -840,6 +840,11 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
  insn_byte_t p;
  int i;

+ /* x86_nops[insn->length]; same as jmp with .offs = 0 */
+ if (insn->length <= ASM_NOP_MAX &&
+ !memcmp(insn->kaddr, x86_nops[insn->length], insn->length))
+ goto setup;
+
  switch (opc1) {
  case 0xeb: /* jmp 8 */
  case 0xe9: /* jmp 32 */
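The uprobe hunk above emulates any instruction whose bytes equal x86_nops[insn->length], treating it as a jump with zero displacement instead of single-stepping it out of line. Below is a small standalone sketch of that byte comparison; the table is written out from the Intel-recommended multi-byte NOP encodings (which x86_nops follows) and is truncated to 5 bytes for brevity, so treat it as illustrative rather than a copy of the kernel table.

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define ASM_NOP_MAX 5   /* truncated for the sketch; the kernel's goes to 8 */

    /* Intel-recommended single-instruction NOPs, indexed by length. */
    static const unsigned char nops[ASM_NOP_MAX + 1][ASM_NOP_MAX] = {
        [1] = { 0x90 },
        [2] = { 0x66, 0x90 },
        [3] = { 0x0f, 0x1f, 0x00 },
        [4] = { 0x0f, 0x1f, 0x40, 0x00 },
        [5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 },
    };

    static bool is_canonical_nop(const unsigned char *insn, int len)
    {
        return len >= 1 && len <= ASM_NOP_MAX &&
               !memcmp(insn, nops[len], len);
    }

    int main(void)
    {
        const unsigned char nop5[] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
        const unsigned char mov[]  = { 0x89, 0xd8 };

        printf("5-byte nop: %d\n", is_canonical_nop(nop5, sizeof(nop5)));
        printf("mov:        %d\n", is_canonical_nop(mov, sizeof(mov)));
        return 0;
    }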
@@ -388,8 +388,7 @@ irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
  struct pt_regs *regs = get_irq_regs();

  perf_sample_data_init(&data, 0, last_period);
- if (perf_event_overflow(event, &data, regs))
- xtensa_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
  }

  rc = IRQ_HANDLED;
@@ -474,8 +474,7 @@ static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu)
  if (!armpmu_event_set_period(event))
  continue;

- if (perf_event_overflow(event, &data, regs))
- m1_pmu_disable_event(event);
+ perf_event_overflow(event, &data, regs);
  }

  cpu_pmu->start(cpu_pmu);
@@ -887,8 +887,7 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
  * an irq_work which will be taken care of in the handling of
  * IPI_IRQ_WORK.
  */
- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
+ perf_event_overflow(event, &data, regs);
  }
  armv8pmu_start(cpu_pmu);

@@ -276,8 +276,7 @@ armv6pmu_handle_irq(struct arm_pmu *cpu_pmu)
  if (!armpmu_event_set_period(event))
  continue;

- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
+ perf_event_overflow(event, &data, regs);
  }

  /*
@@ -930,8 +930,7 @@ static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu)
  if (!armpmu_event_set_period(event))
  continue;

- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
+ perf_event_overflow(event, &data, regs);
  }

  /*
@@ -186,8 +186,7 @@ xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu)
  if (!armpmu_event_set_period(event))
  continue;

- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
+ perf_event_overflow(event, &data, regs);
  }

  irq_work_run();
@@ -519,8 +518,7 @@ xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu)
  if (!armpmu_event_set_period(event))
  continue;

- if (perf_event_overflow(event, &data, regs))
- cpu_pmu->disable(event);
+ perf_event_overflow(event, &data, regs);
  }

  irq_work_run();
@@ -60,7 +60,6 @@ enum cpuhp_state {
  /* PREPARE section invoked on a control CPU */
  CPUHP_OFFLINE = 0,
  CPUHP_CREATE_THREADS,
- CPUHP_PERF_PREPARE,
  CPUHP_PERF_X86_PREPARE,
  CPUHP_PERF_X86_AMD_UNCORE_PREP,
  CPUHP_PERF_POWER,
@@ -26,18 +26,9 @@
  # include <asm/local64.h>
  #endif

- #define PERF_GUEST_ACTIVE 0x01
- #define PERF_GUEST_USER 0x02
-
- struct perf_guest_info_callbacks {
- unsigned int (*state)(void);
- unsigned long (*get_ip)(void);
- unsigned int (*handle_intel_pt_intr)(void);
- };
-
  #ifdef CONFIG_HAVE_HW_BREAKPOINT
- #include <linux/rhashtable-types.h>
- #include <asm/hw_breakpoint.h>
+ # include <linux/rhashtable-types.h>
+ # include <asm/hw_breakpoint.h>
  #endif

  #include <linux/list.h>
@@ -62,19 +53,20 @@ struct perf_guest_info_callbacks {
  #include <linux/security.h>
  #include <linux/static_call.h>
  #include <linux/lockdep.h>

  #include <asm/local.h>

  struct perf_callchain_entry {
- __u64 nr;
- __u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */
+ u64 nr;
+ u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */
  };

  struct perf_callchain_entry_ctx {
  struct perf_callchain_entry *entry;
  u32 max_stack;
  u32 nr;
  short contexts;
  bool contexts_maxed;
  };

  typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
@@ -121,8 +113,8 @@ static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag)
  * already stored in age order, the hw_idx should be 0.
  */
  struct perf_branch_stack {
- __u64 nr;
- __u64 hw_idx;
+ u64 nr;
+ u64 hw_idx;
  struct perf_branch_entry entries[];
  };

@@ -132,10 +124,10 @@ struct task_struct;
  * extra PMU register associated with an event
  */
  struct hw_perf_event_extra {
  u64 config; /* register value */
  unsigned int reg; /* register address or index */
  int alloc; /* extra register already allocated */
  int idx; /* index in shared_regs->regs[] */
  };

  /**
@@ -144,8 +136,8 @@ struct hw_perf_event_extra {
  * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific
  * usage.
  */
- #define PERF_EVENT_FLAG_ARCH 0x000fffff
+ #define PERF_EVENT_FLAG_ARCH 0x0fffffff
  #define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000

  static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0);

@@ -157,7 +149,9 @@ struct hw_perf_event {
  union {
  struct { /* hardware */
  u64 config;
+ u64 config1;
  u64 last_tag;
+ u64 dyn_constraint;
  unsigned long config_base;
  unsigned long event_base;
  int event_base_rdpmc;
@@ -225,9 +219,14 @@ struct hw_perf_event {
  /*
  * hw_perf_event::state flags; used to track the PERF_EF_* state.
  */
- #define PERF_HES_STOPPED 0x01 /* the counter is stopped */
- #define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */
- #define PERF_HES_ARCH 0x04
+
+ /* the counter is stopped */
+ #define PERF_HES_STOPPED 0x01
+
+ /* event->count up-to-date */
+ #define PERF_HES_UPTODATE 0x02
+
+ #define PERF_HES_ARCH 0x04

  int state;

@@ -276,7 +275,7 @@ struct hw_perf_event {
  */
  u64 freq_time_stamp;
  u64 freq_count_stamp;
- #endif
+ #endif /* CONFIG_PERF_EVENTS */
  };

  struct perf_event;
@@ -285,28 +284,33 @@ struct perf_event_pmu_context;
  /*
  * Common implementation detail of pmu::{start,commit,cancel}_txn
  */
- #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */
- #define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */
+
+ /* txn to add/schedule event on PMU */
+ #define PERF_PMU_TXN_ADD 0x1
+
+ /* txn to read event group from PMU */
+ #define PERF_PMU_TXN_READ 0x2

  /**
  * pmu::capabilities flags
  */
  #define PERF_PMU_CAP_NO_INTERRUPT 0x0001
  #define PERF_PMU_CAP_NO_NMI 0x0002
  #define PERF_PMU_CAP_AUX_NO_SG 0x0004
  #define PERF_PMU_CAP_EXTENDED_REGS 0x0008
  #define PERF_PMU_CAP_EXCLUSIVE 0x0010
  #define PERF_PMU_CAP_ITRACE 0x0020
  #define PERF_PMU_CAP_NO_EXCLUDE 0x0040
  #define PERF_PMU_CAP_AUX_OUTPUT 0x0080
  #define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100
  #define PERF_PMU_CAP_AUX_PAUSE 0x0200
+ #define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400

  /**
  * pmu::scope
  */
  enum perf_pmu_scope {
  PERF_PMU_SCOPE_NONE = 0,
  PERF_PMU_SCOPE_CORE,
  PERF_PMU_SCOPE_DIE,
  PERF_PMU_SCOPE_CLUSTER,
@ -325,6 +329,9 @@ struct perf_output_handle;
|
||||||
struct pmu {
|
struct pmu {
|
||||||
struct list_head entry;
|
struct list_head entry;
|
||||||
|
|
||||||
|
spinlock_t events_lock;
|
||||||
|
struct list_head events;
|
||||||
|
|
||||||
struct module *module;
|
struct module *module;
|
||||||
struct device *dev;
|
struct device *dev;
|
||||||
struct device *parent;
|
struct device *parent;
|
||||||
|
@ -387,11 +394,21 @@ struct pmu {
|
||||||
* Flags for ->add()/->del()/ ->start()/->stop(). There are
|
* Flags for ->add()/->del()/ ->start()/->stop(). There are
|
||||||
* matching hw_perf_event::state flags.
|
* matching hw_perf_event::state flags.
|
||||||
*/
|
*/
|
||||||
#define PERF_EF_START 0x01 /* start the counter when adding */
|
|
||||||
#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */
|
/* start the counter when adding */
|
||||||
#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
|
#define PERF_EF_START 0x01
|
||||||
#define PERF_EF_PAUSE 0x08 /* AUX area event, pause tracing */
|
|
||||||
#define PERF_EF_RESUME 0x10 /* AUX area event, resume tracing */
|
/* reload the counter when starting */
|
||||||
|
#define PERF_EF_RELOAD 0x02
|
||||||
|
|
||||||
|
/* update the counter when stopping */
|
||||||
|
#define PERF_EF_UPDATE 0x04
|
||||||
|
|
||||||
|
/* AUX area event, pause tracing */
|
||||||
|
#define PERF_EF_PAUSE 0x08
|
||||||
|
|
||||||
|
/* AUX area event, resume tracing */
|
||||||
|
#define PERF_EF_RESUME 0x10
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Adds/Removes a counter to/from the PMU, can be done inside a
|
* Adds/Removes a counter to/from the PMU, can be done inside a
|
||||||
|
@ -590,10 +607,10 @@ enum perf_addr_filter_action_t {
|
||||||
* This is a hardware-agnostic filter configuration as specified by the user.
|
* This is a hardware-agnostic filter configuration as specified by the user.
|
||||||
*/
|
*/
|
||||||
struct perf_addr_filter {
|
struct perf_addr_filter {
|
||||||
struct list_head entry;
|
struct list_head entry;
|
||||||
struct path path;
|
struct path path;
|
||||||
unsigned long offset;
|
unsigned long offset;
|
||||||
unsigned long size;
|
unsigned long size;
|
||||||
enum perf_addr_filter_action_t action;
|
enum perf_addr_filter_action_t action;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -608,23 +625,24 @@ struct perf_addr_filter {
  * bundled together; see perf_event_addr_filters().
  */
 struct perf_addr_filters_head {
 	struct list_head	list;
 	raw_spinlock_t		lock;
 	unsigned int		nr_file_filters;
 };
 
 struct perf_addr_filter_range {
 	unsigned long		start;
 	unsigned long		size;
 };
 
 /**
  * enum perf_event_state - the states of an event:
  */
 enum perf_event_state {
-	PERF_EVENT_STATE_DEAD		= -4,
-	PERF_EVENT_STATE_EXIT		= -3,
-	PERF_EVENT_STATE_ERROR		= -2,
+	PERF_EVENT_STATE_DEAD		= -5,
+	PERF_EVENT_STATE_REVOKED	= -4,	/* pmu gone, must not touch */
+	PERF_EVENT_STATE_EXIT		= -3,	/* task died, still inherit */
+	PERF_EVENT_STATE_ERROR		= -2,	/* scheduling error, can enable */
 	PERF_EVENT_STATE_OFF		= -1,
 	PERF_EVENT_STATE_INACTIVE	= 0,
 	PERF_EVENT_STATE_ACTIVE		= 1,
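PERF_EVENT_STATE_REVOKED is the state events are moved to when their PMU is unregistered out from under them, which is part of what makes perf_pmu_unregister() usable at runtime. Purely as illustration of what the new comment means (a sketch, not code from this merge):

    #include <linux/perf_event.h>

    /*
     * Illustrative only: a REVOKED event's PMU may already be gone, so no
     * code path may touch the event's hardware state anymore.
     */
    static bool my_event_pmu_is_gone(struct perf_event *event)
    {
        return event->state == PERF_EVENT_STATE_REVOKED;
    }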
@@ -662,24 +680,24 @@ struct swevent_hlist {
 	struct rcu_head		rcu_head;
 };
 
 #define PERF_ATTACH_CONTEXT	0x0001
 #define PERF_ATTACH_GROUP	0x0002
 #define PERF_ATTACH_TASK	0x0004
 #define PERF_ATTACH_TASK_DATA	0x0008
 #define PERF_ATTACH_GLOBAL_DATA	0x0010
 #define PERF_ATTACH_SCHED_CB	0x0020
 #define PERF_ATTACH_CHILD	0x0040
 #define PERF_ATTACH_EXCLUSIVE	0x0080
 #define PERF_ATTACH_CALLCHAIN	0x0100
 #define PERF_ATTACH_ITRACE	0x0200
 
 struct bpf_prog;
 struct perf_cgroup;
 struct perf_buffer;
 
 struct pmu_event_list {
 	raw_spinlock_t		lock;
 	struct list_head	list;
 };
 
 /*
@@ -689,12 +707,12 @@ struct pmu_event_list {
  * disabled is sufficient since it will hold-off the IPIs.
  */
 #ifdef CONFIG_PROVE_LOCKING
-#define lockdep_assert_event_ctx(event)				\
+# define lockdep_assert_event_ctx(event)			\
 	WARN_ON_ONCE(__lockdep_enabled &&			\
 		     (this_cpu_read(hardirqs_enabled) &&	\
 		      lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD))
 #else
-#define lockdep_assert_event_ctx(event)
+# define lockdep_assert_event_ctx(event)
 #endif
 
 #define for_each_sibling_event(sibling, event)			\
@@ -852,9 +870,9 @@ struct perf_event {
 #ifdef CONFIG_EVENT_TRACING
 	struct trace_event_call		*tp_event;
 	struct event_filter		*filter;
-#ifdef CONFIG_FUNCTION_TRACER
+# ifdef CONFIG_FUNCTION_TRACER
 	struct ftrace_ops		ftrace_ops;
-#endif
+# endif
 #endif
 
 #ifdef CONFIG_CGROUP_PERF
@@ -865,6 +883,7 @@ struct perf_event {
 	void				*security;
 #endif
 	struct list_head		sb_list;
+	struct list_head		pmu_list;
 
 	/*
 	 * Certain events gets forwarded to another pmu internally by over-
@@ -872,7 +891,7 @@ struct perf_event {
 	 * of it. event->orig_type contains original 'type' requested by
 	 * user.
 	 */
-	__u32				orig_type;
+	u32				orig_type;
 #endif /* CONFIG_PERF_EVENTS */
 };
 
@@ -937,8 +956,8 @@ static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc)
 }
 
 struct perf_event_groups {
 	struct rb_root		tree;
 	u64			index;
 };
 
 
@@ -1155,7 +1174,7 @@ extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags);
 extern void perf_event_itrace_started(struct perf_event *event);
 
 extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
-extern void perf_pmu_unregister(struct pmu *pmu);
+extern int perf_pmu_unregister(struct pmu *pmu);
 
 extern void __perf_event_task_sched_in(struct task_struct *prev,
 				       struct task_struct *task);
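perf_pmu_unregister() now returns an error code instead of void, so teardown paths can report failure. A minimal sketch, assuming a driver-private struct pmu that was registered earlier (the mydrv_* names are hypothetical):

    #include <linux/perf_event.h>
    #include <linux/printk.h>

    static struct pmu mydrv_pmu;   /* assumed to have been perf_pmu_register()ed */

    static void mydrv_teardown(void)
    {
        int ret = perf_pmu_unregister(&mydrv_pmu);

        if (ret)
            pr_warn("mydrv: perf_pmu_unregister() failed: %d\n", ret);
    }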
@@ -1181,16 +1200,18 @@ extern void perf_pmu_resched(struct pmu *pmu);
 extern int perf_event_refresh(struct perf_event *event, int refresh);
 extern void perf_event_update_userpage(struct perf_event *event);
 extern int perf_event_release_kernel(struct perf_event *event);
 
 extern struct perf_event *
 perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				 int cpu,
 				 struct task_struct *task,
 				 perf_overflow_handler_t callback,
 				 void *context);
 
 extern void perf_pmu_migrate_context(struct pmu *pmu,
 				     int src_cpu, int dst_cpu);
-int perf_event_read_local(struct perf_event *event, u64 *value,
+extern int perf_event_read_local(struct perf_event *event, u64 *value,
 				 u64 *enabled, u64 *running);
 extern u64 perf_event_read_value(struct perf_event *event,
 				 u64 *enabled, u64 *running);
 
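perf_event_create_kernel_counter() is the in-kernel counterpart of the perf_event_open() syscall. A hedged sketch of creating a CPU-bound cycle counter from kernel code (the function and callback names are illustrative, error handling is trimmed, and the call returns an ERR_PTR() on failure):

    #include <linux/perf_event.h>

    static void my_overflow(struct perf_event *event,
                            struct perf_sample_data *data, struct pt_regs *regs)
    {
        /* Runs from NMI/IRQ context on counter overflow; keep it short. */
    }

    static struct perf_event *create_cycle_counter(int cpu)
    {
        struct perf_event_attr attr = {
            .type          = PERF_TYPE_HARDWARE,
            .config        = PERF_COUNT_HW_CPU_CYCLES,
            .size          = sizeof(attr),
            .sample_period = 1000000,
        };

        /* task == NULL: count on 'cpu' no matter which task is running. */
        return perf_event_create_kernel_counter(&attr, cpu, NULL,
                                                my_overflow, NULL);
    }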
@@ -1407,14 +1428,14 @@ static inline u32 perf_sample_data_size(struct perf_sample_data *data,
  */
 static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br)
 {
 	br->mispred = 0;
 	br->predicted = 0;
 	br->in_tx = 0;
 	br->abort = 0;
 	br->cycles = 0;
 	br->type = 0;
 	br->spec = PERF_BR_SPEC_NA;
 	br->reserved = 0;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
@@ -1603,7 +1624,17 @@ extern void perf_event_bpf_event(struct bpf_prog *prog,
 				 enum perf_bpf_event_type type,
 				 u16 flags);
 
+#define PERF_GUEST_ACTIVE	0x01
+#define PERF_GUEST_USER		0x02
+
+struct perf_guest_info_callbacks {
+	unsigned int		(*state)(void);
+	unsigned long		(*get_ip)(void);
+	unsigned int		(*handle_intel_pt_intr)(void);
+};
+
 #ifdef CONFIG_GUEST_PERF_EVENTS
+
 extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
 
 DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state);
@@ -1614,21 +1645,27 @@ static inline unsigned int perf_guest_state(void)
 {
 	return static_call(__perf_guest_state)();
 }
 
 static inline unsigned long perf_guest_get_ip(void)
 {
 	return static_call(__perf_guest_get_ip)();
 }
 
 static inline unsigned int perf_guest_handle_intel_pt_intr(void)
 {
 	return static_call(__perf_guest_handle_intel_pt_intr)();
 }
 
 extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
 extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs);
-#else
+
+#else /* !CONFIG_GUEST_PERF_EVENTS: */
+
 static inline unsigned int perf_guest_state(void) { return 0; }
 static inline unsigned long perf_guest_get_ip(void) { return 0; }
 static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; }
-#endif /* CONFIG_GUEST_PERF_EVENTS */
+
+#endif /* !CONFIG_GUEST_PERF_EVENTS */
 
 extern void perf_event_exec(void);
 extern void perf_event_comm(struct task_struct *tsk, bool exec);
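struct perf_guest_info_callbacks and the PERF_GUEST_* flags are now defined unconditionally, outside the CONFIG_GUEST_PERF_EVENTS block. The way a hypervisor wires these up looks roughly like the sketch below; it is modeled on the usual registration pattern rather than taken from this merge, and the my_hv_* helpers are hypothetical:

    #include <linux/perf_event.h>

    /* Hypothetical hypervisor-side queries. */
    static bool my_hv_in_guest(void);
    static bool my_hv_guest_is_user(void);
    static unsigned long my_hv_guest_rip(void);

    static unsigned int my_hv_guest_state(void)
    {
        /* Tell perf whether a guest is running and at which privilege level. */
        if (!my_hv_in_guest())
            return 0;
        return PERF_GUEST_ACTIVE | (my_hv_guest_is_user() ? PERF_GUEST_USER : 0);
    }

    static unsigned long my_hv_guest_get_ip(void)
    {
        return my_hv_guest_rip();   /* guest instruction pointer for samples */
    }

    static struct perf_guest_info_callbacks my_hv_guest_cbs = {
        .state  = my_hv_guest_state,
        .get_ip = my_hv_guest_get_ip,
        /* .handle_intel_pt_intr is optional and left unset here. */
    };

    /* Registered once at hypervisor init, unregistered on teardown:
     *   perf_register_guest_info_callbacks(&my_hv_guest_cbs);
     *   perf_unregister_guest_info_callbacks(&my_hv_guest_cbs);
     */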
@@ -1658,6 +1695,7 @@ static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx *
 {
 	if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) {
 		struct perf_callchain_entry *entry = ctx->entry;
+
 		entry->ip[entry->nr++] = ip;
 		++ctx->contexts;
 		return 0;
@@ -1671,6 +1709,7 @@ static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64
 {
 	if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) {
 		struct perf_callchain_entry *entry = ctx->entry;
+
 		entry->ip[entry->nr++] = ip;
 		++ctx->nr;
 		return 0;
@@ -1697,7 +1736,7 @@ static inline int perf_is_paranoid(void)
 	return sysctl_perf_event_paranoid > -1;
 }
 
-int perf_allow_kernel(void);
+extern int perf_allow_kernel(void);
 
 static inline int perf_allow_cpu(void)
 {
@@ -1760,7 +1799,7 @@ static inline bool needs_branch_stack(struct perf_event *event)
 
 static inline bool has_aux(struct perf_event *event)
 {
-	return event->pmu->setup_aux;
+	return event->pmu && event->pmu->setup_aux;
 }
 
 static inline bool has_aux_action(struct perf_event *event)
@@ -1819,7 +1858,7 @@ extern int perf_output_begin_backward(struct perf_output_handle *handle,
 
 extern void perf_output_end(struct perf_output_handle *handle);
 extern unsigned int perf_output_copy(struct perf_output_handle *handle,
 				     const void *buf, unsigned int len);
 extern unsigned int perf_output_skip(struct perf_output_handle *handle,
 				     unsigned int len);
 extern long perf_output_copy_aux(struct perf_output_handle *aux_handle,
@@ -1836,7 +1875,9 @@ extern void perf_event_task_tick(void);
 extern int perf_event_account_interrupt(struct perf_event *event);
 extern int perf_event_period(struct perf_event *event, u64 value);
 extern u64 perf_event_pause(struct perf_event *event, bool reset);
 
 #else /* !CONFIG_PERF_EVENTS: */
 
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
 		      struct perf_event *event)			{ return NULL; }
@@ -1914,19 +1955,14 @@ static inline void perf_event_disable(struct perf_event *event) { }
 static inline int __perf_event_disable(void *info)			{ return -1; }
 static inline void perf_event_task_tick(void)				{ }
 static inline int perf_event_release_kernel(struct perf_event *event)	{ return 0; }
-static inline int perf_event_period(struct perf_event *event, u64 value)
-{
-	return -EINVAL;
-}
-static inline u64 perf_event_pause(struct perf_event *event, bool reset)
-{
-	return 0;
-}
-static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *regs)
-{
-	return 0;
-}
-#endif
+static inline int
+perf_event_period(struct perf_event *event, u64 value)			{ return -EINVAL; }
+static inline u64
+perf_event_pause(struct perf_event *event, bool reset)			{ return 0; }
+static inline int
+perf_exclude_event(struct perf_event *event, struct pt_regs *regs)	{ return 0; }
+
+#endif /* !CONFIG_PERF_EVENTS */
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
 extern void perf_restore_debug_store(void);
@@ -1934,31 +1970,31 @@ extern void perf_restore_debug_store(void);
 static inline void perf_restore_debug_store(void) { }
 #endif
 
 #define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
 
 struct perf_pmu_events_attr {
 	struct device_attribute	attr;
 	u64			id;
 	const char		*event_str;
 };
 
 struct perf_pmu_events_ht_attr {
 	struct device_attribute	attr;
 	u64			id;
 	const char		*event_str_ht;
 	const char		*event_str_noht;
 };
 
 struct perf_pmu_events_hybrid_attr {
 	struct device_attribute	attr;
 	u64			id;
 	const char		*event_str;
 	u64			pmu_type;
 };
 
 struct perf_pmu_format_hybrid_attr {
 	struct device_attribute	attr;
 	u64			pmu_type;
 };
 
 ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
@@ -2000,11 +2036,11 @@ static struct device_attribute format_attr_##_name = __ATTR_RO(_name)
 
 /* Performance counter hotplug functions */
 #ifdef CONFIG_PERF_EVENTS
-int perf_event_init_cpu(unsigned int cpu);
-int perf_event_exit_cpu(unsigned int cpu);
+extern int perf_event_init_cpu(unsigned int cpu);
+extern int perf_event_exit_cpu(unsigned int cpu);
 #else
-#define perf_event_init_cpu	NULL
-#define perf_event_exit_cpu	NULL
+# define perf_event_init_cpu	NULL
+# define perf_event_exit_cpu	NULL
 #endif
 
 extern void arch_perf_update_userpage(struct perf_event *event,
[File diff suppressed because it is too large]
@@ -2069,11 +2069,6 @@ static struct cpuhp_step cpuhp_hp_states[] = {
 		.teardown.single	= NULL,
 		.cant_stop		= true,
 	},
-	[CPUHP_PERF_PREPARE] = {
-		.name			= "perf:prepare",
-		.startup.single		= perf_event_init_cpu,
-		.teardown.single	= perf_event_exit_cpu,
-	},
 	[CPUHP_RANDOM_PREPARE] = {
 		.name			= "random:prepare",
 		.startup.single		= random_prepare_cpu,
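This drops the static CPUHP_PERF_PREPARE entry from the hotplug state table, so perf_event_init_cpu()/perf_event_exit_cpu() are no longer invoked through this array. As general background on what such an entry does (a generic sketch of the cpuhp API, not the replacement code in this merge; the my_* names are hypothetical):

    #include <linux/cpuhotplug.h>

    static int my_prepare_cpu(unsigned int cpu)
    {
        /* Allocate per-CPU state before the CPU comes up. */
        return 0;
    }

    static int my_dead_cpu(unsigned int cpu)
    {
        /* Release per-CPU state after the CPU has gone down. */
        return 0;
    }

    static int __init my_hotplug_init(void)
    {
        /* Dynamic registration; the removed perf entry was the static-table form. */
        int ret = cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "subsys:prepare",
                                    my_prepare_cpu, my_dead_cpu);
        return ret < 0 ? ret : 0;
    }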
[File diff suppressed because it is too large]
@@ -679,7 +679,15 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
 {
 	bool overwrite = !(flags & RING_BUFFER_WRITABLE);
 	int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu);
-	int ret = -ENOMEM, max_order;
+	bool use_contiguous_pages = event->pmu->capabilities & (
+		PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_PREFER_LARGE);
+	/*
+	 * Initialize max_order to 0 for page allocation. This allocates single
+	 * pages to minimize memory fragmentation. This is overridden if the
+	 * PMU needs or prefers contiguous pages (use_contiguous_pages = true).
+	 */
+	int max_order = 0;
+	int ret = -ENOMEM;
 
 	if (!has_aux(event))
 		return -EOPNOTSUPP;
@@ -689,8 +697,8 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
 
 	if (!overwrite) {
 		/*
-		 * Watermark defaults to half the buffer, and so does the
-		 * max_order, to aid PMU drivers in double buffering.
+		 * Watermark defaults to half the buffer, to aid PMU drivers
+		 * in double buffering.
 		 */
 		if (!watermark)
 			watermark = min_t(unsigned long,
@@ -698,16 +706,19 @@ int rb_alloc_aux(struct perf_buffer *rb, struct perf_event *event,
 					  (unsigned long)nr_pages << (PAGE_SHIFT - 1));
 
 		/*
-		 * Use aux_watermark as the basis for chunking to
-		 * help PMU drivers honor the watermark.
+		 * If using contiguous pages, use aux_watermark as the basis
+		 * for chunking to help PMU drivers honor the watermark.
 		 */
-		max_order = get_order(watermark);
+		if (use_contiguous_pages)
+			max_order = get_order(watermark);
 	} else {
 		/*
-		 * We need to start with the max_order that fits in nr_pages,
-		 * not the other way around, hence ilog2() and not get_order.
+		 * If using contiguous pages, we need to start with the
+		 * max_order that fits in nr_pages, not the other way around,
+		 * hence ilog2() and not get_order.
 		 */
-		max_order = ilog2(nr_pages);
+		if (use_contiguous_pages)
+			max_order = ilog2(nr_pages);
 		watermark = 0;
 	}
 
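With this change the AUX ring buffer is built from single (order-0) pages by default; only PMUs that declare they require or prefer physically contiguous memory keep the higher-order allocations. On the driver side that is a capability bit, roughly like this (a sketch; the mydrv_pmu fields shown are illustrative):

    #include <linux/perf_event.h>

    /*
     * A PMU whose hardware cannot scatter-gather its AUX buffer sets
     * PERF_PMU_CAP_AUX_NO_SG; one that merely performs better with large
     * chunks can set PERF_PMU_CAP_AUX_PREFER_LARGE. PMUs that set neither
     * now get non-contiguous, order-0 pages.
     */
    static struct pmu mydrv_pmu = {
        .capabilities = PERF_PMU_CAP_AUX_NO_SG,
        /* .setup_aux, .add, .del, .start, .stop, .read, ... */
    };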
[File diff suppressed because it is too large]
@@ -526,6 +526,12 @@ extern const struct bench bench_trig_uprobe_multi_push;
 extern const struct bench bench_trig_uretprobe_multi_push;
 extern const struct bench bench_trig_uprobe_multi_ret;
 extern const struct bench bench_trig_uretprobe_multi_ret;
+#ifdef __x86_64__
+extern const struct bench bench_trig_uprobe_nop5;
+extern const struct bench bench_trig_uretprobe_nop5;
+extern const struct bench bench_trig_uprobe_multi_nop5;
+extern const struct bench bench_trig_uretprobe_multi_nop5;
+#endif
 
 extern const struct bench bench_rb_libbpf;
 extern const struct bench bench_rb_custom;
@@ -586,6 +592,12 @@ static const struct bench *benchs[] = {
 	&bench_trig_uretprobe_multi_push,
 	&bench_trig_uprobe_multi_ret,
 	&bench_trig_uretprobe_multi_ret,
+#ifdef __x86_64__
+	&bench_trig_uprobe_nop5,
+	&bench_trig_uretprobe_nop5,
+	&bench_trig_uprobe_multi_nop5,
+	&bench_trig_uretprobe_multi_nop5,
+#endif
 	/* ringbuf/perfbuf benchmarks */
 	&bench_rb_libbpf,
 	&bench_rb_custom,
@@ -333,6 +333,20 @@ static void *uprobe_producer_ret(void *input)
 	return NULL;
 }
 
+#ifdef __x86_64__
+__nocf_check __weak void uprobe_target_nop5(void)
+{
+	asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00");
+}
+
+static void *uprobe_producer_nop5(void *input)
+{
+	while (true)
+		uprobe_target_nop5();
+	return NULL;
+}
+#endif
+
 static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
 {
 	size_t uprobe_offset;
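The byte sequence 0x0f 0x1f 0x44 0x00 0x00 is the standard 5-byte x86-64 NOP (nopl 0x0(%rax,%rax,1)), i.e. the width of a rel32 call/jmp patch site, which is what the new nop5 benchmarks are meant to exercise. The __weak attribute presumably keeps the target from being inlined or optimized away so the uprobe has a stable address to attach to. Written with the mnemonic instead of raw bytes, an equivalent target would look like this (illustrative only, not part of the selftest):

    /* Same 5-byte instruction, spelled as a mnemonic rather than raw bytes. */
    __attribute__((weak)) void uprobe_target_nop5_alt(void)
    {
        asm volatile ("nopl 0x0(%rax,%rax,1)");
    }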
@@ -448,6 +462,28 @@ static void uretprobe_multi_ret_setup(void)
 	usetup(true, true /* use_multi */, &uprobe_target_ret);
 }
 
+#ifdef __x86_64__
+static void uprobe_nop5_setup(void)
+{
+	usetup(false, false /* !use_multi */, &uprobe_target_nop5);
+}
+
+static void uretprobe_nop5_setup(void)
+{
+	usetup(true, false /* !use_multi */, &uprobe_target_nop5);
+}
+
+static void uprobe_multi_nop5_setup(void)
+{
+	usetup(false, true /* use_multi */, &uprobe_target_nop5);
+}
+
+static void uretprobe_multi_nop5_setup(void)
+{
+	usetup(true, true /* use_multi */, &uprobe_target_nop5);
+}
+#endif
+
 const struct bench bench_trig_syscall_count = {
 	.name = "trig-syscall-count",
 	.validate = trigger_validate,
@@ -506,3 +542,9 @@ BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
 BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
 BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
 BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");
+#ifdef __x86_64__
+BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5");
+BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5");
+BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5");
+BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5");
+#endif
@@ -2,7 +2,7 @@
 
 set -eufo pipefail
 
-for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret}
+for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5}
 do
 	summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
 	printf "%-15s: %s\n" $i "$summary"