Mirror of git://git.yoctoproject.org/linux-yocto.git (synced 2025-10-22 15:03:53 +02:00)
Merge tag 'perf_urgent_for_v6.16_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Borislav Petkov:

 - Avoid a crash on a heterogeneous machine where not all cores support
   the same hw events features

 - Avoid a deadlock when throttling events

 - Document the perf event states more

 - Make sure a number of perf paths switching off or rescheduling events
   call perf_cgroup_event_disable()

 - Make sure perf does task sampling before its userspace mapping is
   torn down, and not after

* tag 'perf_urgent_for_v6.16_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Fix crash in icl_update_topdown_event()
  perf: Fix the throttle error of some clock events
  perf: Add comment to enum perf_event_state
  perf/core: Fix WARN in perf_cgroup_switch()
  perf: Fix dangling cgroup pointer in cpuctx
  perf: Fix cgroup state vs ERROR
  perf: Fix sample vs do_exit()
commit 17ef32ae66
@@ -2826,7 +2826,7 @@ static void intel_pmu_read_event(struct perf_event *event)
 	 * If the PEBS counters snapshotting is enabled,
 	 * the topdown event is available in PEBS records.
 	 */
-	if (is_topdown_event(event) && !is_pebs_counter_event_group(event))
+	if (is_topdown_count(event) && !is_pebs_counter_event_group(event))
 		static_call(intel_pmu_update_topdown_event)(event, NULL);
 	else
 		intel_pmu_drain_pebs_buffer();
@@ -635,8 +635,46 @@ struct perf_addr_filter_range {
 	unsigned long		size;
 };
 
-/**
- * enum perf_event_state - the states of an event:
+/*
+ * The normal states are:
+ *
+ *            ACTIVE    --.
+ *               ^        |
+ *               |        |
+ *      sched_{in,out}()  |
+ *               |        |
+ *               v        |
+ *      ,---> INACTIVE  --+  <-.
+ *      |                 |    |
+ *      |                {dis,en}able()
+ *   sched_in()           |    |
+ *      |        OFF   <--'  --+
+ *      |                      |
+ *      `---> ERROR    ------'
+ *
+ * That is:
+ *
+ * sched_in:	INACTIVE          -> {ACTIVE,ERROR}
+ * sched_out:	ACTIVE            -> INACTIVE
+ * disable:	{ACTIVE,INACTIVE} -> OFF
+ * enable:	{OFF,ERROR}       -> INACTIVE
+ *
+ * Where {OFF,ERROR} are disabled states.
+ *
+ * Then we have the {EXIT,REVOKED,DEAD} states which are various shades of
+ * defunct events:
+ *
+ *  - EXIT means task that the event was assigned to died, but child events
+ *    still live, and further children can still be created. But the event
+ *    itself will never be active again. It can only transition to
+ *    {REVOKED,DEAD};
+ *
+ *  - REVOKED means the PMU the event was associated with is gone; all
+ *    functionality is stopped but the event is still alive. Can only
+ *    transition to DEAD;
+ *
+ *  - DEAD event really is DYING tearing down state and freeing bits.
+ *
  */
 enum perf_event_state {
 	PERF_EVENT_STATE_DEAD		= -5,
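The state machine documented above maps directly onto what user space sees through the perf_event_open(2) API: an event created with attr.disabled = 1 starts out OFF, PERF_EVENT_IOC_ENABLE moves it to INACTIVE (and to ACTIVE once its context is scheduled in), and PERF_EVENT_IOC_DISABLE sends it back to OFF. A minimal user-space sketch exercising those transitions (error handling mostly trimmed):

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.disabled = 1;			/* event starts in the OFF state */

	/* Count instructions of this task, on any CPU. */
	int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);	/* OFF -> INACTIVE -> ACTIVE */
	for (volatile int i = 0; i < 1000000; i++)
		;				/* some work to count */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);	/* back to OFF; count is frozen */

	read(fd, &count, sizeof(count));
	printf("instructions: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}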
@@ -207,6 +207,19 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
 	__perf_ctx_unlock(&cpuctx->ctx);
 }
 
+typedef struct {
+	struct perf_cpu_context *cpuctx;
+	struct perf_event_context *ctx;
+} class_perf_ctx_lock_t;
+
+static inline void class_perf_ctx_lock_destructor(class_perf_ctx_lock_t *_T)
+{ perf_ctx_unlock(_T->cpuctx, _T->ctx); }
+
+static inline class_perf_ctx_lock_t
+class_perf_ctx_lock_constructor(struct perf_cpu_context *cpuctx,
+				struct perf_event_context *ctx)
+{ perf_ctx_lock(cpuctx, ctx); return (class_perf_ctx_lock_t){ cpuctx, ctx }; }
+
 #define TASK_TOMBSTONE ((void *)-1L)
 
 static bool is_kernel_event(struct perf_event *event)
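The constructor/destructor pair above is the shape expected by the kernel's scope-based cleanup helpers (linux/cleanup.h) for a named lock class, which is what lets the next hunk write guard(perf_ctx_lock)(...) and have the unlock happen automatically on every return path. As a rough illustration of the underlying mechanism, here is a user-space sketch built on the same compiler feature, __attribute__((cleanup)); the names (mutex_guard_t, guard_mutex, demo_lock) are invented for this example and are not kernel APIs:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;

typedef struct { pthread_mutex_t *m; } mutex_guard_t;

/* "Destructor": runs automatically when the guard variable leaves scope. */
static void mutex_guard_cleanup(mutex_guard_t *g)
{
	pthread_mutex_unlock(g->m);
}

/* "Constructor": takes the lock and returns the guard object. */
static mutex_guard_t mutex_guard_ctor(pthread_mutex_t *m)
{
	pthread_mutex_lock(m);
	return (mutex_guard_t){ m };
}

#define guard_mutex(m) \
	mutex_guard_t __attribute__((cleanup(mutex_guard_cleanup), unused)) \
		__guard = mutex_guard_ctor(m)

static int shared_state;

static int update(int v)
{
	guard_mutex(&demo_lock);	/* locked here ...               */
	if (v < 0)
		return -1;		/* ... unlocked on this return   */
	shared_state = v;
	return 0;			/* ... and on this one as well   */
}

int main(void)
{
	update(5);
	printf("%d\n", shared_state);
	return 0;
}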
@@ -944,7 +957,13 @@ static void perf_cgroup_switch(struct task_struct *task)
 	if (READ_ONCE(cpuctx->cgrp) == cgrp)
 		return;
 
-	perf_ctx_lock(cpuctx, cpuctx->task_ctx);
+	guard(perf_ctx_lock)(cpuctx, cpuctx->task_ctx);
+	/*
+	 * Re-check, could've raced vs perf_remove_from_context().
+	 */
+	if (READ_ONCE(cpuctx->cgrp) == NULL)
+		return;
+
 	perf_ctx_disable(&cpuctx->ctx, true);
 
 	ctx_sched_out(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP);
@@ -962,7 +981,6 @@ static void perf_cgroup_switch(struct task_struct *task)
 	ctx_sched_in(&cpuctx->ctx, NULL, EVENT_ALL|EVENT_CGROUP);
 
 	perf_ctx_enable(&cpuctx->ctx, true);
-	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 }
 
 static int perf_cgroup_ensure_storage(struct perf_event *event,
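The re-check added in the first perf_cgroup_switch() hunk is the classic check / lock / re-check pattern: cpuctx->cgrp was read locklessly, so it may have been cleared by perf_remove_from_context() before the lock was actually acquired. A minimal user-space sketch of the same idea, assuming a pthread mutex and an invented "resource" pointer (a real lockless read would also want atomics, i.e. the kernel's READ_ONCE()):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct resource { int users; } *res;	/* may be set to NULL concurrently */

static void use_resource(void)
{
	/* Cheap lockless check first: nothing to do if it is already gone. */
	if (!res)
		return;

	pthread_mutex_lock(&lock);
	/*
	 * Re-check under the lock: another thread may have torn the
	 * resource down between the check above and the lock().
	 */
	if (!res) {
		pthread_mutex_unlock(&lock);
		return;
	}
	res->users++;
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	static struct resource r;

	res = &r;
	use_resource();
	printf("users: %d\n", r.users);
	return 0;
}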
@@ -2120,18 +2138,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	if (event->group_leader == event)
 		del_event_from_groups(event, ctx);
 
-	/*
-	 * If event was in error state, then keep it
-	 * that way, otherwise bogus counts will be
-	 * returned on read(). The only way to get out
-	 * of error state is by explicit re-enabling
-	 * of the event
-	 */
-	if (event->state > PERF_EVENT_STATE_OFF) {
-		perf_cgroup_event_disable(event, ctx);
-		perf_event_set_state(event, PERF_EVENT_STATE_OFF);
-	}
-
 	ctx->generation++;
 	event->pmu_ctx->nr_events--;
 }
@@ -2149,8 +2155,9 @@ perf_aux_output_match(struct perf_event *event, struct perf_event *aux_event)
 }
 
 static void put_event(struct perf_event *event);
-static void event_sched_out(struct perf_event *event,
-			    struct perf_event_context *ctx);
+static void __event_disable(struct perf_event *event,
+			    struct perf_event_context *ctx,
+			    enum perf_event_state state);
 
 static void perf_put_aux_event(struct perf_event *event)
 {
@@ -2183,8 +2190,7 @@ static void perf_put_aux_event(struct perf_event *event)
 		 * state so that we don't try to schedule it again. Note
 		 * that perf_event_enable() will clear the ERROR status.
 		 */
-		event_sched_out(iter, ctx);
-		perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
+		__event_disable(iter, ctx, PERF_EVENT_STATE_ERROR);
 	}
 }
@@ -2242,18 +2248,6 @@ static inline struct list_head *get_event_list(struct perf_event *event)
 				&event->pmu_ctx->flexible_active;
 }
 
-/*
- * Events that have PERF_EV_CAP_SIBLING require being part of a group and
- * cannot exist on their own, schedule them out and move them into the ERROR
- * state. Also see _perf_event_enable(), it will not be able to recover
- * this ERROR state.
- */
-static inline void perf_remove_sibling_event(struct perf_event *event)
-{
-	event_sched_out(event, event->ctx);
-	perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
-}
-
 static void perf_group_detach(struct perf_event *event)
 {
 	struct perf_event *leader = event->group_leader;
@@ -2289,8 +2283,15 @@ static void perf_group_detach(struct perf_event *event)
 	 */
 	list_for_each_entry_safe(sibling, tmp, &event->sibling_list, sibling_list) {
 
+		/*
+		 * Events that have PERF_EV_CAP_SIBLING require being part of
+		 * a group and cannot exist on their own, schedule them out
+		 * and move them into the ERROR state. Also see
+		 * _perf_event_enable(), it will not be able to recover this
+		 * ERROR state.
+		 */
 		if (sibling->event_caps & PERF_EV_CAP_SIBLING)
-			perf_remove_sibling_event(sibling);
+			__event_disable(sibling, ctx, PERF_EVENT_STATE_ERROR);
 
 		sibling->group_leader = sibling;
 		list_del_init(&sibling->sibling_list);
@@ -2493,11 +2494,14 @@ __perf_remove_from_context(struct perf_event *event,
 		state = PERF_EVENT_STATE_EXIT;
 	if (flags & DETACH_REVOKE)
 		state = PERF_EVENT_STATE_REVOKED;
-	if (flags & DETACH_DEAD) {
-		event->pending_disable = 1;
+	if (flags & DETACH_DEAD)
 		state = PERF_EVENT_STATE_DEAD;
-	}
+
 	event_sched_out(event, ctx);
+
+	if (event->state > PERF_EVENT_STATE_OFF)
+		perf_cgroup_event_disable(event, ctx);
+
 	perf_event_set_state(event, min(event->state, state));
 
 	if (flags & DETACH_GROUP)
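A note on the min() in the hunk above: the perf_event_state values are ordered integers, with "more defunct" states being more negative, so min(event->state, state) only ever moves an event further toward teardown and can never resurrect, say, a DEAD event back to EXIT. Only PERF_EVENT_STATE_DEAD = -5 is visible in this diff; the remaining values below are quoted from memory of the same enum and should be treated as an assumption. A small stand-alone C sketch of the property:

#include <stdio.h>

/* Values mirror include/linux/perf_event.h (only DEAD = -5 appears above;
 * the rest are assumed here for illustration). */
enum perf_event_state {
	PERF_EVENT_STATE_DEAD		= -5,
	PERF_EVENT_STATE_REVOKED	= -4,
	PERF_EVENT_STATE_EXIT		= -3,
	PERF_EVENT_STATE_ERROR		= -2,
	PERF_EVENT_STATE_OFF		= -1,
	PERF_EVENT_STATE_INACTIVE	=  0,
	PERF_EVENT_STATE_ACTIVE		=  1,
};

#define min(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	/* A later DETACH_EXIT cannot undo DEAD: min(-5, -3) == -5. */
	enum perf_event_state dead = min(PERF_EVENT_STATE_DEAD,
					 PERF_EVENT_STATE_EXIT);
	printf("%d\n", dead);	/* prints -5 */

	/* But an ACTIVE event is happily demoted to EXIT: min(1, -3) == -3. */
	enum perf_event_state exited = min(PERF_EVENT_STATE_ACTIVE,
					   PERF_EVENT_STATE_EXIT);
	printf("%d\n", exited);	/* prints -3 */
	return 0;
}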
@@ -2562,6 +2566,15 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long flags)
 	event_function_call(event, __perf_remove_from_context, (void *)flags);
 }
 
+static void __event_disable(struct perf_event *event,
+			    struct perf_event_context *ctx,
+			    enum perf_event_state state)
+{
+	event_sched_out(event, ctx);
+	perf_cgroup_event_disable(event, ctx);
+	perf_event_set_state(event, state);
+}
+
 /*
  * Cross CPU call to disable a performance event
  */
|
@ -2576,13 +2589,18 @@ static void __perf_event_disable(struct perf_event *event,
|
|||
perf_pmu_disable(event->pmu_ctx->pmu);
|
||||
ctx_time_update_event(ctx, event);
|
||||
|
||||
/*
|
||||
* When disabling a group leader, the whole group becomes ineligible
|
||||
* to run, so schedule out the full group.
|
||||
*/
|
||||
if (event == event->group_leader)
|
||||
group_sched_out(event, ctx);
|
||||
else
|
||||
event_sched_out(event, ctx);
|
||||
|
||||
perf_event_set_state(event, PERF_EVENT_STATE_OFF);
|
||||
perf_cgroup_event_disable(event, ctx);
|
||||
/*
|
||||
* But only mark the leader OFF; the siblings will remain
|
||||
* INACTIVE.
|
||||
*/
|
||||
__event_disable(event, ctx, PERF_EVENT_STATE_OFF);
|
||||
|
||||
perf_pmu_enable(event->pmu_ctx->pmu);
|
||||
}
|
||||
|
@@ -2656,8 +2674,8 @@ static void perf_event_unthrottle(struct perf_event *event, bool start)
 
 static void perf_event_throttle(struct perf_event *event)
 {
-	event->pmu->stop(event, 0);
 	event->hw.interrupts = MAX_INTERRUPTS;
+	event->pmu->stop(event, 0);
 	if (event == event->group_leader)
 		perf_log_throttle(event, 0);
 }
@@ -7439,6 +7457,10 @@ perf_sample_ustack_size(u16 stack_size, u16 header_size,
 	if (!regs)
 		return 0;
 
+	/* No mm, no stack, no dump. */
+	if (!current->mm)
+		return 0;
+
 	/*
 	 * Check if we fit in with the requested stack size into the:
 	 * - TASK_SIZE
@@ -8150,6 +8172,9 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
 	const u32 max_stack = event->attr.sample_max_stack;
 	struct perf_callchain_entry *callchain;
 
+	if (!current->mm)
+		user = false;
+
 	if (!kernel && !user)
 		return &__empty_callchain;
 
@@ -11749,7 +11774,12 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
-	if (is_sampling_event(event)) {
+	/*
+	 * The throttle can be triggered in the hrtimer handler.
+	 * The HRTIMER_NORESTART should be used to stop the timer,
+	 * rather than hrtimer_cancel(). See perf_swevent_hrtimer()
+	 */
+	if (is_sampling_event(event) && (hwc->interrupts != MAX_INTERRUPTS)) {
 		ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
 		local64_set(&hwc->period_left, ktime_to_ns(remaining));
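Together with the reordering in perf_event_throttle() above (hw.interrupts is set to MAX_INTERRUPTS before pmu->stop() is called), this lets the stop path recognise "stopping because of a throttle raised inside the hrtimer handler" and skip timer cancellation: hrtimer_cancel() waits for the running callback to finish, so calling it from that very callback would deadlock. A hedged sketch of the general rule for hrtimer callbacks follows; the demo_* names are invented, and setup/arming of the timer is omitted:

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/types.h>
#include <linux/compiler.h>

static bool demo_throttled;		/* set by some throttling path */

static enum hrtimer_restart demo_timer_fn(struct hrtimer *timer)
{
	/*
	 * Never call hrtimer_cancel(timer) from here: it would wait for
	 * this very handler to complete and spin forever.  To stop the
	 * timer from inside its own callback, return HRTIMER_NORESTART.
	 */
	if (READ_ONCE(demo_throttled))
		return HRTIMER_NORESTART;

	/* Otherwise re-arm for another period and keep running. */
	hrtimer_forward_now(timer, ms_to_ktime(10));
	return HRTIMER_RESTART;
}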
@@ -11804,7 +11834,8 @@ static void cpu_clock_event_start(struct perf_event *event, int flags)
 static void cpu_clock_event_stop(struct perf_event *event, int flags)
 {
 	perf_swevent_cancel_hrtimer(event);
-	cpu_clock_event_update(event);
+	if (flags & PERF_EF_UPDATE)
+		cpu_clock_event_update(event);
 }
 
 static int cpu_clock_event_add(struct perf_event *event, int flags)
@@ -11882,7 +11913,8 @@ static void task_clock_event_start(struct perf_event *event, int flags)
 static void task_clock_event_stop(struct perf_event *event, int flags)
 {
 	perf_swevent_cancel_hrtimer(event);
-	task_clock_event_update(event, event->ctx->time);
+	if (flags & PERF_EF_UPDATE)
+		task_clock_event_update(event, event->ctx->time);
 }
 
 static int task_clock_event_add(struct perf_event *event, int flags)
|
|
@ -940,6 +940,15 @@ void __noreturn do_exit(long code)
|
|||
taskstats_exit(tsk, group_dead);
|
||||
trace_sched_process_exit(tsk, group_dead);
|
||||
|
||||
/*
|
||||
* Since sampling can touch ->mm, make sure to stop everything before we
|
||||
* tear it down.
|
||||
*
|
||||
* Also flushes inherited counters to the parent - before the parent
|
||||
* gets woken up by child-exit notifications.
|
||||
*/
|
||||
perf_event_exit_task(tsk);
|
||||
|
||||
exit_mm();
|
||||
|
||||
if (group_dead)
|
||||
|
@@ -955,14 +964,6 @@ void __noreturn do_exit(long code)
 	exit_task_work(tsk);
 	exit_thread(tsk);
 
-	/*
-	 * Flush inherited counters to the parent - before the parent
-	 * gets woken up by child-exit notifications.
-	 *
-	 * because of cgroup mode, must be called before cgroup_exit()
-	 */
-	perf_event_exit_task(tsk);
-
 	sched_autogroup_exit_task(tsk);
 	cgroup_exit(tsk);
 