mirror of
git://git.yoctoproject.org/linux-yocto.git
synced 2026-01-06 01:47:54 +01:00
perf record: Add ratio-to-prev term
Provide ratio-to-prev term which allows the user to set the event sample period of two events corresponding to a desired ratio. If using on an Intel x86 platform with Auto Counter Reload support, also set corresponding event's config2 attribute with a bitmask which counters to reset and which counters to sample if the desired ratio is met or exceeded. On other platforms, only the sample period is affected by the ratio-to-prev term. Reviewed-by: Ian Rogers <irogers@google.com> Signed-off-by: Thomas Falcon <thomas.falcon@intel.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Dapeng Mi <dapeng1.mi@linux.intel.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
584754cbee
commit
6b9c0261b3
53
tools/perf/Documentation/intel-acr.txt
Normal file
53
tools/perf/Documentation/intel-acr.txt
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
Intel Auto Counter Reload Support
|
||||
---------------------------------
|
||||
Support for Intel Auto Counter Reload in perf tools
|
||||
|
||||
Auto counter reload provides a means for software to specify to hardware
|
||||
that certain counters, if supported, should be automatically reloaded
|
||||
upon overflow of chosen counters. By taking a sample only if the rate of
|
||||
one event exceeds some threshold relative to the rate of another event,
|
||||
this feature enables software to sample based on the relative rate of
|
||||
two or more events. To enable this, the user must provide a sample period
|
||||
term and a bitmask ("acr_mask") for each relevant event specifying the
|
||||
counters in an event group to reload if the event's specified sample
|
||||
period is exceeded.
|
||||
|
||||
For example, if the user desires to measure a scenario when IPC > 2,
|
||||
the event group might look like the one below:
|
||||
|
||||
perf record -e {cpu_atom/instructions,period=200000,acr_mask=0x2/, \
|
||||
cpu_atom/cycles,period=100000,acr_mask=0x3/} -- true
|
||||
|
||||
In this case, if the "instructions" counter exceeds the sample period of
|
||||
200000, the second counter, "cycles", will be reset and a sample will be
|
||||
taken. If the "cycles" period is exceeded first, both counters in the group will be
|
||||
reset. In this way, samples will only be taken for cases where IPC > 2.
|
||||
|
||||
The acr_mask term is a hexadecimal value representing a bitmask of the
|
||||
events in the group to be reset when the period is exceeded. In the
|
||||
example above, "instructions" is assigned an acr_mask of 0x2, meaning
|
||||
only the second event in the group is reloaded and a sample is taken
|
||||
for the first event. "cycles" is assigned an acr_mask of 0x3, meaning
|
||||
that both event counters will be reset if its sample period is exceeded
|
||||
first.
|
||||
|
||||
ratio-to-prev Event Term
|
||||
------------------------
|
||||
To simplify this, an event term "ratio-to-prev" is provided which is used
|
||||
alongside the sample period term or the -c/--count option. This would
|
||||
allow users to specify the desired relative rate between events as a
|
||||
ratio. Note: Both events compared must belong to the same PMU.
|
||||
|
||||
The command above would then become
|
||||
|
||||
perf record -e {cpu_atom/instructions/, \
|
||||
cpu_atom/cycles,period=100000,ratio-to-prev=0.5/} -- true
|
||||
|
||||
ratio-to-prev is the ratio of the event using the term relative
|
||||
to the previous event in the group. The previous event always counts
|
||||
as 1, so 0.5 above gives a 1:0.5 (that is, 2:1) ratio.
|
||||
|
||||
To sample for IPC < 2 for example, the events need to be reordered:
|
||||
|
||||
perf record -e {cpu_atom/cycles/, \
|
||||
cpu_atom/instructions,period=200000,ratio-to-prev=2.0/} -- true
|
||||
|
|
@ -393,6 +393,8 @@ Support raw format:
|
|||
. '--raw-dump [hw|sw|cache|tracepoint|pmu|event_glob]', shows the raw-dump of
|
||||
a certain kind of events.
|
||||
|
||||
include::intel-acr.txt[]
|
||||
|
||||
SEE ALSO
|
||||
--------
|
||||
linkperf:perf-stat[1], linkperf:perf-top[1],
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include <stdlib.h>
|
||||
#include "util/evlist.h"
|
||||
#include "util/evsel.h"
|
||||
#include "util/evsel_config.h"
|
||||
#include "util/env.h"
|
||||
#include "util/pmu.h"
|
||||
#include "util/pmus.h"
|
||||
|
|
@ -71,6 +72,57 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
|
|||
event_name);
|
||||
}
|
||||
|
||||
void arch_evsel__apply_ratio_to_prev(struct evsel *evsel,
|
||||
struct perf_event_attr *attr)
|
||||
{
|
||||
struct perf_event_attr *prev_attr = NULL;
|
||||
struct evsel *evsel_prev = NULL;
|
||||
const char *name = "acr_mask";
|
||||
int evsel_idx = 0;
|
||||
__u64 ev_mask, pr_ev_mask;
|
||||
|
||||
if (!perf_pmu__has_format(evsel->pmu, name)) {
|
||||
pr_err("'%s' does not have acr_mask format support\n", evsel->pmu->name);
|
||||
return;
|
||||
}
|
||||
if (perf_pmu__format_type(evsel->pmu, name) !=
|
||||
PERF_PMU_FORMAT_VALUE_CONFIG2) {
|
||||
pr_err("'%s' does not have config2 format support\n", evsel->pmu->name);
|
||||
return;
|
||||
}
|
||||
|
||||
evsel_prev = evsel__prev(evsel);
|
||||
if (!evsel_prev) {
|
||||
pr_err("Previous event does not exist.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
prev_attr = &evsel_prev->core.attr;
|
||||
|
||||
if (prev_attr->config2) {
|
||||
pr_err("'%s' has set config2 (acr_mask?) already, configuration not supported\n", evsel_prev->name);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* acr_mask (config2) is calculated using the event's index in
|
||||
* the event group. The first event will use the index of the
|
||||
* second event as its mask (e.g., 0x2), indicating that the
|
||||
* second event counter will be reset and a sample taken for
|
||||
* the first event if its counter overflows. The second event
|
||||
* will use the mask consisting of the first and second bits
|
||||
* (e.g., 0x3), meaning both counters will be reset if the
|
||||
* second event counter overflows.
|
||||
*/
|
||||
|
||||
evsel_idx = evsel__group_idx(evsel);
|
||||
ev_mask = 1ull << evsel_idx;
|
||||
pr_ev_mask = 1ull << (evsel_idx - 1);
|
||||
|
||||
prev_attr->config2 = ev_mask;
|
||||
attr->config2 = ev_mask | pr_ev_mask;
|
||||
}
|
||||
|
||||
static void ibs_l3miss_warn(void)
|
||||
{
|
||||
pr_warning(
|
||||
|
|
|
|||
|
|
@ -1092,6 +1092,71 @@ static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *
|
|||
}
|
||||
}
|
||||
|
||||
static void evsel__apply_ratio_to_prev(struct evsel *evsel,
|
||||
struct perf_event_attr *attr,
|
||||
struct record_opts *opts,
|
||||
const char *buf)
|
||||
{
|
||||
struct perf_event_attr *prev_attr = NULL;
|
||||
struct evsel *evsel_prev = NULL;
|
||||
u64 type = evsel->core.attr.sample_type;
|
||||
u64 prev_type = 0;
|
||||
double rtp;
|
||||
|
||||
rtp = strtod(buf, NULL);
|
||||
if (rtp <= 0) {
|
||||
pr_err("Invalid ratio-to-prev value %lf\n", rtp);
|
||||
return;
|
||||
}
|
||||
if (evsel == evsel__leader(evsel)) {
|
||||
pr_err("Invalid use of ratio-to-prev term without preceding element in group\n");
|
||||
return;
|
||||
}
|
||||
if (!evsel->pmu->is_core) {
|
||||
pr_err("Event using ratio-to-prev term must have a core PMU\n");
|
||||
return;
|
||||
}
|
||||
|
||||
evsel_prev = evsel__prev(evsel);
|
||||
if (!evsel_prev) {
|
||||
pr_err("Previous event does not exist.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (evsel_prev->pmu->type != evsel->pmu->type) {
|
||||
pr_err("Compared events (\"%s\", \"%s\") must have same PMU\n",
|
||||
evsel->name, evsel_prev->name);
|
||||
return;
|
||||
}
|
||||
|
||||
prev_attr = &evsel_prev->core.attr;
|
||||
prev_type = evsel_prev->core.attr.sample_type;
|
||||
|
||||
if (!(prev_type & PERF_SAMPLE_PERIOD)) {
|
||||
attr->sample_period = prev_attr->sample_period * rtp;
|
||||
attr->freq = 0;
|
||||
evsel__reset_sample_bit(evsel, PERIOD);
|
||||
} else if (!(type & PERF_SAMPLE_PERIOD)) {
|
||||
prev_attr->sample_period = attr->sample_period / rtp;
|
||||
prev_attr->freq = 0;
|
||||
evsel__reset_sample_bit(evsel_prev, PERIOD);
|
||||
} else {
|
||||
if (opts->user_interval != ULLONG_MAX) {
|
||||
prev_attr->sample_period = opts->user_interval;
|
||||
attr->sample_period = prev_attr->sample_period * rtp;
|
||||
prev_attr->freq = 0;
|
||||
attr->freq = 0;
|
||||
evsel__reset_sample_bit(evsel_prev, PERIOD);
|
||||
evsel__reset_sample_bit(evsel, PERIOD);
|
||||
} else {
|
||||
pr_err("Event period term or count (-c) must be set when using ratio-to-prev term.\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
arch_evsel__apply_ratio_to_prev(evsel, attr);
|
||||
}
|
||||
|
||||
static void evsel__apply_config_terms(struct evsel *evsel,
|
||||
struct record_opts *opts, bool track)
|
||||
{
|
||||
|
|
@ -1105,6 +1170,7 @@ static void evsel__apply_config_terms(struct evsel *evsel,
|
|||
u32 dump_size = 0;
|
||||
int max_stack = 0;
|
||||
const char *callgraph_buf = NULL;
|
||||
const char *rtp_buf = NULL;
|
||||
|
||||
list_for_each_entry(term, config_terms, list) {
|
||||
switch (term->type) {
|
||||
|
|
@ -1175,6 +1241,9 @@ static void evsel__apply_config_terms(struct evsel *evsel,
|
|||
break;
|
||||
case EVSEL__CONFIG_TERM_CFG_CHG:
|
||||
break;
|
||||
case EVSEL__CONFIG_TERM_RATIO_TO_PREV:
|
||||
rtp_buf = term->val.str;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
@ -1226,6 +1295,8 @@ static void evsel__apply_config_terms(struct evsel *evsel,
|
|||
evsel__config_callchain(evsel, opts, ¶m);
|
||||
}
|
||||
}
|
||||
if (rtp_buf)
|
||||
evsel__apply_ratio_to_prev(evsel, attr, opts, rtp_buf);
|
||||
}
|
||||
|
||||
struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type)
|
||||
|
|
@ -1250,6 +1321,11 @@ void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused,
|
|||
{
|
||||
}
|
||||
|
||||
/*
 * Default no-op version of the ratio-to-prev arch hook. It is declared
 * __weak so that an architecture-specific definition of
 * arch_evsel__apply_ratio_to_prev() can take its place; both parameters are
 * unused here.
 */
void __weak arch_evsel__apply_ratio_to_prev(struct evsel *evsel __maybe_unused,
|
||||
struct perf_event_attr *attr __maybe_unused)
|
||||
{
|
||||
}
|
||||
|
||||
static void evsel__set_default_freq_period(struct record_opts *opts,
|
||||
struct perf_event_attr *attr)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -342,6 +342,7 @@ void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
|
|||
void arch_evsel__set_sample_weight(struct evsel *evsel);
|
||||
void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr);
|
||||
int arch_evsel__open_strerror(struct evsel *evsel, int err, char *msg, size_t size);
|
||||
void arch_evsel__apply_ratio_to_prev(struct evsel *evsel, struct perf_event_attr *attr);
|
||||
|
||||
int evsel__set_filter(struct evsel *evsel, const char *filter);
|
||||
int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ enum evsel_term_type {
|
|||
EVSEL__CONFIG_TERM_AUX_ACTION,
|
||||
EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE,
|
||||
EVSEL__CONFIG_TERM_CFG_CHG,
|
||||
EVSEL__CONFIG_TERM_RATIO_TO_PREV,
|
||||
};
|
||||
|
||||
struct evsel_config_term {
|
||||
|
|
|
|||
|
|
@ -842,6 +842,7 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type)
|
|||
[PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache",
|
||||
[PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware",
|
||||
[PARSE_EVENTS__TERM_TYPE_CPU] = "cpu",
|
||||
[PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV] = "ratio-to-prev",
|
||||
};
|
||||
if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR)
|
||||
return "unknown term";
|
||||
|
|
@ -892,6 +893,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er
|
|||
case PARSE_EVENTS__TERM_TYPE_RAW:
|
||||
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
|
||||
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
|
||||
case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV:
|
||||
default:
|
||||
if (!err)
|
||||
return false;
|
||||
|
|
@ -1045,6 +1047,21 @@ do { \
|
|||
perf_cpu_map__put(map);
|
||||
break;
|
||||
}
|
||||
case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV:
|
||||
CHECK_TYPE_VAL(STR);
|
||||
if (strtod(term->val.str, NULL) <= 0) {
|
||||
parse_events_error__handle(parse_state->error, term->err_val,
|
||||
strdup("zero or negative"),
|
||||
NULL);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (errno == ERANGE) {
|
||||
parse_events_error__handle(parse_state->error, term->err_val,
|
||||
strdup("too big"),
|
||||
NULL);
|
||||
return -EINVAL;
|
||||
}
|
||||
break;
|
||||
case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
|
||||
case PARSE_EVENTS__TERM_TYPE_USER:
|
||||
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
|
||||
|
|
@ -1173,6 +1190,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
|
|||
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
|
||||
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
|
||||
case PARSE_EVENTS__TERM_TYPE_CPU:
|
||||
case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV:
|
||||
default:
|
||||
parse_events_error__handle(parse_state->error, term->err_term,
|
||||
strdup(parse_events__term_type_str(term->type_term)),
|
||||
|
|
@ -1295,6 +1313,9 @@ do { \
|
|||
ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size,
|
||||
term->val.num, term->weak);
|
||||
break;
|
||||
case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV:
|
||||
ADD_CONFIG_TERM_STR(RATIO_TO_PREV, term->val.str, term->weak);
|
||||
break;
|
||||
case PARSE_EVENTS__TERM_TYPE_USER:
|
||||
case PARSE_EVENTS__TERM_TYPE_CONFIG:
|
||||
case PARSE_EVENTS__TERM_TYPE_CONFIG1:
|
||||
|
|
@ -1361,6 +1382,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head
|
|||
case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE:
|
||||
case PARSE_EVENTS__TERM_TYPE_HARDWARE:
|
||||
case PARSE_EVENTS__TERM_TYPE_CPU:
|
||||
case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,7 +83,8 @@ enum parse_events__term_type {
|
|||
PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
|
||||
PARSE_EVENTS__TERM_TYPE_HARDWARE,
|
||||
PARSE_EVENTS__TERM_TYPE_CPU,
|
||||
#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_CPU + 1)
|
||||
PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV,
|
||||
#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV + 1)
|
||||
};
|
||||
|
||||
struct parse_events_term {
|
||||
|
|
|
|||
|
|
@ -337,6 +337,7 @@ aux-action { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); }
|
|||
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
|
||||
metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); }
|
||||
cpu { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CPU); }
|
||||
ratio-to-prev { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV); }
|
||||
cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); }
|
||||
stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
|
||||
stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
|
||||
|
|
|
|||
|
|
@ -1541,7 +1541,7 @@ static int pmu_config_term(const struct perf_pmu *pmu,
|
|||
break;
|
||||
case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */
|
||||
return -EINVAL;
|
||||
case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_CPU:
|
||||
case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV:
|
||||
/* Skip non-config terms. */
|
||||
break;
|
||||
default:
|
||||
|
|
@ -1930,6 +1930,7 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call
|
|||
"aux-action=(pause|resume|start-paused)",
|
||||
"aux-sample-size=number",
|
||||
"cpu=number",
|
||||
"ratio-to-prev=string",
|
||||
};
|
||||
struct perf_pmu_format *format;
|
||||
int ret;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user