linux-yocto/kernel/sched/ext.h
Tejun Heo d86adb4fc0 sched_ext: Add cpuperf support
sched_ext currently does not integrate with schedutil. When schedutil is the
governor, frequencies are left unregulated and usually get stuck close to
the highest performance level from running RT tasks.

Add CPU performance monitoring and scaling support by integrating into
schedutil. The following kfuncs are added:

- scx_bpf_cpuperf_cap(): Query the relative performance capacity of
  different CPUs in the system.

- scx_bpf_cpuperf_cur(): Query the current performance level of a CPU
  relative to its max performance.

- scx_bpf_cpuperf_set(): Set the current target performance level of a CPU.

This gives the BPF scheduler direct control over CPU performance settings.
The only changes on the schedutil side are accounting for the utilization
factor from sched_ext and disabling the frequency-holding heuristics, as they
may not apply well to sched_ext schedulers, which can have a much weaker
connection between tasks and their current / last CPU.
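
For illustration, the schedutil hook ends up roughly along the lines of the
sketch below. This is a simplified reconstruction of sugov_get_util() based on
this description and the v2 note, not the verbatim patch:

  static void sugov_get_util(struct sugov_cpu *sg_cpu, unsigned long boost)
  {
          unsigned long min, max;
          /* start from the target requested by the BPF scheduler, if any */
          unsigned long util = scx_cpuperf_target(sg_cpu->cpu);

          /* the CFS utilization metric is stale once everything runs on SCX */
          if (!scx_switched_all())
                  util += cpu_util_cfs_boost(sg_cpu->cpu);

          util = effective_cpu_util(sg_cpu->cpu, util, &min, &max);
          util = max(util, boost);
          sg_cpu->bw_min = min;
          sg_cpu->util = sugov_effective_cpu_perf(sg_cpu->cpu, util, min, max);
  }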

With cpuperf support added, there is no reason to block uclamp. Enable while
at it.

A toy implementation of cpuperf is added to scx_qmap as a demonstration of
the feature.
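
As a rough illustration (a sketch under assumptions, not the scx_qmap code
itself), a BPF scheduler could drive the interface like this, assuming the
0..SCX_CPUPERF_ONE (1024) scale used by these kfuncs; scx_bpf_cpuperf_cap()
would additionally let it bias placement toward higher-capacity CPUs:

  /*
   * Illustrative only: push @cpu's target performance up as more work
   * queues on it.  @nr_queued is whatever per-CPU load signal the
   * scheduler tracks itself; it is not part of the kfunc API.
   */
  static void toy_cpuperf(s32 cpu, u32 nr_queued)
  {
          u32 cur = scx_bpf_cpuperf_cur(cpu);   /* current level, 0..1024 */
          u32 target = nr_queued >= 3 ? 1024 : 256 * (nr_queued + 1);

          if (target != cur)
                  scx_bpf_cpuperf_set(cpu, target);
  }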

v2: Ignore cpu_util_cfs_boost() when scx_switched_all() in sugov_get_util()
    to avoid factoring in stale util metric. (Christian)

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: David Vernet <dvernet@meta.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: Christian Loehle <christian.loehle@arm.com>
2024-06-21 12:37:22 -10:00


/* SPDX-License-Identifier: GPL-2.0 */
/*
 * BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst
 *
 * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
 * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
 * Copyright (c) 2022 David Vernet <dvernet@meta.com>
 */
#ifdef CONFIG_SCHED_CLASS_EXT

struct sched_enq_and_set_ctx {
        struct task_struct      *p;
        int                     queue_flags;
        bool                    queued;
        bool                    running;
};

void sched_deq_and_put_task(struct task_struct *p, int queue_flags,
                            struct sched_enq_and_set_ctx *ctx);
void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx);
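
/*
 * Illustrative usage of the pair above (a sketch, not copied from ext.c):
 * dequeue @p, update whatever property affects its queueing, then put it
 * back with its previous state restored:
 *
 *	struct sched_enq_and_set_ctx ctx;
 *
 *	sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
 *	... change @p, e.g. its sched_class or priority ...
 *	sched_enq_and_set_task(&ctx);
 */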

extern const struct sched_class ext_sched_class;

DECLARE_STATIC_KEY_FALSE(__scx_ops_enabled);
DECLARE_STATIC_KEY_FALSE(__scx_switched_all);
#define scx_enabled()           static_branch_unlikely(&__scx_ops_enabled)
#define scx_switched_all()      static_branch_unlikely(&__scx_switched_all)

DECLARE_STATIC_KEY_FALSE(scx_ops_cpu_preempt);

static inline bool task_on_scx(const struct task_struct *p)
{
        return scx_enabled() && p->sched_class == &ext_sched_class;
}

void scx_tick(struct rq *rq);
void init_scx_entity(struct sched_ext_entity *scx);
void scx_pre_fork(struct task_struct *p);
int scx_fork(struct task_struct *p);
void scx_post_fork(struct task_struct *p);
void scx_cancel_fork(struct task_struct *p);
bool scx_can_stop_tick(struct rq *rq);
void scx_rq_activate(struct rq *rq);
void scx_rq_deactivate(struct rq *rq);
int scx_check_setscheduler(struct task_struct *p, int policy);
bool task_should_scx(struct task_struct *p);
void init_sched_ext_class(void);

/*
 * The performance target last requested via scx_bpf_cpuperf_set(), consumed
 * by schedutil; 0 when the SCX scheduler class is not enabled.
 */
static inline u32 scx_cpuperf_target(s32 cpu)
{
        if (scx_enabled())
                return cpu_rq(cpu)->scx.cpuperf_target;
        else
                return 0;
}

/*
 * Skip sched classes that cannot currently contribute runnable tasks: fair
 * when all tasks are switched to SCX, and ext when SCX is not enabled.
 */
static inline const struct sched_class *next_active_class(const struct sched_class *class)
{
        class++;
        if (scx_switched_all() && class == &fair_sched_class)
                class++;
        if (!scx_enabled() && class == &ext_sched_class)
                class++;
        return class;
}

#define for_active_class_range(class, _from, _to)                              \
        for (class = (_from); class != (_to); class = next_active_class(class))

#define for_each_active_class(class)                                           \
        for_active_class_range(class, __sched_class_highest, __sched_class_lowest)

/*
 * SCX requires a balance() call before every pick_next_task() call including
 * when waking up from idle.
 */
#define for_balance_class_range(class, prev_class, end_class)                  \
        for_active_class_range(class, (prev_class) > &ext_sched_class ?        \
                               &ext_sched_class : (prev_class), (end_class))
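
/*
 * Illustrative only -- roughly how the core scheduler walks these ranges
 * (simplified; see put_prev_task_balance() and __pick_next_task() in
 * core.c for the real callers):
 *
 *	for_balance_class_range(class, prev->sched_class, &idle_sched_class) {
 *		if (class->balance(rq, prev, rf))
 *			break;
 *	}
 *
 *	for_each_active_class(class) {
 *		p = class->pick_next_task(rq);
 *		if (p)
 *			return p;
 *	}
 */
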
#ifdef CONFIG_SCHED_CORE
bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
                   bool in_fi);
#endif
#else /* CONFIG_SCHED_CLASS_EXT */
#define scx_enabled() false
#define scx_switched_all() false
static inline void scx_tick(struct rq *rq) {}
static inline void scx_pre_fork(struct task_struct *p) {}
static inline int scx_fork(struct task_struct *p) { return 0; }
static inline void scx_post_fork(struct task_struct *p) {}
static inline void scx_cancel_fork(struct task_struct *p) {}
static inline u32 scx_cpuperf_target(s32 cpu) { return 0; }
static inline bool scx_can_stop_tick(struct rq *rq) { return true; }
static inline void scx_rq_activate(struct rq *rq) {}
static inline void scx_rq_deactivate(struct rq *rq) {}
static inline int scx_check_setscheduler(struct task_struct *p, int policy) { return 0; }
static inline bool task_on_scx(const struct task_struct *p) { return false; }
static inline void init_sched_ext_class(void) {}
#define for_each_active_class for_each_class
#define for_balance_class_range for_class_range
#endif /* CONFIG_SCHED_CLASS_EXT */
#if defined(CONFIG_SCHED_CLASS_EXT) && defined(CONFIG_SMP)
void __scx_update_idle(struct rq *rq, bool idle);
static inline void scx_update_idle(struct rq *rq, bool idle)
{
        if (scx_enabled())
                __scx_update_idle(rq, idle);
}
#else
static inline void scx_update_idle(struct rq *rq, bool idle) {}
#endif