ANDROID: rust_binder: add priority inheritance

This implements priority inheritance in the same way as it is implemented
in Android Linux: when a thread picks up a synchronous transaction, it
temporarily inherits the sender's scheduler policy and priority (clamped by
the target node's flags and the receiver's rlimits), and restores its
previous priority once the transaction has been handled.

Bug: 278052745
Change-Id: I0df7d4bc5d08c1f6568744701e5eaf90e86ecd00
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
Author: Alice Ryhl <aliceryhl@google.com>
Date: 2023-08-29 13:28:13 +00:00
Parent: 691f0f1682
Commit: 2e4f09314c
10 changed files with 382 additions and 4 deletions


@ -58,9 +58,17 @@ pub_no_prefix!(
BC_DEAD_BINDER_DONE
);
pub_no_prefix!(
flat_binder_object_shifts_,
FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT
);
pub_no_prefix!(
flat_binder_object_flags_,
FLAT_BINDER_FLAG_ACCEPTS_FDS,
FLAT_BINDER_FLAG_INHERIT_RT,
FLAT_BINDER_FLAG_PRIORITY_MASK,
FLAT_BINDER_FLAG_SCHED_POLICY_MASK,
FLAT_BINDER_FLAG_TXN_SECURITY_CTX
);


@ -16,6 +16,7 @@ use kernel::{
use crate::{
defs::*,
error::BinderError,
prio::{self, BinderPriority},
process::{NodeRefInfo, Process, ProcessInner},
thread::Thread,
transaction::Transaction,
@ -163,6 +164,22 @@ impl Node {
}
}
pub(crate) fn node_prio(&self) -> prio::BinderPriority {
let flags = self.flags;
// Sign-extend the priority bits through i8, matching the `s8` cast in the
// C driver, so that negative nice values are decoded correctly.
let priority = (flags & FLAT_BINDER_FLAG_PRIORITY_MASK) as i8 as prio::Nice;
let sched_policy =
(flags & FLAT_BINDER_FLAG_SCHED_POLICY_MASK) >> FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT;
BinderPriority {
sched_policy,
prio: prio::to_kernel_prio(sched_policy, priority),
}
}
pub(crate) fn inherit_rt(&self) -> bool {
(self.flags & FLAT_BINDER_FLAG_INHERIT_RT) != 0
}
/// An id that is unique across all binder nodes on the system. Used as the key in the
/// `by_node` map.
pub(crate) fn global_id(&self) -> usize {

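As a hedged illustration of the flag decoding above, assuming the Android UAPI bit layout (FLAT_BINDER_FLAG_PRIORITY_MASK = 0xff, FLAT_BINDER_FLAG_SCHED_POLICY_SHIFT = 9, FLAT_BINDER_FLAG_INHERIT_RT = 0x800; these exact values are assumptions here, not taken from this patch):

    // A node registered with SCHED_FIFO (policy value 1) at RT priority 10,
    // with RT inheritance enabled:
    let flags: u32 = 10 | (1 << 9) | 0x800;
    let nice = (flags & 0xff) as i8 as i32; // 10 (for RT, this holds the RT prio)
    let sched_policy = (flags & (3 << 9)) >> 9; // 1 == SCHED_FIFO
    let inherit_rt = (flags & 0x800) != 0; // true
    // node_prio() then yields prio = MAX_RT_PRIO - 1 - 10 = 89 via to_kernel_prio.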

@ -0,0 +1,80 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2024 Google LLC.
//! This module defines the types and methods relevant to priority inheritance.
use kernel::bindings;
pub(crate) type Policy = core::ffi::c_uint;
pub(crate) type Priority = core::ffi::c_int;
pub(crate) type Nice = core::ffi::c_int;
pub(crate) const SCHED_NORMAL: Policy = bindings::SCHED_NORMAL;
pub(crate) const SCHED_FIFO: Policy = bindings::SCHED_FIFO;
pub(crate) const MIN_NICE: Nice = bindings::MIN_NICE as _;
pub(crate) const MAX_NICE: Nice = bindings::MAX_NICE as _;
pub(crate) const DEFAULT_PRIO: Priority = bindings::DEFAULT_PRIO as _;
pub(crate) const MAX_RT_PRIO: Priority = bindings::MAX_RT_PRIO as _;
/// Scheduler policy and priority.
///
/// The binder driver supports inheriting the following scheduler policies:
/// * SCHED_NORMAL
/// * SCHED_BATCH
/// * SCHED_FIFO
/// * SCHED_RR
#[derive(Copy, Clone, Default)]
pub(crate) struct BinderPriority {
pub(crate) sched_policy: Policy,
pub(crate) prio: Priority,
}
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) enum PriorityState {
/// The thread's priority is up to date.
Set,
/// A priority change has been requested but not yet applied.
Pending,
/// A nested transaction set a new priority; abort the in-progress
/// priority restore.
Abort,
}
pub(crate) fn get_default_prio_from_task(task: &kernel::task::Task) -> BinderPriority {
if is_supported_policy(task.policy()) {
BinderPriority {
sched_policy: task.policy(),
prio: task.normal_prio(),
}
} else {
BinderPriority {
sched_policy: SCHED_NORMAL,
prio: DEFAULT_PRIO,
}
}
}
pub(crate) fn is_rt_policy(policy: Policy) -> bool {
policy == bindings::SCHED_FIFO || policy == bindings::SCHED_RR
}
pub(crate) fn is_fair_policy(policy: Policy) -> bool {
policy == bindings::SCHED_NORMAL || policy == bindings::SCHED_BATCH
}
pub(crate) fn is_supported_policy(policy: Policy) -> bool {
is_fair_policy(policy) || is_rt_policy(policy)
}
pub(crate) fn to_userspace_prio(policy: Policy, prio: Priority) -> Nice {
if is_fair_policy(policy) {
prio - DEFAULT_PRIO
} else {
MAX_RT_PRIO - 1 - prio
}
}
pub(crate) fn to_kernel_prio(policy: Policy, prio: Nice) -> Priority {
if is_fair_policy(policy) {
prio + DEFAULT_PRIO
} else {
MAX_RT_PRIO - 1 - prio
}
}
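
As a sanity check on the arithmetic, a hedged worked example using the usual kernel constants (DEFAULT_PRIO = 120, MAX_RT_PRIO = 100) as the assumed `bindings` values:

    // Fair policies map nice [-20, 19] onto kernel prio [100, 139]:
    assert_eq!(to_kernel_prio(SCHED_NORMAL, 10), 130); // 10 + 120
    assert_eq!(to_userspace_prio(SCHED_NORMAL, 130), 10);
    // RT policies invert: RT priority 50 becomes kernel prio 99 - 50 = 49,
    // and the same formula converts back.
    assert_eq!(to_kernel_prio(SCHED_FIFO, 50), 49);
    assert_eq!(to_userspace_prio(SCHED_FIFO, 49), 50);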


@ -37,6 +37,7 @@ use crate::{
defs::*,
error::{BinderError, BinderResult},
node::{Node, NodeDeath, NodeRef},
prio::{self, BinderPriority},
range_alloc::{self, RangeAllocator},
thread::{PushWorkRes, Thread},
DArc, DLArc, DTRWrap, DeliverToRead,
@ -137,6 +138,8 @@ impl ProcessInner {
) -> Result<(), (BinderError, DLArc<dyn DeliverToRead>)> {
// Try to find a ready thread to which to push the work.
if let Some(thread) = self.ready_threads.pop_front() {
work.on_thread_selected(&thread);
// Push to thread while holding state lock. This prevents the thread from giving up
// (for example, because of a signal) when we're about to deliver work.
match thread.push_work(work) {
@ -390,6 +393,8 @@ pub(crate) struct Process {
#[pin]
pub(crate) inner: SpinLock<ProcessInner>,
pub(crate) default_priority: BinderPriority,
// Waitqueue of processes waiting for all outstanding transactions to be
// processed.
#[pin]
@ -447,13 +452,15 @@ impl workqueue::WorkItem for Process {
impl Process {
fn new(ctx: Arc<Context>, cred: ARef<Credential>) -> Result<Arc<Self>> {
let current = kernel::current!();
let list_process = ListArc::pin_init(pin_init!(Process {
ctx,
cred,
default_priority: prio::get_default_prio_from_task(current),
inner <- kernel::new_spinlock!(ProcessInner::new(), "Process::inner"),
node_refs <- kernel::new_mutex!(ProcessNodeRefs::new(), "Process::node_refs"),
freeze_wait <- kernel::new_condvar!("Process::freeze_wait"),
- task: kernel::current!().group_leader().into(),
+ task: current.group_leader().into(),
defer_work <- kernel::new_work!("Process::defer_work"),
links <- ListLinks::new(),
}))?;


@ -3,6 +3,7 @@
// Copyright (C) 2024 Google LLC.
//! Binder -- the Android IPC mechanism.
#![recursion_limit = "256"]
use kernel::{
bindings::{self, seq_file},
@ -26,6 +27,7 @@ mod context;
mod defs;
mod error;
mod node;
mod prio;
mod process;
mod range_alloc;
mod thread;
@ -55,6 +57,10 @@ trait DeliverToRead: ListArcSafe + Send + Sync {
/// won't be delivered.
fn cancel(self: DArc<Self>) {}
/// Called when a work item is delivered directly to a specific thread, rather than to the
/// process work list.
fn on_thread_selected(&self, _thread: &thread::Thread) {}
/// Should we use `wake_up_interruptible_sync` or `wake_up_interruptible` when scheduling this
/// work item?
///


@ -18,7 +18,8 @@ use kernel::{
security,
sync::poll::{PollCondVar, PollTable},
sync::{Arc, SpinLock},
- types::Either,
+ task::Task,
+ types::{ARef, Either},
uaccess::{UserSlice, UserSliceWriter},
};
@ -26,6 +27,7 @@ use crate::{
allocation::{Allocation, AllocationView, BinderObject, BinderObjectRef},
defs::*,
error::BinderResult,
prio::{self, BinderPriority, PriorityState},
process::Process,
ptr_align,
transaction::Transaction,
@ -403,14 +405,22 @@ impl InnerThread {
}
}
/// The priority-inheritance state of a thread. Stored inside the thread's
/// `prio_lock`.
pub(crate) struct ThreadPrioState {
pub(crate) state: PriorityState,
pub(crate) next: BinderPriority,
}
/// This represents a thread that's used with binder.
#[pin_data]
pub(crate) struct Thread {
pub(crate) id: i32,
pub(crate) process: Arc<Process>,
pub(crate) task: ARef<Task>,
#[pin]
inner: SpinLock<InnerThread>,
#[pin]
pub(crate) prio_lock: SpinLock<ThreadPrioState>,
#[pin]
work_condvar: PollCondVar,
/// Used to insert this thread into the process' `ready_threads` list.
///
@ -439,10 +449,17 @@ impl Thread {
pub(crate) fn new(id: i32, process: Arc<Process>) -> Result<Arc<Self>> {
let inner = InnerThread::new()?;
let prio = ThreadPrioState {
state: PriorityState::Set,
next: BinderPriority::default(),
};
Arc::pin_init(pin_init!(Thread {
id,
process,
task: ARef::from(kernel::current!()),
inner <- kernel::new_spinlock!(inner, "Thread::inner"),
prio_lock <- kernel::new_spinlock!(prio, "Thread::prio_lock"),
work_condvar <- kernel::new_poll_condvar!("Thread::work_condvar"),
links <- ListLinks::new(),
links_track <- AtomicListArcTracker::new(),
@ -538,6 +555,8 @@ impl Thread {
return Ok(Some(work));
}
self.restore_priority(&self.process.default_priority);
inner.looper_flags |= LOOPER_WAITING | LOOPER_WAITING_PROC;
let signal_pending = self.work_condvar.wait_interruptible_freezable(&mut inner);
inner.looper_flags &= !(LOOPER_WAITING | LOOPER_WAITING_PROC);
@ -600,6 +619,90 @@ impl Thread {
self.inner.lock().push_return_work(reply);
}
fn do_set_priority(&self, desired: &BinderPriority, verify: bool) {
let task = &*self.task;
let mut policy = desired.sched_policy;
let mut priority;
if task.policy() == policy && task.normal_prio() == desired.prio {
let mut prio_state = self.prio_lock.lock();
if prio_state.state == PriorityState::Pending {
prio_state.state = PriorityState::Set;
}
return;
}
let has_cap_nice = task.has_capability_noaudit(bindings::CAP_SYS_NICE as _);
priority = prio::to_userspace_prio(policy, desired.prio);
if verify && prio::is_rt_policy(policy) && !has_cap_nice {
// For rt_policy, we store the rt priority as a nice. (See to_userspace_prio and
// to_kernel_prio impls.)
let max_rtprio: prio::Nice = task.rlimit_rtprio();
if max_rtprio == 0 {
policy = prio::SCHED_NORMAL;
priority = prio::MIN_NICE;
} else if priority > max_rtprio {
priority = max_rtprio;
}
}
if verify && prio::is_fair_policy(policy) && !has_cap_nice {
let min_nice = task.rlimit_nice();
if min_nice > prio::MAX_NICE {
pr_err!("{} RLIMIT_NICE not set", task.pid());
return;
} else if priority < min_nice {
priority = min_nice;
}
}
if policy != desired.sched_policy || prio::to_kernel_prio(policy, priority) != desired.prio
{
pr_debug!(
"{}: priority {} not allowed, using {} instead",
task.pid(),
desired.prio,
prio::to_kernel_prio(policy, priority),
);
}
let mut prio_state = self.prio_lock.lock();
if !verify && prio_state.state == PriorityState::Abort {
// A new priority has been set by an incoming nested
// transaction. Abort this priority restore and allow
// the transaction to run at the new desired priority.
drop(prio_state);
pr_debug!("{}: aborting priority restore", task.pid());
return;
}
// Set the actual priority.
if task.policy() != policy || prio::is_rt_policy(policy) {
let prio = if prio::is_rt_policy(policy) {
priority
} else {
0
};
task.sched_setscheduler_nocheck(policy as i32, prio, true);
}
if prio::is_fair_policy(policy) {
task.set_user_nice(priority);
}
prio_state.state = PriorityState::Set;
}
pub(crate) fn set_priority(&self, desired: &BinderPriority) {
self.do_set_priority(desired, true);
}
pub(crate) fn restore_priority(&self, desired: &BinderPriority) {
self.do_set_priority(desired, false);
}
fn translate_object(
&self,
obj_index: usize,
@ -1171,7 +1274,7 @@ impl Thread {
}
// We need to complete the transaction even if we cannot complete building the reply.
- (|| -> BinderResult<_> {
+ let out = (|| -> BinderResult<_> {
let completion = DTRWrap::arc_try_new(DeliverCode::new(BR_TRANSACTION_COMPLETE))?;
let process = orig.from.process.clone();
let allow_fds = orig.flags & TF_ACCEPT_FDS != 0;
@ -1191,7 +1294,11 @@ impl Thread {
orig.from.deliver_reply(reply, &orig);
err.reply = BR_TRANSACTION_COMPLETE;
err
- })
+ });
// Restore the priority even on failure.
self.restore_priority(&orig.saved_priority());
out
}
fn oneway_transaction_inner(self: &Arc<Self>, tr: &BinderTransactionDataSg) -> BinderResult {

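To make the `verify` clamping in do_set_priority concrete, here is a minimal standalone sketch under stated assumptions; `clamp_unprivileged`, `max_rtprio`, and `min_nice` are hypothetical names introduced for illustration, not part of the patch:

    // Hedged sketch: `max_rtprio` is RLIMIT_RTPRIO and `min_nice` is
    // RLIMIT_NICE converted to a nice value (see rlimit_nice below).
    fn clamp_unprivileged(
        mut policy: Policy,
        mut prio: Nice,
        max_rtprio: Nice,
        min_nice: Nice,
    ) -> (Policy, Nice) {
        if is_rt_policy(policy) {
            if max_rtprio == 0 {
                // No RT allowance at all: fall back to SCHED_NORMAL, as the
                // driver does, and let the fair clamp below apply.
                policy = SCHED_NORMAL;
                prio = MIN_NICE;
            } else if prio > max_rtprio {
                // Cap the requested RT priority at the rlimit.
                prio = max_rtprio;
            }
        }
        if is_fair_policy(policy) && prio < min_nice {
            // A smaller nice is a higher priority, so clamp up to the rlimit.
            prio = min_nice;
        }
        (policy, prio)
    }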

@ -17,6 +17,7 @@ use crate::{
defs::*,
error::{BinderError, BinderResult},
node::{Node, NodeRef},
prio::{self, BinderPriority, PriorityState},
process::{Process, ProcessInner},
ptr_align,
thread::{PushWorkRes, Thread},
@ -32,6 +33,10 @@ pub(crate) struct Transaction {
#[pin]
allocation: SpinLock<Option<Allocation>>,
is_outstanding: AtomicBool,
set_priority_called: AtomicBool,
priority: BinderPriority,
#[pin]
saved_priority: SpinLock<BinderPriority>,
code: u32,
pub(crate) flags: u32,
data_size: usize,
@ -87,6 +92,16 @@ impl Transaction {
alloc.set_info_target_node(node_ref);
let data_address = alloc.ptr;
let priority =
if (trd.flags & TF_ONE_WAY == 0) && prio::is_supported_policy(from.task.policy()) {
BinderPriority {
sched_policy: from.task.policy(),
prio: from.task.normal_prio(),
}
} else {
from.process.default_priority
};
Ok(DTRWrap::arc_pin_init(pin_init!(Transaction {
target_node: Some(target_node),
from_parent,
@ -100,6 +115,9 @@ impl Transaction {
data_address,
allocation <- kernel::new_spinlock!(Some(alloc), "Transaction::new"),
is_outstanding: AtomicBool::new(false),
priority,
saved_priority <- kernel::new_spinlock!(BinderPriority::default(), "Transaction::saved_priority"),
set_priority_called: AtomicBool::new(false),
txn_security_ctx_off,
oneway_spam_detected,
}))?)
@ -136,11 +154,18 @@ impl Transaction {
data_address: alloc.ptr,
allocation <- kernel::new_spinlock!(Some(alloc), "Transaction::new"),
is_outstanding: AtomicBool::new(false),
priority: BinderPriority::default(),
saved_priority <- kernel::new_spinlock!(BinderPriority::default(), "Transaction::saved_priority"),
set_priority_called: AtomicBool::new(false),
txn_security_ctx_off: None,
oneway_spam_detected,
}))?)
}
pub(crate) fn saved_priority(&self) -> BinderPriority {
*self.saved_priority.lock()
}
/// Determines if the transaction is stacked on top of the given transaction.
pub(crate) fn is_stacked_on(&self, onext: &Option<DArc<Self>>) -> bool {
match (&self.from_parent, onext) {
@ -309,6 +334,11 @@ impl DeliverToRead for Transaction {
}
self.drop_outstanding_txn();
});
// Update thread priority. This only has an effect if the transaction is delivered via the
// process work list, since the priority has otherwise already been updated.
self.on_thread_selected(thread);
let files = if let Ok(list) = self.prepare_file_list() {
list
} else {
@ -391,6 +421,56 @@ impl DeliverToRead for Transaction {
self.drop_outstanding_txn();
}
fn on_thread_selected(&self, to_thread: &Thread) {
// Return immediately if reply.
let target_node = match self.target_node.as_ref() {
Some(target_node) => target_node,
None => return,
};
// We only need to do this once.
if self.set_priority_called.swap(true, Ordering::Relaxed) {
return;
}
let node_prio = target_node.node_prio();
let mut desired = self.priority;
if !target_node.inherit_rt() && prio::is_rt_policy(desired.sched_policy) {
desired.prio = prio::DEFAULT_PRIO;
desired.sched_policy = prio::SCHED_NORMAL;
}
if node_prio.prio < self.priority.prio
|| (node_prio.prio == self.priority.prio && node_prio.sched_policy == prio::SCHED_FIFO)
{
// In case the minimum priority on the node is
// higher (lower value), use that priority. If
// the priority is the same, but the node uses
// SCHED_FIFO, prefer SCHED_FIFO, since it can
// run unbounded, unlike SCHED_RR.
desired = node_prio;
}
let mut prio_state = to_thread.prio_lock.lock();
if prio_state.state == PriorityState::Pending {
// The task is in the process of changing priorities, so
// saving its current values would be incorrect.
// Instead, save the pending priority and signal
// the task to abort the priority restore.
prio_state.state = PriorityState::Abort;
*self.saved_priority.lock() = prio_state.next;
} else {
let task = &*self.to.task;
let mut saved_priority = self.saved_priority.lock();
saved_priority.sched_policy = task.policy();
saved_priority.prio = task.normal_prio();
}
drop(prio_state);
to_thread.set_priority(&desired);
}
fn should_sync_wakeup(&self) -> bool {
self.flags & TF_ONE_WAY == 0
}
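
A hedged worked example of the selection above, with assumed numbers: if the sender runs at SCHED_NORMAL nice 0 (kernel prio 120) and the target node advertises a minimum of SCHED_NORMAL nice -10 (kernel prio 110), then 110 < 120, so the node's priority wins and the handler thread is boosted to nice -10. The thread's previous policy and priority are stashed in `saved_priority` and reinstated by `restore_priority` once the reply has been delivered.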


@ -7,6 +7,7 @@
*/
#include <kunit/test.h>
#include <linux/capability.h>
#include <linux/cred.h>
#include <linux/errname.h>
#include <linux/fdtable.h>
@ -26,6 +27,7 @@
#include <linux/workqueue.h>
#include <uapi/linux/android/binder.h>
#include <uapi/linux/android/binderfs.h>
#include <uapi/linux/sched/types.h>
/* `bindgen` gets confused at certain things. */
const size_t RUST_CONST_HELPER_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN;


@ -313,6 +313,13 @@ void rust_helper_init_task_work(struct callback_head *twork,
}
EXPORT_SYMBOL_GPL(rust_helper_init_task_work);
unsigned long rust_helper_task_rlimit(const struct task_struct *task,
unsigned int limit)
{
return task_rlimit(task, limit);
}
EXPORT_SYMBOL_GPL(rust_helper_task_rlimit);
void rust_helper_rb_link_node(struct rb_node *node, struct rb_node *parent,
struct rb_node **rb_link)
{


@ -212,6 +212,70 @@ impl Task {
// running.
unsafe { bindings::wake_up_process(self.0.get()) };
}
/// Check if the task has the given capability without logging to the audit log.
pub fn has_capability_noaudit(&self, capability: i32) -> bool {
// SAFETY: By the type invariant, we know that `self.0.get()` is valid.
unsafe { bindings::has_capability_noaudit(self.0.get(), capability) }
}
/// Returns the current scheduling policy.
pub fn policy(&self) -> u32 {
// SAFETY: The task is valid because the shared reference guarantees a nonzero refcount.
//
// This uses a volatile read because C code may be modifying this field in parallel using
// non-atomic unsynchronized writes. This corresponds to how the C macro READ_ONCE is
// implemented.
unsafe { core::ptr::addr_of!((*self.0.get()).policy).read_volatile() }
}
/// Returns the current normal priority.
pub fn normal_prio(&self) -> i32 {
// SAFETY: The task is valid because the shared reference guarantees a nonzero refcount.
//
// This uses a volatile read because C code may be modifying this field in parallel using
// non-atomic unsynchronized writes. This corresponds to how the C macro READ_ONCE is
// implemented.
unsafe { core::ptr::addr_of!((*self.0.get()).normal_prio).read_volatile() }
}
/// Get the rlimit value for RTPRIO.
pub fn rlimit_rtprio(&self) -> i32 {
// SAFETY: By the type invariant, we know that `self.0.get()` is valid, and RLIMIT_RTPRIO
// is a valid limit type.
unsafe { bindings::task_rlimit(self.0.get(), bindings::RLIMIT_RTPRIO) as i32 }
}
/// Get the rlimit value for NICE, converted to a nice value.
pub fn rlimit_nice(&self) -> i32 {
// SAFETY: By the type invariant, we know that `self.0.get()` is valid, and RLIMIT_NICE
// is a valid limit type.
let prio = unsafe { bindings::task_rlimit(self.0.get(), bindings::RLIMIT_NICE) as i32 };
// Convert rlimit style value [1,40] to nice value [-20, 19].
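// For example, a rlimit of 40 maps to nice -20, and a rlimit of 1 maps to nice 19.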
bindings::MAX_NICE as i32 - prio + 1
}
/// Set the scheduling properties for this task without checking whether the task is allowed to
/// set them.
pub fn sched_setscheduler_nocheck(
&self,
policy: i32,
sched_priority: i32,
reset_on_fork: bool,
) {
let params = bindings::sched_param { sched_priority };
let mut policy = policy;
if reset_on_fork {
policy |= bindings::SCHED_RESET_ON_FORK as i32;
}
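// SAFETY: By the type invariant, we know that `self.0.get()` is valid, and
// `params` is a valid `sched_param` for the duration of the call.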
unsafe { bindings::sched_setscheduler_nocheck(self.0.get(), policy, &params) };
}
/// Set the nice value of this task.
pub fn set_user_nice(&self, nice: i32) {
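// SAFETY: By the type invariant, we know that `self.0.get()` is valid.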
unsafe { bindings::set_user_nice(self.0.get(), nice as _) };
}
}
impl Kuid {