Merge branch 'slab/for-6.15/kfree_rcu_tiny' into slab/for-next

Merge the slab feature branch kfree_rcu_tiny for 6.15:

- Move the TINY_RCU kvfree_rcu() implementation from the RCU to the SLAB
  subsystem and clean up its integration.
Author: Vlastimil Babka
Date:   2025-03-06 09:46:34 +01:00
Commit: dea2d9221e
11 changed files with 113 additions and 126 deletions
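For readers unfamiliar with the API being moved, here is a minimal usage sketch of the double-argument kfree_rcu()/kvfree_rcu() form that the diff below reworks. struct foo and drop_foo() are hypothetical and only illustrate how the embedded rcu_head is named, with its offset required to stay below 4096 bytes (see the rcupdate.h comment in the first hunk).

#include <linux/slab.h>
#include <linux/rcupdate.h>

/* Illustrative only; not part of this commit. */
struct foo {
	int data;
	struct rcu_head rcu;	/* field offset must stay below 4096 bytes */
};

static void drop_foo(struct foo *p)
{
	/*
	 * Double-argument form: the rcu_head field is named directly, so no
	 * callback pointer has to be stored in it; the object is kvfree()d
	 * after a grace period. An offset >= 4096 would trip the
	 * BUILD_BUG_ON() in kvfree_rcu_arg_2() shown below.
	 */
	kfree_rcu(p, rcu);
}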


@@ -1025,12 +1025,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
#define RCU_POINTER_INITIALIZER(p, v) \
.p = RCU_INITIALIZER(v)
/*
* Does the specified offset indicate that the corresponding rcu_head
* structure can be handled by kvfree_rcu()?
*/
#define __is_kvfree_rcu_offset(offset) ((offset) < 4096)
/**
* kfree_rcu() - kfree an object after a grace period.
* @ptr: pointer to kfree for double-argument invocations.
@@ -1041,11 +1035,11 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
* when they are used in a kernel module, that module must invoke the
* high-latency rcu_barrier() function at module-unload time.
*
* The kfree_rcu() function handles this issue. Rather than encoding a
* function address in the embedded rcu_head structure, kfree_rcu() instead
* encodes the offset of the rcu_head structure within the base structure.
* Because the functions are not allowed in the low-order 4096 bytes of
* kernel virtual memory, offsets up to 4095 bytes can be accommodated.
* The kfree_rcu() function handles this issue. In order to have a universal
* callback function handling different offsets of rcu_head, the callback needs
* to determine the starting address of the freed object, which can be a large
* kmalloc or vmalloc allocation. To allow simply aligning the pointer down to
* page boundary for those, only offsets up to 4095 bytes can be accommodated.
* If the offset is larger than 4095 bytes, a compile-time error will
* be generated in kvfree_rcu_arg_2(). If this error is triggered, you can
* either fall back to use of call_rcu() or rearrange the structure to
@@ -1082,14 +1076,23 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
#define kfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
#define kvfree_rcu_mightsleep(ptr) kvfree_rcu_arg_1(ptr)
/*
* In mm/slab_common.c, no suitable header to include here.
*/
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
/*
* The BUILD_BUG_ON() makes sure the rcu_head offset can be handled. See the
* comment of kfree_rcu() for details.
*/
#define kvfree_rcu_arg_2(ptr, rhf) \
do { \
typeof (ptr) ___p = (ptr); \
\
if (___p) { \
BUILD_BUG_ON(!__is_kvfree_rcu_offset(offsetof(typeof(*(ptr)), rhf))); \
kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \
} \
if (___p) { \
BUILD_BUG_ON(offsetof(typeof(*(ptr)), rhf) >= 4096); \
kvfree_call_rcu(&((___p)->rhf), (void *) (___p)); \
} \
} while (0)
#define kvfree_rcu_arg_1(ptr) \


@@ -90,41 +90,6 @@ static inline void synchronize_rcu_expedited(void)
synchronize_rcu();
}
/*
* Add one more declaration of kvfree() here. It is
* not so straight forward to just include <linux/mm.h>
* where it is defined due to getting many compile
* errors caused by that include.
*/
extern void kvfree(const void *addr);
static inline void __kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
if (head) {
call_rcu(head, (rcu_callback_t) ((void *) head - ptr));
return;
}
// kvfree_rcu(one_arg) call.
might_sleep();
synchronize_rcu();
kvfree(ptr);
}
static inline void kvfree_rcu_barrier(void)
{
rcu_barrier();
}
#ifdef CONFIG_KASAN_GENERIC
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
#else
static inline void kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
__kvfree_call_rcu(head, ptr);
}
#endif
void rcu_qs(void);
static inline void rcu_softirq_qs(void)
@@ -164,7 +129,6 @@ static inline void rcu_end_inkernel_boot(void) { }
static inline bool rcu_inkernel_boot_has_ended(void) { return true; }
static inline bool rcu_is_watching(void) { return true; }
static inline void rcu_momentary_eqs(void) { }
static inline void kfree_rcu_scheduler_running(void) { }
/* Avoid RCU read-side critical sections leaking across. */
static inline void rcu_all_qs(void) { barrier(); }
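The __kvfree_call_rcu() and kvfree_call_rcu() definitions removed above relied on TINY_RCU's offset-in-function-pointer trick: since a kvfree_rcu() offset is below 4096 and no real callback function can live at such a low address, the offset itself was stored in head->func and decoded again on the reclaim side (see the kernel/rcu/tiny.c hunk further down). A condensed sketch of that scheme follows; the function names are illustrative, the bodies mirror the deleted lines.

#include <linux/rcupdate.h>
#include <linux/slab.h>

static inline void old_tiny_kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
	/*
	 * (void *)head - ptr is the field offset, below 4096 by construction,
	 * so it cannot collide with a real callback address and can be stored
	 * in head->func directly.
	 */
	call_rcu(head, (rcu_callback_t)((void *)head - ptr));
}

static bool old_tiny_reclaim(struct rcu_head *head)
{
	unsigned long offset = (unsigned long)head->func;

	if (offset < 4096) {			/* __is_kvfree_rcu_offset() */
		kvfree((void *)head - offset);	/* back to the object start */
		return true;
	}
	return false;
}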


@@ -34,12 +34,9 @@ static inline void rcu_virt_note_context_switch(void)
}
void synchronize_rcu_expedited(void);
void kvfree_call_rcu(struct rcu_head *head, void *ptr);
void kvfree_rcu_barrier(void);
void rcu_barrier(void);
void rcu_momentary_eqs(void);
void kfree_rcu_scheduler_running(void);
struct rcu_gp_oldstate {
unsigned long rgos_norm;


@@ -16,6 +16,7 @@
#include <linux/gfp.h>
#include <linux/overflow.h>
#include <linux/types.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <linux/percpu-refcount.h>
#include <linux/cleanup.h>
@@ -1080,6 +1081,19 @@ extern void kvfree_sensitive(const void *addr, size_t len);
unsigned int kmem_cache_size(struct kmem_cache *s);
#ifndef CONFIG_KVFREE_RCU_BATCHED
static inline void kvfree_rcu_barrier(void)
{
rcu_barrier();
}
static inline void kfree_rcu_scheduler_running(void) { }
#else
void kvfree_rcu_barrier(void);
void kfree_rcu_scheduler_running(void);
#endif
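As the kfree_rcu() documentation earlier in this diff points out, code that frees objects from a module-owned cache via kfree_rcu()/kvfree_rcu() must flush pending requests before that cache disappears. A hedged sketch of the usual teardown pattern, where my_cache and my_exit() are hypothetical module code and not part of this commit:

#include <linux/slab.h>
#include <linux/module.h>

static struct kmem_cache *my_cache;

static void __exit my_exit(void)
{
	/*
	 * Wait for every queued kvfree_rcu()/kfree_rcu() request to finish
	 * (a stub around rcu_barrier() when !CONFIG_KVFREE_RCU_BATCHED, as
	 * above) before destroying the cache backing those objects.
	 */
	kvfree_rcu_barrier();
	kmem_cache_destroy(my_cache);
}
module_exit(my_exit);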
/**
* kmalloc_size_roundup - Report allocation bucket size for the given size
*


@@ -560,40 +560,6 @@ TRACE_EVENT_RCU(rcu_segcb_stats,
);
/*
* Tracepoint for the registration of a single RCU callback of the special
* kvfree() form. The first argument is the RCU type, the second argument
* is a pointer to the RCU callback, the third argument is the offset
* of the callback within the enclosing RCU-protected data structure,
* the fourth argument is the number of lazy callbacks queued, and the
* fifth argument is the total number of callbacks queued.
*/
TRACE_EVENT_RCU(rcu_kvfree_callback,
TP_PROTO(const char *rcuname, struct rcu_head *rhp, unsigned long offset,
long qlen),
TP_ARGS(rcuname, rhp, offset, qlen),
TP_STRUCT__entry(
__field(const char *, rcuname)
__field(void *, rhp)
__field(unsigned long, offset)
__field(long, qlen)
),
TP_fast_assign(
__entry->rcuname = rcuname;
__entry->rhp = rhp;
__entry->offset = offset;
__entry->qlen = qlen;
),
TP_printk("%s rhp=%p func=%ld %ld",
__entry->rcuname, __entry->rhp, __entry->offset,
__entry->qlen)
);
/*
* Tracepoint for marking the beginning rcu_do_batch, performed to start
* RCU callback invocation. The first argument is the RCU flavor,


@@ -85,15 +85,8 @@ void rcu_sched_clock_irq(int user)
static inline bool rcu_reclaim_tiny(struct rcu_head *head)
{
rcu_callback_t f;
unsigned long offset = (unsigned long)head->func;
rcu_lock_acquire(&rcu_callback_map);
if (__is_kvfree_rcu_offset(offset)) {
trace_rcu_invoke_kvfree_callback("", head, offset);
kvfree((void *)head - offset);
rcu_lock_release(&rcu_callback_map);
return true;
}
trace_rcu_invoke_callback("", head);
f = head->func;
@@ -159,10 +152,6 @@ void synchronize_rcu(void)
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
static void tiny_rcu_leak_callback(struct rcu_head *rhp)
{
}
/*
* Post an RCU callback to be invoked after the end of an RCU grace
* period. But since we have but one CPU, that would be after any
@@ -178,9 +167,6 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func);
mem_dump_obj(head);
}
if (!__is_kvfree_rcu_offset((unsigned long)head->func))
WRITE_ONCE(head->func, tiny_rcu_leak_callback);
return;
}
@@ -246,17 +232,6 @@ bool poll_state_synchronize_rcu(unsigned long oldstate)
}
EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
#ifdef CONFIG_KASAN_GENERIC
void kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
if (head)
kasan_record_aux_stack(ptr);
__kvfree_call_rcu(head, ptr);
}
EXPORT_SYMBOL_GPL(kvfree_call_rcu);
#endif
void __init rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);


@@ -2931,13 +2931,8 @@ static int __init rcu_spawn_core_kthreads(void)
static void rcutree_enqueue(struct rcu_data *rdp, struct rcu_head *head, rcu_callback_t func)
{
rcu_segcblist_enqueue(&rdp->cblist, head);
if (__is_kvfree_rcu_offset((unsigned long)func))
trace_rcu_kvfree_callback(rcu_state.name, head,
(unsigned long)func,
rcu_segcblist_n_cbs(&rdp->cblist));
else
trace_rcu_callback(rcu_state.name, head,
rcu_segcblist_n_cbs(&rdp->cblist));
trace_rcu_callback(rcu_state.name, head,
rcu_segcblist_n_cbs(&rdp->cblist));
trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued"));
}


@@ -242,6 +242,10 @@ menu "Slab allocator options"
config SLUB
def_bool y
config KVFREE_RCU_BATCHED
def_bool y
depends on !SLUB_TINY && !TINY_RCU
config SLUB_TINY
bool "Configure for minimal memory footprint"
depends on EXPERT


@@ -582,6 +582,8 @@ void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
void **p, int objects, struct slabobj_ext *obj_exts);
#endif
void kvfree_rcu_cb(struct rcu_head *head);
size_t __ksize(const void *objp);
static inline size_t slab_ksize(const struct kmem_cache *s)


@@ -1277,6 +1277,29 @@ EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
EXPORT_TRACEPOINT_SYMBOL(kfree);
EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
#ifndef CONFIG_KVFREE_RCU_BATCHED
void kvfree_call_rcu(struct rcu_head *head, void *ptr)
{
if (head) {
kasan_record_aux_stack(ptr);
call_rcu(head, kvfree_rcu_cb);
return;
}
// kvfree_rcu(one_arg) call.
might_sleep();
synchronize_rcu();
kvfree(ptr);
}
EXPORT_SYMBOL_GPL(kvfree_call_rcu);
void __init kvfree_rcu_init(void)
{
}
#else /* CONFIG_KVFREE_RCU_BATCHED */
/*
* This rcu parameter is runtime-read-only. It reflects
* a minimum allowed number of objects which can be cached
@@ -1527,8 +1550,7 @@ kvfree_rcu_list(struct rcu_head *head)
rcu_lock_acquire(&rcu_callback_map);
trace_rcu_invoke_kvfree_callback("slab", head, offset);
if (!WARN_ON_ONCE(!__is_kvfree_rcu_offset(offset)))
kvfree(ptr);
kvfree(ptr);
rcu_lock_release(&rcu_callback_map);
cond_resched_tasks_rcu_qs();
@@ -1856,8 +1878,6 @@ add_ptr_to_bulk_krc_lock(struct kfree_rcu_cpu **krcp,
return true;
}
#if !defined(CONFIG_TINY_RCU)
static enum hrtimer_restart
schedule_page_work_fn(struct hrtimer *t)
{
@@ -2066,8 +2086,6 @@ void kvfree_rcu_barrier(void)
}
EXPORT_SYMBOL_GPL(kvfree_rcu_barrier);
#endif /* #if !defined(CONFIG_TINY_RCU) */
static unsigned long
kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
@@ -2161,3 +2179,6 @@ void __init kvfree_rcu_init(void)
shrinker_register(kfree_rcu_shrinker);
}
#endif /* CONFIG_KVFREE_RCU_BATCHED */
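To trace the non-batched path added above end to end: the double-argument kvfree_rcu() queues the embedded rcu_head with kvfree_rcu_cb() as the callback, while the single-argument kvfree_rcu_mightsleep() form calls synchronize_rcu() and kvfree() directly and therefore must not be used from atomic context. A hedged sketch of a caller's request travelling through it, with struct foo and example_free_path() being hypothetical:

#include <linux/slab.h>
#include <linux/rcupdate.h>

/* Hypothetical caller, for illustration only. */
struct foo {
	long payload;
	struct rcu_head rcu;
};

static void example_free_path(struct foo *p)
{
	kvfree_rcu(p, rcu);
	/*
	 * Under !CONFIG_KVFREE_RCU_BATCHED this expands to
	 * kvfree_call_rcu(&p->rcu, p), which queues
	 * call_rcu(&p->rcu, kvfree_rcu_cb); once a grace period has elapsed,
	 * kvfree_rcu_cb() in mm/slub.c recovers the start of *p from the
	 * rcu_head address and frees it.
	 */
}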


@@ -19,6 +19,7 @@
#include <linux/bitops.h>
#include <linux/slab.h>
#include "slab.h"
#include <linux/vmalloc.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kasan.h>
@@ -4757,6 +4758,51 @@ static void free_large_kmalloc(struct folio *folio, void *object)
folio_put(folio);
}
/*
* Given an rcu_head embedded within an object obtained from kvmalloc at an
* offset < 4k, free the object in question.
*/
void kvfree_rcu_cb(struct rcu_head *head)
{
void *obj = head;
struct folio *folio;
struct slab *slab;
struct kmem_cache *s;
void *slab_addr;
if (is_vmalloc_addr(obj)) {
obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj);
vfree(obj);
return;
}
folio = virt_to_folio(obj);
if (!folio_test_slab(folio)) {
/*
* rcu_head offset can be only less than page size so no need to
* consider folio order
*/
obj = (void *) PAGE_ALIGN_DOWN((unsigned long)obj);
free_large_kmalloc(folio, obj);
return;
}
slab = folio_slab(folio);
s = slab->slab_cache;
slab_addr = folio_address(folio);
if (is_kfence_address(obj)) {
obj = kfence_object_start(obj);
} else {
unsigned int idx = __obj_to_index(s, slab_addr, obj);
obj = slab_addr + s->size * idx;
obj = fixup_red_left(s, obj);
}
slab_free(s, slab, obj, _RET_IP_);
}
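A short worked illustration of the align-down step in kvfree_rcu_cb() above, using made-up addresses: because the rcu_head offset is guaranteed to be below 4096, rounding the rcu_head address down to a page boundary lands back at the start of a page-aligned vmalloc or large-kmalloc object.

/*
 * Made-up numbers, assuming PAGE_SIZE == 4096; not part of this commit.
 *
 *   object start (vmalloc, page aligned):  0xffffc90000400000
 *   rcu_head at offset 0x40 within it:     0xffffc90000400040
 *   PAGE_ALIGN_DOWN(0xffffc90000400040) == 0xffffc90000400000
 *
 * Any offset up to 4095 rounds back to the same object start, which is
 * why kvfree_rcu_arg_2() rejects offsets >= 4096 at compile time.
 */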
/**
* kfree - free previously allocated memory
* @object: pointer returned by kmalloc() or kmem_cache_alloc()