mirror of
git://git.yoctoproject.org/linux-yocto.git
synced 2025-07-05 05:15:23 +02:00

In recent discussions around some performance improvements in the file handling area we discussed switching the file cache to rely on SLAB_TYPESAFE_BY_RCU which allows us to get rid of call_rcu() based freeing for files completely. This is a pretty sensitive change overall but it might actually be worth doing. The main downside is the subtlety. The other one is that we should really wait for Jann's patch to land that enables KASAN to handle SLAB_TYPESAFE_BY_RCU UAFs. Currently it doesn't but a patch for this exists. With SLAB_TYPESAFE_BY_RCU objects may be freed and reused multiple times which requires a few changes. So it isn't sufficient anymore to just acquire a reference to the file in question under rcu using atomic_long_inc_not_zero() since the file might have already been recycled and someone else might have bumped the reference. In other words, callers might see reference count bumps from newer users. For this reason it is necessary to verify that the pointer is the same before and after the reference count increment. This pattern can be seen in get_file_rcu() and __files_get_rcu(). In addition, it isn't possible to access or check fields in struct file without first acquiring a reference on it. Not doing that was always very dodgy and it was only usable for non-pointer data in struct file. With SLAB_TYPESAFE_BY_RCU it is necessary that callers first acquire a reference under rcu or they must hold the files_lock of the fdtable. Failing to do either one of these is a bug. Thanks to Jann for pointing out that we need to ensure memory ordering between reallocations and pointer check by ensuring that all subsequent loads have a dependency on the second load in get_file_rcu() and providing a fixup that was folded into this patch. Cc: Jann Horn <jannh@google.com> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Christian Brauner <brauner@kernel.org>
124 lines
3.3 KiB
C
124 lines
3.3 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* descriptor table internals; you almost certainly want file.h instead.
|
|
*/
|
|
|
|
#ifndef __LINUX_FDTABLE_H
|
|
#define __LINUX_FDTABLE_H
|
|
|
|
#include <linux/posix_types.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/nospec.h>
|
|
#include <linux/types.h>
|
|
#include <linux/init.h>
|
|
#include <linux/fs.h>
|
|
|
|
#include <linux/atomic.h>
|
|
|
|
/*
|
|
* The default fd array needs to be at least BITS_PER_LONG,
|
|
* as this is the granularity returned by copy_fdset().
|
|
*/
|
|
#define NR_OPEN_DEFAULT BITS_PER_LONG
|
|
#define NR_OPEN_MAX ~0U
|
|
|
|
/*
 * One table of file descriptors: the fd array plus the three bitmaps
 * that track per-fd state.  Swapped out wholesale when the table grows.
 */
struct fdtable {
	unsigned int max_fds;		/* capacity of ->fd and the bitmaps, in fds */
	struct file __rcu **fd;		/* current fd array */
	unsigned long *close_on_exec;	/* bitmap: fd is marked close-on-exec (see close_on_exec()) */
	unsigned long *open_fds;	/* bitmap: fd is in use (see fd_is_open()) */
	unsigned long *full_fds_bits;	/* bitmap: one bit per word of ->open_fds — presumably set when that word is full; verify against fd allocator */
	struct rcu_head rcu;		/* for RCU-deferred freeing of a replaced table */
};
|
|
|
|
static inline bool close_on_exec(unsigned int fd, const struct fdtable *fdt)
|
|
{
|
|
return test_bit(fd, fdt->close_on_exec);
|
|
}
|
|
|
|
static inline bool fd_is_open(unsigned int fd, const struct fdtable *fdt)
|
|
{
|
|
return test_bit(fd, fdt->open_fds);
|
|
}
|
|
|
|
/*
 * Open file table structure
 */
struct files_struct {
	/*
	 * read mostly part
	 */
	atomic_t count;			/* reference count (dropped via put_files_struct()) */
	bool resize_in_progress;	/* a resize of the fdtable is under way */
	wait_queue_head_t resize_wait;	/* waiters for the resize to finish */

	struct fdtable __rcu *fdt;	/* current table; dereference via files_fdtable() */
	struct fdtable fdtab;		/* embedded table, backed by the *_init arrays below */
	/*
	 * written part on a separate cache line in SMP
	 */
	spinlock_t file_lock ____cacheline_aligned_in_smp;	/* protects the fields below and table updates */
	unsigned int next_fd;		/* presumably the lowest fd to try on allocation — verify against alloc_fd() */
	unsigned long close_on_exec_init[1];	/* initial close_on_exec bitmap (NR_OPEN_DEFAULT bits) */
	unsigned long open_fds_init[1];		/* initial open_fds bitmap */
	unsigned long full_fds_bits_init[1];	/* initial full_fds_bits bitmap */
	struct file __rcu * fd_array[NR_OPEN_DEFAULT];	/* initial fd array */
};
|
|
|
|
struct file_operations;
|
|
struct vfsmount;
|
|
struct dentry;
|
|
|
|
/*
 * Dereference an fdtable pointer belonging to @files.  Legal under
 * rcu_read_lock() or while holding @files->file_lock; lockdep checks
 * the latter via the extra condition.
 */
#define rcu_dereference_check_fdtable(files, fdtfd) \
	rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock))

/* Fetch the current fdtable of @files with the above locking check. */
#define files_fdtable(files) \
	rcu_dereference_check_fdtable((files), (files)->fdt)
|
|
|
|
/*
|
|
* The caller must ensure that fd table isn't shared or hold rcu or file lock
|
|
*/
|
|
static inline struct file *files_lookup_fd_raw(struct files_struct *files, unsigned int fd)
|
|
{
|
|
struct fdtable *fdt = rcu_dereference_raw(files->fdt);
|
|
|
|
if (fd < fdt->max_fds) {
|
|
fd = array_index_nospec(fd, fdt->max_fds);
|
|
return rcu_dereference_raw(fdt->fd[fd]);
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/*
 * Look up the file installed at @fd, asserting via lockdep that the
 * caller holds @files->file_lock (the raw lookup below performs no
 * locking checks of its own).
 */
static inline struct file *files_lookup_fd_locked(struct files_struct *files, unsigned int fd)
{
	RCU_LOCKDEP_WARN(!lockdep_is_held(&files->file_lock),
			 "suspicious rcu_dereference_check() usage");
	return files_lookup_fd_raw(files, fd);
}
|
|
|
|
struct file *lookup_fdget_rcu(unsigned int fd);
|
|
struct file *task_lookup_fdget_rcu(struct task_struct *task, unsigned int fd);
|
|
struct file *task_lookup_next_fdget_rcu(struct task_struct *task, unsigned int *fd);
|
|
|
|
struct task_struct;
|
|
|
|
void put_files_struct(struct files_struct *fs);
|
|
int unshare_files(void);
|
|
struct files_struct *dup_fd(struct files_struct *, unsigned, int *) __latent_entropy;
|
|
void do_close_on_exec(struct files_struct *);
|
|
int iterate_fd(struct files_struct *, unsigned,
|
|
int (*)(const void *, struct file *, unsigned),
|
|
const void *);
|
|
|
|
extern int close_fd(unsigned int fd);
|
|
extern int __close_range(unsigned int fd, unsigned int max_fd, unsigned int flags);
|
|
extern struct file *close_fd_get_file(unsigned int fd);
|
|
extern int unshare_fd(unsigned long unshare_flags, unsigned int max_fds,
|
|
struct files_struct **new_fdp);
|
|
|
|
extern struct kmem_cache *files_cachep;
|
|
|
|
#endif /* __LINUX_FDTABLE_H */
|