mirror of
git://git.yoctoproject.org/linux-yocto.git
synced 2025-10-23 07:23:12 +02:00

-----BEGIN PGP SIGNATURE----- iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZ44+LwAKCRCRxhvAZXjc orNaAQCGDqtxgqgGLsdx9dw7yTxOm9opYBaG5qN7KiThLAz2PwD+MsHNNlLVEOKU IQo9pa23UFUhTipFSeszOWza5SGlxg4= =hdst -----END PGP SIGNATURE----- Merge tag 'vfs-6.14-rc1.mount.v2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs Pull vfs mount updates from Christian Brauner: - Add a mountinfo program to demonstrate statmount()/listmount() Add a new "mountinfo" sample userland program that demonstrates how to use statmount() and listmount() to get at the same info that /proc/pid/mountinfo provides - Remove pointless nospec.h include - Prepend statmount.mnt_opts string with security_sb_mnt_opts() Currently these mount options aren't accessible via statmount() - Add new mount namespaces to mount namespace rbtree outside of the namespace semaphore - Lockless mount namespace lookup Currently we take the read lock when looking for a mount namespace to list mounts in. We can make this lockless. The simple search case can just use a sequence counter to detect concurrent changes to the rbtree For walking the list of mount namespaces sequentially via nsfs we keep a separate rcu list as rb_prev() and rb_next() aren't usable safely with rcu. Currently there is no primitive for retrieving the previous list member. To do this we need a new deletion primitive that doesn't poison the prev pointer and a corresponding retrieval helper Since creating mount namespaces is a relatively rare event compared with querying mounts in a foreign mount namespace this is worth it. 
Once libmount and systemd pick up this mechanism to list mounts in foreign mount namespaces this will be used very frequently - Add extended selftests for lockless mount namespace iteration - Add a sample program to list all mounts on the system, i.e., in all mount namespaces - Improve mount namespace iteration performance Make finding the last or first mount to start iterating the mount namespace from an O(1) operation and add selftests for iterating the mount table starting from the first and last mount - Use an xarray for the old mount id While the ida does use the xarray internally we can use it explicitly which allows us to increment the unique mount id under the xa lock. This allows us to remove the atomic as we're now allocating both ids in one go - Use a shared header for vfs sample programs - Fix build warnings for new sample program to list all mounts * tag 'vfs-6.14-rc1.mount.v2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: samples/vfs: fix build warnings samples/vfs: use shared header samples/vfs/mountinfo: Use __u64 instead of uint64_t fs: remove useless lockdep assertion fs: use xarray for old mount id selftests: add listmount() iteration tests fs: cache first and last mount samples: add test-list-all-mounts selftests: remove unneeded include selftests: add tests for mntns iteration seltests: move nsfs into filesystems subfolder fs: simplify rwlock to spinlock fs: lockless mntns lookup for nsfs rculist: add list_bidir_{del,prev}_rcu() fs: lockless mntns rbtree lookup fs: add mount namespace to rbtree late fs: prepend statmount.mnt_opts string with security_sb_mnt_opts() mount: remove inlude/nospec.h include samples: add a mountinfo program to demonstrate statmount()/listmount()
156 lines
3.1 KiB
C
156 lines
3.1 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
#ifndef __PIDFD_H
|
|
#define __PIDFD_H
|
|
|
|
#define _GNU_SOURCE
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <sched.h>
|
|
#include <signal.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <syscall.h>
|
|
#include <sys/types.h>
|
|
#include <sys/wait.h>
|
|
|
|
#include "../kselftest.h"
|
|
#include "../clone3/clone3_selftests.h"
|
|
|
|
/*
 * Fallback values for constants that the toolchain headers may not
 * provide. Each one is only defined when the headers included above did
 * not already define it.
 */
#ifndef P_PIDFD
#define P_PIDFD 3
#endif

#ifndef CLONE_NEWTIME
#define CLONE_NEWTIME 0x00000080
#endif

#ifndef CLONE_PIDFD
#define CLONE_PIDFD 0x00001000
#endif
|
|
|
|
/*
 * Placeholder syscall numbers for headers that do not know these system
 * calls yet. The value -1 only keeps the wrappers below compilable.
 * NOTE(review): presumably a call issued with number -1 is rejected at
 * run time rather than at build time - confirm.
 */
#ifndef __NR_pidfd_open
#define __NR_pidfd_open -1
#endif

#ifndef __NR_pidfd_send_signal
#define __NR_pidfd_send_signal -1
#endif

#ifndef __NR_clone3
#define __NR_clone3 -1
#endif

#ifndef __NR_pidfd_getfd
#define __NR_pidfd_getfd -1
#endif
|
|
|
|
/* Fall back to O_NONBLOCK when the headers do not define PIDFD_NONBLOCK. */
#ifndef PIDFD_NONBLOCK
#define PIDFD_NONBLOCK O_NONBLOCK
#endif
|
|
|
|
/*
 * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c.
 * That means that when pid allocation wraps around, any pid < 300 will
 * be skipped. So we need to use a pid > 300 in order to test recycling.
 */
#define PID_RECYCLE 1000
|
|
|
|
/*
 * Define a few custom error codes for the child process to clearly indicate
 * what is happening. This way we can tell the difference between a system
 * error, a test error, etc.
 */
#define PIDFD_PASS 0
#define PIDFD_FAIL 1
#define PIDFD_ERROR 2
#define PIDFD_SKIP 3
#define PIDFD_XFAIL 4
|
|
|
|
/*
 * Issue the waitid system call directly through syscall().
 *
 * @which, @pid, @info and @options are forwarded unchanged; the fifth
 * raw-syscall argument is always passed as NULL.
 *
 * Returns the value produced by syscall(), narrowed to int.
 */
static inline int sys_waitid(int which, pid_t pid, siginfo_t *info, int options)
{
	return syscall(__NR_waitid, which, pid, info, options, NULL);
}
|
|
|
|
/*
 * Wait for the child @pid to terminate and report how it exited.
 *
 * waitpid() is retried for as long as it fails with EINTR.
 *
 * Returns the child's exit status (WEXITSTATUS) when it exited normally.
 * Returns -1, after logging through ksft_print_msg(), when waitpid()
 * fails for any other reason or when the child did not exit normally.
 */
static inline int wait_for_pid(pid_t pid)
{
	int status, ret;

	/* Restart the wait whenever a signal interrupts it. */
	do {
		ret = waitpid(pid, &status, 0);
	} while (ret == -1 && errno == EINTR);

	if (ret == -1) {
		ksft_print_msg("waitpid returned -1, errno=%d\n", errno);
		return -1;
	}

	if (!WIFEXITED(status)) {
		ksft_print_msg(
			"waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n",
			WIFSIGNALED(status), WTERMSIG(status));
		return -1;
	}

	return WEXITSTATUS(status);
}
|
|
|
|
/*
 * Issue the pidfd_open system call directly through syscall().
 *
 * @pid and @flags are forwarded unchanged.
 *
 * Returns the value produced by syscall(), narrowed to int.
 */
static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
{
	return syscall(__NR_pidfd_open, pid, flags);
}
|
|
|
|
/*
 * Issue the pidfd_send_signal system call directly through syscall().
 *
 * @pidfd, @sig, @info and @flags are forwarded unchanged.
 *
 * Returns the value produced by syscall(), narrowed to int.
 */
static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
					unsigned int flags)
{
	return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
}
|
|
|
|
/*
 * Issue the pidfd_getfd system call directly through syscall().
 *
 * @pidfd, @fd and @flags are forwarded unchanged.
 *
 * Returns the value produced by syscall(), narrowed to int.
 */
static inline int sys_pidfd_getfd(int pidfd, int fd, int flags)
{
	return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
}
|
|
|
|
/*
 * Issue the memfd_create system call directly through syscall().
 *
 * @name and @flags are forwarded unchanged.
 *
 * Returns the value produced by syscall(), narrowed to int.
 */
static inline int sys_memfd_create(const char *name, unsigned int flags)
{
	return syscall(__NR_memfd_create, name, flags);
}
|
|
|
|
static inline pid_t create_child(int *pidfd, unsigned flags)
|
|
{
|
|
struct __clone_args args = {
|
|
.flags = CLONE_PIDFD | flags,
|
|
.exit_signal = SIGCHLD,
|
|
.pidfd = ptr_to_u64(pidfd),
|
|
};
|
|
|
|
return sys_clone3(&args, sizeof(struct __clone_args));
|
|
}
|
|
|
|
/*
 * read() that transparently restarts when interrupted by a signal.
 *
 * The call is repeated for as long as it fails with errno == EINTR.
 *
 * Returns the result of the last read(): the number of bytes read, or a
 * negative value with errno set for any failure other than EINTR.
 */
static inline ssize_t read_nointr(int fd, void *buf, size_t count)
{
	ssize_t ret;

	do {
		ret = read(fd, buf, count);
	} while (ret < 0 && errno == EINTR);

	return ret;
}
|
|
|
|
/*
 * write() that transparently restarts when interrupted by a signal.
 *
 * The call is repeated for as long as it fails with errno == EINTR.
 *
 * Returns the result of the last write(): the number of bytes written, or
 * a negative value with errno set for any failure other than EINTR.
 */
static inline ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	ssize_t ret;

	do {
		ret = write(fd, buf, count);
	} while (ret < 0 && errno == EINTR);

	return ret;
}
|
|
|
|
#endif /* __PIDFD_H */
|