linux-yocto/tools/testing/selftests/pidfd/pidfd.h
Linus Torvalds 100ceb4817 vfs-6.14-rc1.mount.v2
-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZ44+LwAKCRCRxhvAZXjc
 orNaAQCGDqtxgqgGLsdx9dw7yTxOm9opYBaG5qN7KiThLAz2PwD+MsHNNlLVEOKU
 IQo9pa23UFUhTipFSeszOWza5SGlxg4=
 =hdst
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.14-rc1.mount.v2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs mount updates from Christian Brauner:

 - Add a mountinfo program to demonstrate statmount()/listmount()

   Add a new "mountinfo" sample userland program that demonstrates how
   to use statmount() and listmount() to get at the same info that
   /proc/pid/mountinfo provides

 - Remove pointless nospec.h include

 - Prepend statmount.mnt_opts string with security_sb_mnt_opts()

   Currently these mount options aren't accessible via statmount()

 - Add new mount namespaces to mount namespace rbtree outside of the
   namespace semaphore

 - Lockless mount namespace lookup

   Currently we take the read lock when looking for a mount namespace to
   list mounts in. We can make this lockless. The simple search case can
   just use a sequence counter to detect concurrent changes to the
   rbtree

   For walking the list of mount namespaces sequentially via nsfs we
   keep a separate rcu list as rb_prev() and rb_next() aren't usable
   safely with rcu. Currently there is no primitive for retrieving the
   previous list member. To do this we need a new deletion primitive
   that doesn't poison the prev pointer and a corresponding retrieval
   helper

   Since creating mount namespaces is a relatively rare event compared
   with querying mounts in a foreign mount namespace this is worth it.
   Once libmount and systemd pick up this mechanism to list mounts in
   foreign mount namespaces this will be used very frequently

     - Add extended selftests for lockless mount namespace iteration

     - Add a sample program to list all mounts on the system, i.e., in
       all mount namespaces

 - Improve mount namespace iteration performance

   Make finding the last or first mount to start iterating the mount
   namespace from an O(1) operation and add selftests for iterating the
   mount table starting from the first and last mount

 - Use an xarray for the old mount id

   While the ida does use the xarray internally we can use it explicitly
   which allows us to increment the unique mount id under the xa lock.
   This allows us to remove the atomic as we're now allocating both ids
   in one go

 - Use a shared header for vfs sample programs

 - Fix build warnings for new sample program to list all mounts

* tag 'vfs-6.14-rc1.mount.v2' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  samples/vfs: fix build warnings
  samples/vfs: use shared header
  samples/vfs/mountinfo: Use __u64 instead of uint64_t
  fs: remove useless lockdep assertion
  fs: use xarray for old mount id
  selftests: add listmount() iteration tests
  fs: cache first and last mount
  samples: add test-list-all-mounts
  selftests: remove unneeded include
  selftests: add tests for mntns iteration
  seltests: move nsfs into filesystems subfolder
  fs: simplify rwlock to spinlock
  fs: lockless mntns lookup for nsfs
  rculist: add list_bidir_{del,prev}_rcu()
  fs: lockless mntns rbtree lookup
  fs: add mount namespace to rbtree late
  fs: prepend statmount.mnt_opts string with security_sb_mnt_opts()
  mount: remove inlude/nospec.h include
  samples: add a mountinfo program to demonstrate statmount()/listmount()
2025-01-20 10:44:51 -08:00

156 lines
3.1 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PIDFD_H
#define __PIDFD_H
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "../kselftest.h"
#include "../clone3/clone3_selftests.h"
/*
 * Fallback definitions for constants that the headers included above may
 * not provide. Every entry is wrapped in #ifndef, so a definition coming
 * from the system headers always takes precedence over the one given here.
 *
 * NOTE(review): the numeric values below presumably mirror the kernel UAPI
 * headers - confirm against them before changing any of these.
 */
#ifndef P_PIDFD
#define P_PIDFD 3
#endif
#ifndef CLONE_NEWTIME
#define CLONE_NEWTIME 0x00000080
#endif
#ifndef CLONE_PIDFD
#define CLONE_PIDFD 0x00001000
#endif
/*
 * Syscall numbers that are missing from the headers fall back to -1.
 * NOTE(review): presumably this keeps the header compiling on older
 * toolchains and lets the sys_*() wrappers fail at run time instead -
 * confirm how callers handle that case.
 */
#ifndef __NR_pidfd_open
#define __NR_pidfd_open -1
#endif
#ifndef __NR_pidfd_send_signal
#define __NR_pidfd_send_signal -1
#endif
#ifndef __NR_clone3
#define __NR_clone3 -1
#endif
#ifndef __NR_pidfd_getfd
#define __NR_pidfd_getfd -1
#endif
/* When the headers lack PIDFD_NONBLOCK it is aliased to O_NONBLOCK. */
#ifndef PIDFD_NONBLOCK
#define PIDFD_NONBLOCK O_NONBLOCK
#endif
/*
 * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c.
 * That means that, when pid allocation wraps around, any pid < 300 will be
 * skipped. So we need to use a pid > 300 in order to test recycling; 1000
 * is above that threshold.
 */
#define PID_RECYCLE 1000
/*
 * Define a few custom status codes for the child process to clearly
 * indicate what is happening. This way we can tell the difference between
 * a system error, a test error, etc. The five values are distinct (0-4).
 */
#define PIDFD_PASS 0
#define PIDFD_FAIL 1
#define PIDFD_ERROR 2
#define PIDFD_SKIP 3
#define PIDFD_XFAIL 4
/*
 * sys_waitid() - invoke the raw waitid system call.
 * @which:   id type selector, forwarded unchanged
 * @pid:     id to wait for, forwarded unchanged
 * @info:    siginfo buffer handed to the kernel
 * @options: wait option flags, forwarded unchanged
 *
 * Goes through syscall() with __NR_waitid instead of the libc wrapper and
 * always passes NULL as the trailing fifth argument.
 *
 * Return: the value returned by syscall(), narrowed to int.
 */
static inline int sys_waitid(int which, pid_t pid, siginfo_t *info, int options)
{
	long rc = syscall(__NR_waitid, which, pid, info, options, NULL);

	return (int)rc;
}
/*
 * wait_for_pid() - reap a child and report its exit status.
 * @pid: pid handed to waitpid()
 *
 * Calls waitpid() with no option flags, transparently restarting the call
 * whenever it is interrupted (EINTR).
 *
 * Return: the child's exit status if it terminated via a normal exit;
 * -1 (after logging through ksft_print_msg()) if waitpid() failed for any
 * reason other than EINTR or if the child did not exit normally.
 */
static inline int wait_for_pid(pid_t pid)
{
	int wstatus;
	int reaped;

	/* Restart the wait for as long as it is merely interrupted. */
	do {
		reaped = waitpid(pid, &wstatus, 0);
	} while (reaped == -1 && errno == EINTR);

	if (reaped == -1) {
		ksft_print_msg("waitpid returned -1, errno=%d\n", errno);
		return -1;
	}

	if (WIFEXITED(status_is_unused_guard(wstatus)))
		return WEXITSTATUS(wstatus);

	ksft_print_msg(
		"waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n",
		WIFSIGNALED(wstatus), WTERMSIG(wstatus));
	return -1;
}
/*
 * sys_pidfd_open() - invoke the pidfd_open system call via syscall().
 * @pid:   process id forwarded to the kernel
 * @flags: flag bits forwarded to the kernel
 *
 * Return: the value returned by syscall(), narrowed to int.
 */
static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
{
	long rc = syscall(__NR_pidfd_open, pid, flags);

	return (int)rc;
}
/*
 * sys_pidfd_send_signal() - invoke the pidfd_send_signal system call.
 * @pidfd: file descriptor forwarded as the target
 * @sig:   signal number forwarded to the kernel
 * @info:  siginfo pointer forwarded to the kernel
 * @flags: flag bits forwarded to the kernel
 *
 * Return: the value returned by syscall(), narrowed to int.
 */
static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
					unsigned int flags)
{
	long rc = syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);

	return (int)rc;
}
/*
 * sys_pidfd_getfd() - invoke the pidfd_getfd system call via syscall().
 * @pidfd: pidfd forwarded to the kernel
 * @fd:    file descriptor number forwarded to the kernel
 * @flags: flag bits forwarded to the kernel
 *
 * Return: the value returned by syscall(), narrowed to int.
 */
static inline int sys_pidfd_getfd(int pidfd, int fd, int flags)
{
	long rc = syscall(__NR_pidfd_getfd, pidfd, fd, flags);

	return (int)rc;
}
/*
 * sys_memfd_create() - invoke the memfd_create system call via syscall().
 * @name:  name string forwarded to the kernel
 * @flags: flag bits forwarded to the kernel
 *
 * Return: the value returned by syscall(), narrowed to int.
 */
static inline int sys_memfd_create(const char *name, unsigned int flags)
{
	long rc = syscall(__NR_memfd_create, name, flags);

	return (int)rc;
}
static inline pid_t create_child(int *pidfd, unsigned flags)
{
struct __clone_args args = {
.flags = CLONE_PIDFD | flags,
.exit_signal = SIGCHLD,
.pidfd = ptr_to_u64(pidfd),
};
return sys_clone3(&args, sizeof(struct __clone_args));
}
/*
 * read_nointr() - read() that restarts itself when interrupted.
 * @fd:    file descriptor to read from
 * @buf:   destination buffer
 * @count: maximum number of bytes to read
 *
 * Repeats the read() call for as long as it fails with EINTR.
 *
 * Return: the result of the first read() that either succeeds or fails
 * with an errno other than EINTR.
 */
static inline ssize_t read_nointr(int fd, void *buf, size_t count)
{
	for (;;) {
		ssize_t nread = read(fd, buf, count);

		/* Anything but an interrupted call is final. */
		if (nread >= 0 || errno != EINTR)
			return nread;
	}
}
/*
 * write_nointr() - write() that restarts itself when interrupted.
 * @fd:    file descriptor to write to
 * @buf:   data to write
 * @count: number of bytes to write
 *
 * Repeats the write() call for as long as it fails with EINTR. A short
 * write is not retried; it is returned to the caller as-is.
 *
 * Return: the result of the first write() that either succeeds or fails
 * with an errno other than EINTR.
 */
static inline ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t nwritten = write(fd, buf, count);

		/* Anything but an interrupted call is final. */
		if (nwritten >= 0 || errno != EINTR)
			return nwritten;
	}
}
#endif /* __PIDFD_H */