for-6.16/io_uring-20250523

Merge tag 'for-6.16/io_uring-20250523' of git://git.kernel.dk/linux

Pull io_uring updates from Jens Axboe:

 - Avoid indirect function calls in io-wq for executing and freeing
   work. The design of io-wq is such that it can be a generic mechanism,
   but as it's just used by io_uring now, may as well avoid these
   indirect calls

 - Clean up registered buffers for networking

 - Add support for IORING_OP_PIPE. Pretty straightforward, allows
   creating pipes with io_uring, particularly useful for having these be
   instantiated as direct descriptors

 - Clean up the coalescing support for registered buffers

 - Add support for multiple interface queues for zero-copy rx
   networking. As this feature was merged for 6.15, it supported just a
   single ifq per ring

 - Clean up the eventfd support

 - Add dma-buf support to zero-copy rx

 - Clean up and improve the request draining support

 - Clean up provided buffer support, most notably with an eye toward
   making the legacy support less intrusive

 - Minor fdinfo cleanups, dropping support for dumping what credentials
   are registered

 - Improve support for overflow CQE handling, getting rid of GFP_ATOMIC
   for allocating overflow entries where possible

 - Improve detection of cases where io-wq doesn't need to spawn a new
   worker unnecessarily

 - Various little cleanups

* tag 'for-6.16/io_uring-20250523' of git://git.kernel.dk/linux: (59 commits)
  io_uring/cmd: warn on reg buf imports by ineligible cmds
  io_uring/io-wq: only create a new worker if it can make progress
  io_uring/io-wq: ignore non-busy worker going to sleep
  io_uring/io-wq: move hash helpers to the top
  trace/io_uring: fix io_uring_local_work_run ctx documentation
  io_uring: finish IOU_OK -> IOU_COMPLETE transition
  io_uring: add new helpers for posting overflows
  io_uring: pass in struct io_big_cqe to io_alloc_ocqe()
  io_uring: make io_alloc_ocqe() take a struct io_cqe pointer
  io_uring: split alloc and add of overflow
  io_uring: open code io_req_cqe_overflow()
  io_uring/fdinfo: get rid of dumping credentials
  io_uring/fdinfo: only compile if CONFIG_PROC_FS is set
  io_uring/kbuf: unify legacy buf provision and removal
  io_uring/kbuf: refactor __io_remove_buffers
  io_uring/kbuf: don't compute size twice on prep
  io_uring/kbuf: drop extra vars in io_register_pbuf_ring
  io_uring/kbuf: use mem_is_zero()
  io_uring/kbuf: account ring io_buffer_list memory
  io_uring: drain based on allocates reqs
  ...
commit 49fffac983
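Editorial aside: the eventfd items in this pull are exercised from userspace through the existing eventfd registration interface; per the "clean up the eventfd support" bullet above, the kernel-side signalling path is reworked but the userspace contract is unchanged. The following is a minimal sketch, not part of this merge, assuming liburing is installed, of registering an eventfd so that posted CQEs can be waited on with a plain read():

	/* Minimal sketch (not part of this merge): register an eventfd with a
	 * ring so that posted CQEs also signal the eventfd.
	 */
	#include <liburing.h>
	#include <sys/eventfd.h>
	#include <stdio.h>
	#include <stdint.h>
	#include <unistd.h>

	int main(void)
	{
		struct io_uring ring;
		struct io_uring_sqe *sqe;
		struct io_uring_cqe *cqe;
		uint64_t val;
		int efd;

		if (io_uring_queue_init(8, &ring, 0) < 0)
			return 1;

		efd = eventfd(0, 0);
		if (efd < 0 || io_uring_register_eventfd(&ring, efd) < 0)
			return 1;

		/* Submit a NOP; its completion should tick the eventfd. */
		sqe = io_uring_get_sqe(&ring);
		io_uring_prep_nop(sqe);
		io_uring_submit(&ring);

		/* Blocks until at least one CQE has been posted. */
		if (read(efd, &val, sizeof(val)) == sizeof(val))
			printf("eventfd signalled, count=%llu\n",
			       (unsigned long long)val);

		io_uring_wait_cqe(&ring, &cqe);
		io_uring_cqe_seen(&ring, cqe);
		io_uring_queue_exit(&ring);
		return 0;
	}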
@@ -40,8 +40,6 @@ enum io_uring_cmd_flags {
	IO_URING_F_TASK_DEAD = (1 << 13),
};

struct io_zcrx_ifq;

struct io_wq_work_node {
	struct io_wq_work_node *next;
};

@@ -343,7 +341,6 @@ struct io_ring_ctx {
	unsigned cached_cq_tail;
	unsigned cq_entries;
	struct io_ev_fd __rcu *io_ev_fd;
	unsigned cq_extra;

	void *cq_wait_arg;
	size_t cq_wait_size;

@@ -394,7 +391,8 @@ struct io_ring_ctx {
	struct wait_queue_head poll_wq;
	struct io_restriction restrictions;

	struct io_zcrx_ifq *ifq;
	/* Stores zcrx object pointers of type struct io_zcrx_ifq */
	struct xarray zcrx_ctxs;

	u32 pers_next;
	struct xarray personalities;

@@ -418,6 +416,7 @@ struct io_ring_ctx {

	struct callback_head poll_wq_task_work;
	struct list_head defer_list;
	unsigned nr_drained;

	struct io_alloc_cache msg_cache;
	spinlock_t msg_lock;

@@ -436,6 +435,7 @@ struct io_ring_ctx {

	/* protected by ->completion_lock */
	unsigned evfd_last_cq_tail;
	unsigned nr_req_allocated;

	/*
	 * Protection for resize vs mmap races - both the mmap and resize

@@ -448,8 +448,6 @@ struct io_ring_ctx {
	struct io_mapped_region ring_region;
	/* used for optimised request parameter and wait argument passing */
	struct io_mapped_region param_region;
	/* just one zcrx per ring for now, will move to io_zcrx_ifq eventually */
	struct io_mapped_region zcrx_region;
};

/*

@@ -653,8 +651,7 @@ struct io_kiocb {
	u8 iopoll_completed;
	/*
	 * Can be either a fixed buffer index, or used with provided buffers.
	 * For the latter, before issue it points to the buffer group ID,
	 * and after selection it points to the buffer ID itself.
	 * For the latter, it points to the selected buffer ID.
	 */
	u16 buf_index;

@@ -713,7 +710,7 @@ struct io_kiocb {
	const struct cred *creds;
	struct io_wq_work work;

	struct {
		struct io_big_cqe {
			u64 extra1;
			u64 extra2;
		} big_cqe;
@@ -645,7 +645,7 @@ TRACE_EVENT(io_uring_short_write,
/*
 * io_uring_local_work_run - ran ring local task work
 *
 * @tctx: pointer to a io_uring_ctx
 * @ctx: pointer to an io_ring_ctx
 * @count: how many functions it ran
 * @loops: how many loops it ran
 *
@@ -73,6 +73,7 @@ struct io_uring_sqe {
		__u32 futex_flags;
		__u32 install_fd_flags;
		__u32 nop_flags;
		__u32 pipe_flags;
	};
	__u64 user_data; /* data to be passed back at completion time */
	/* pack this to avoid bogus arm OABI complaints */

@@ -287,6 +288,7 @@ enum io_uring_op {
	IORING_OP_EPOLL_WAIT,
	IORING_OP_READV_FIXED,
	IORING_OP_WRITEV_FIXED,
	IORING_OP_PIPE,

	/* this goes last, obviously */
	IORING_OP_LAST,

@@ -992,12 +994,16 @@ struct io_uring_zcrx_offsets {
	__u64 __resv[2];
};

enum io_uring_zcrx_area_flags {
	IORING_ZCRX_AREA_DMABUF = 1,
};

struct io_uring_zcrx_area_reg {
	__u64 addr;
	__u64 len;
	__u64 rq_area_token;
	__u32 flags;
	__u32 __resv1;
	__u32 dmabuf_fd;
	__u64 __resv2[2];
};
@@ -7,11 +7,11 @@ GCOV_PROFILE := y
endif

obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
			tctx.o filetable.o rw.o net.o poll.o \
			tctx.o filetable.o rw.o poll.o \
			eventfd.o uring_cmd.o openclose.o \
			sqpoll.o xattr.o nop.o fs.o splice.o \
			sync.o msg_ring.o advise.o openclose.o \
			statx.o timeout.o fdinfo.o cancel.o \
			statx.o timeout.o cancel.o \
			waitid.o register.o truncate.o \
			memmap.o alloc_cache.o
obj-$(CONFIG_IO_URING_ZCRX) += zcrx.o

@@ -19,3 +19,5 @@ obj-$(CONFIG_IO_WQ) += io-wq.o
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_EPOLL) += epoll.o
obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
obj-$(CONFIG_NET) += net.o cmd_net.o
obj-$(CONFIG_PROC_FS) += fdinfo.o
@@ -58,7 +58,7 @@ int io_madvise(struct io_kiocb *req, unsigned int issue_flags)

	ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
	return IOU_COMPLETE;
#else
	return -EOPNOTSUPP;
#endif

@@ -104,5 +104,5 @@ int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
	return IOU_COMPLETE;
}
@@ -229,7 +229,7 @@ done:
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
	return IOU_COMPLETE;
}

static int __io_sync_cancel(struct io_uring_task *tctx,
io_uring/cmd_net.c (new file, 83 lines)

@@ -0,0 +1,83 @@
#include <asm/ioctls.h>
#include <linux/io_uring/net.h>
#include <net/sock.h>

#include "uring_cmd.h"

static inline int io_uring_cmd_getsockopt(struct socket *sock,
					  struct io_uring_cmd *cmd,
					  unsigned int issue_flags)
{
	const struct io_uring_sqe *sqe = cmd->sqe;
	bool compat = !!(issue_flags & IO_URING_F_COMPAT);
	int optlen, optname, level, err;
	void __user *optval;

	level = READ_ONCE(sqe->level);
	if (level != SOL_SOCKET)
		return -EOPNOTSUPP;

	optval = u64_to_user_ptr(READ_ONCE(sqe->optval));
	optname = READ_ONCE(sqe->optname);
	optlen = READ_ONCE(sqe->optlen);

	err = do_sock_getsockopt(sock, compat, level, optname,
				 USER_SOCKPTR(optval),
				 KERNEL_SOCKPTR(&optlen));
	if (err)
		return err;

	/* On success, return optlen */
	return optlen;
}

static inline int io_uring_cmd_setsockopt(struct socket *sock,
					  struct io_uring_cmd *cmd,
					  unsigned int issue_flags)
{
	const struct io_uring_sqe *sqe = cmd->sqe;
	bool compat = !!(issue_flags & IO_URING_F_COMPAT);
	int optname, optlen, level;
	void __user *optval;
	sockptr_t optval_s;

	optval = u64_to_user_ptr(READ_ONCE(sqe->optval));
	optname = READ_ONCE(sqe->optname);
	optlen = READ_ONCE(sqe->optlen);
	level = READ_ONCE(sqe->level);
	optval_s = USER_SOCKPTR(optval);

	return do_sock_setsockopt(sock, compat, level, optname, optval_s,
				  optlen);
}

int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
	struct socket *sock = cmd->file->private_data;
	struct sock *sk = sock->sk;
	struct proto *prot = READ_ONCE(sk->sk_prot);
	int ret, arg = 0;

	if (!prot || !prot->ioctl)
		return -EOPNOTSUPP;

	switch (cmd->cmd_op) {
	case SOCKET_URING_OP_SIOCINQ:
		ret = prot->ioctl(sk, SIOCINQ, &arg);
		if (ret)
			return ret;
		return arg;
	case SOCKET_URING_OP_SIOCOUTQ:
		ret = prot->ioctl(sk, SIOCOUTQ, &arg);
		if (ret)
			return ret;
		return arg;
	case SOCKET_URING_OP_GETSOCKOPT:
		return io_uring_cmd_getsockopt(sock, cmd, issue_flags);
	case SOCKET_URING_OP_SETSOCKOPT:
		return io_uring_cmd_setsockopt(sock, cmd, issue_flags);
	default:
		return -EOPNOTSUPP;
	}
}
EXPORT_SYMBOL_GPL(io_uring_cmd_sock);
@@ -61,7 +61,7 @@ int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
	if (ret < 0)
		req_set_fail(req);
	io_req_set_res(req, ret, 0);
	return IOU_OK;
	return IOU_COMPLETE;
}

int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)

@@ -88,5 +88,5 @@ int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags)
		req_set_fail(req);

	io_req_set_res(req, ret, 0);
	return IOU_OK;
	return IOU_COMPLETE;
}
@ -47,13 +47,6 @@ static void io_eventfd_do_signal(struct rcu_head *rcu)
|
|||
io_eventfd_put(ev_fd);
|
||||
}
|
||||
|
||||
static void io_eventfd_release(struct io_ev_fd *ev_fd, bool put_ref)
|
||||
{
|
||||
if (put_ref)
|
||||
io_eventfd_put(ev_fd);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if the caller should put the ev_fd reference, false if not.
|
||||
*/
|
||||
|
@ -72,63 +65,34 @@ static bool __io_eventfd_signal(struct io_ev_fd *ev_fd)
|
|||
|
||||
/*
|
||||
* Trigger if eventfd_async isn't set, or if it's set and the caller is
|
||||
* an async worker. If ev_fd isn't valid, obviously return false.
|
||||
* an async worker.
|
||||
*/
|
||||
static bool io_eventfd_trigger(struct io_ev_fd *ev_fd)
|
||||
{
|
||||
if (ev_fd)
|
||||
return !ev_fd->eventfd_async || io_wq_current_is_worker();
|
||||
return false;
|
||||
return !ev_fd->eventfd_async || io_wq_current_is_worker();
|
||||
}
|
||||
|
||||
/*
|
||||
* On success, returns with an ev_fd reference grabbed and the RCU read
|
||||
* lock held.
|
||||
*/
|
||||
static struct io_ev_fd *io_eventfd_grab(struct io_ring_ctx *ctx)
|
||||
void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event)
|
||||
{
|
||||
bool skip = false;
|
||||
struct io_ev_fd *ev_fd;
|
||||
|
||||
if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED)
|
||||
return NULL;
|
||||
return;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
/*
|
||||
* rcu_dereference ctx->io_ev_fd once and use it for both for checking
|
||||
* and eventfd_signal
|
||||
*/
|
||||
guard(rcu)();
|
||||
ev_fd = rcu_dereference(ctx->io_ev_fd);
|
||||
|
||||
/*
|
||||
* Check again if ev_fd exists in case an io_eventfd_unregister call
|
||||
* completed between the NULL check of ctx->io_ev_fd at the start of
|
||||
* the function and rcu_read_lock.
|
||||
*/
|
||||
if (io_eventfd_trigger(ev_fd) && refcount_inc_not_zero(&ev_fd->refs))
|
||||
return ev_fd;
|
||||
|
||||
rcu_read_unlock();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void io_eventfd_signal(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_ev_fd *ev_fd;
|
||||
|
||||
ev_fd = io_eventfd_grab(ctx);
|
||||
if (ev_fd)
|
||||
io_eventfd_release(ev_fd, __io_eventfd_signal(ev_fd));
|
||||
}
|
||||
|
||||
void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_ev_fd *ev_fd;
|
||||
|
||||
ev_fd = io_eventfd_grab(ctx);
|
||||
if (ev_fd) {
|
||||
bool skip, put_ref = true;
|
||||
if (!ev_fd)
|
||||
return;
|
||||
if (!io_eventfd_trigger(ev_fd) || !refcount_inc_not_zero(&ev_fd->refs))
|
||||
return;
|
||||
|
||||
if (cqe_event) {
|
||||
/*
|
||||
* Eventfd should only get triggered when at least one event
|
||||
* has been posted. Some applications rely on the eventfd
|
||||
|
@ -142,12 +106,10 @@ void io_eventfd_flush_signal(struct io_ring_ctx *ctx)
|
|||
skip = ctx->cached_cq_tail == ev_fd->last_cq_tail;
|
||||
ev_fd->last_cq_tail = ctx->cached_cq_tail;
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
|
||||
if (!skip)
|
||||
put_ref = __io_eventfd_signal(ev_fd);
|
||||
|
||||
io_eventfd_release(ev_fd, put_ref);
|
||||
}
|
||||
|
||||
if (skip || __io_eventfd_signal(ev_fd))
|
||||
io_eventfd_put(ev_fd);
|
||||
}
|
||||
|
||||
int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
|
||||
|
|
|
@ -4,5 +4,4 @@ int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg,
|
|||
unsigned int eventfd_async);
|
||||
int io_eventfd_unregister(struct io_ring_ctx *ctx);
|
||||
|
||||
void io_eventfd_flush_signal(struct io_ring_ctx *ctx);
|
||||
void io_eventfd_signal(struct io_ring_ctx *ctx);
|
||||
void io_eventfd_signal(struct io_ring_ctx *ctx, bool cqe_event);
|
||||
|
|
|
@ -15,37 +15,6 @@
|
|||
#include "cancel.h"
|
||||
#include "rsrc.h"
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id,
|
||||
const struct cred *cred)
|
||||
{
|
||||
struct user_namespace *uns = seq_user_ns(m);
|
||||
struct group_info *gi;
|
||||
kernel_cap_t cap;
|
||||
int g;
|
||||
|
||||
seq_printf(m, "%5d\n", id);
|
||||
seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid));
|
||||
seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid));
|
||||
seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid));
|
||||
seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid));
|
||||
seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid));
|
||||
seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid));
|
||||
seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid));
|
||||
seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid));
|
||||
seq_puts(m, "\n\tGroups:\t");
|
||||
gi = cred->group_info;
|
||||
for (g = 0; g < gi->ngroups; g++) {
|
||||
seq_put_decimal_ull(m, g ? " " : "",
|
||||
from_kgid_munged(uns, gi->gid[g]));
|
||||
}
|
||||
seq_puts(m, "\n\tCapEff:\t");
|
||||
cap = cred->cap_effective;
|
||||
seq_put_hex_ll(m, NULL, cap.val, 16);
|
||||
seq_putc(m, '\n');
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NET_RX_BUSY_POLL
|
||||
static __cold void common_tracking_show_fdinfo(struct io_ring_ctx *ctx,
|
||||
struct seq_file *m,
|
||||
|
@ -214,14 +183,6 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
|
|||
else
|
||||
seq_printf(m, "%5u: <none>\n", i);
|
||||
}
|
||||
if (!xa_empty(&ctx->personalities)) {
|
||||
unsigned long index;
|
||||
const struct cred *cred;
|
||||
|
||||
seq_printf(m, "Personalities:\n");
|
||||
xa_for_each(&ctx->personalities, index, cred)
|
||||
io_uring_show_cred(m, index, cred);
|
||||
}
|
||||
|
||||
seq_puts(m, "PollList:\n");
|
||||
for (i = 0; i < (1U << ctx->cancel_table.hash_bits); i++) {
|
||||
|
@ -264,4 +225,3 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
|
|||
mutex_unlock(&ctx->uring_lock);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -90,7 +90,7 @@ int io_renameat(struct io_kiocb *req, unsigned int issue_flags)
|
|||
|
||||
req->flags &= ~REQ_F_NEED_CLEANUP;
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
void io_renameat_cleanup(struct io_kiocb *req)
|
||||
|
@ -141,7 +141,7 @@ int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags)
|
|||
|
||||
req->flags &= ~REQ_F_NEED_CLEANUP;
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
void io_unlinkat_cleanup(struct io_kiocb *req)
|
||||
|
@ -185,7 +185,7 @@ int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags)
|
|||
|
||||
req->flags &= ~REQ_F_NEED_CLEANUP;
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
void io_mkdirat_cleanup(struct io_kiocb *req)
|
||||
|
@ -235,7 +235,7 @@ int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags)
|
|||
|
||||
req->flags &= ~REQ_F_NEED_CLEANUP;
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
|
@ -281,7 +281,7 @@ int io_linkat(struct io_kiocb *req, unsigned int issue_flags)
|
|||
|
||||
req->flags &= ~REQ_F_NEED_CLEANUP;
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
void io_link_cleanup(struct io_kiocb *req)
|
||||
|
|
|
@ -234,7 +234,7 @@ int io_futexv_wait(struct io_kiocb *req, unsigned int issue_flags)
|
|||
kfree(futexv);
|
||||
req->async_data = NULL;
|
||||
req->flags &= ~REQ_F_ASYNC_DATA;
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -311,7 +311,7 @@ done:
|
|||
req_set_fail(req);
|
||||
io_req_set_res(req, ret, 0);
|
||||
kfree(ifd);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags)
|
||||
|
@ -328,5 +328,5 @@ int io_futex_wake(struct io_kiocb *req, unsigned int issue_flags)
|
|||
if (ret < 0)
|
||||
req_set_fail(req);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
|
|
@ -114,9 +114,6 @@ enum {
|
|||
struct io_wq {
|
||||
unsigned long state;
|
||||
|
||||
free_work_fn *free_work;
|
||||
io_wq_work_fn *do_work;
|
||||
|
||||
struct io_wq_hash *hash;
|
||||
|
||||
atomic_t worker_refs;
|
||||
|
@ -153,6 +150,16 @@ static bool io_acct_cancel_pending_work(struct io_wq *wq,
|
|||
static void create_worker_cb(struct callback_head *cb);
|
||||
static void io_wq_cancel_tw_create(struct io_wq *wq);
|
||||
|
||||
static inline unsigned int __io_get_work_hash(unsigned int work_flags)
|
||||
{
|
||||
return work_flags >> IO_WQ_HASH_SHIFT;
|
||||
}
|
||||
|
||||
static inline unsigned int io_get_work_hash(struct io_wq_work *work)
|
||||
{
|
||||
return __io_get_work_hash(atomic_read(&work->flags));
|
||||
}
|
||||
|
||||
static bool io_worker_get(struct io_worker *worker)
|
||||
{
|
||||
return refcount_inc_not_zero(&worker->ref);
|
||||
|
@ -412,6 +419,30 @@ fail:
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Defer if current and next work are both hashed to the same chain */
|
||||
static bool io_wq_hash_defer(struct io_wq_work *work, struct io_wq_acct *acct)
|
||||
{
|
||||
unsigned int hash, work_flags;
|
||||
struct io_wq_work *next;
|
||||
|
||||
lockdep_assert_held(&acct->lock);
|
||||
|
||||
work_flags = atomic_read(&work->flags);
|
||||
if (!__io_wq_is_hashed(work_flags))
|
||||
return false;
|
||||
|
||||
/* should not happen, io_acct_run_queue() said we had work */
|
||||
if (wq_list_empty(&acct->work_list))
|
||||
return true;
|
||||
|
||||
hash = __io_get_work_hash(work_flags);
|
||||
next = container_of(acct->work_list.first, struct io_wq_work, list);
|
||||
work_flags = atomic_read(&next->flags);
|
||||
if (!__io_wq_is_hashed(work_flags))
|
||||
return false;
|
||||
return hash == __io_get_work_hash(work_flags);
|
||||
}
|
||||
|
||||
static void io_wq_dec_running(struct io_worker *worker)
|
||||
{
|
||||
struct io_wq_acct *acct = io_wq_get_acct(worker);
|
||||
|
@ -422,8 +453,14 @@ static void io_wq_dec_running(struct io_worker *worker)
|
|||
|
||||
if (!atomic_dec_and_test(&acct->nr_running))
|
||||
return;
|
||||
if (!worker->cur_work)
|
||||
return;
|
||||
if (!io_acct_run_queue(acct))
|
||||
return;
|
||||
if (io_wq_hash_defer(worker->cur_work, acct)) {
|
||||
raw_spin_unlock(&acct->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
raw_spin_unlock(&acct->lock);
|
||||
atomic_inc(&acct->nr_running);
|
||||
|
@ -457,16 +494,6 @@ static void __io_worker_idle(struct io_wq_acct *acct, struct io_worker *worker)
|
|||
}
|
||||
}
|
||||
|
||||
static inline unsigned int __io_get_work_hash(unsigned int work_flags)
|
||||
{
|
||||
return work_flags >> IO_WQ_HASH_SHIFT;
|
||||
}
|
||||
|
||||
static inline unsigned int io_get_work_hash(struct io_wq_work *work)
|
||||
{
|
||||
return __io_get_work_hash(atomic_read(&work->flags));
|
||||
}
|
||||
|
||||
static bool io_wait_on_hash(struct io_wq *wq, unsigned int hash)
|
||||
{
|
||||
bool ret = false;
|
||||
|
@ -612,10 +639,10 @@ static void io_worker_handle_work(struct io_wq_acct *acct,
|
|||
if (do_kill &&
|
||||
(work_flags & IO_WQ_WORK_UNBOUND))
|
||||
atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
|
||||
wq->do_work(work);
|
||||
io_wq_submit_work(work);
|
||||
io_assign_current_work(worker, NULL);
|
||||
|
||||
linked = wq->free_work(work);
|
||||
linked = io_wq_free_work(work);
|
||||
work = next_hashed;
|
||||
if (!work && linked && !io_wq_is_hashed(linked)) {
|
||||
work = linked;
|
||||
|
@ -934,8 +961,8 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq)
|
|||
{
|
||||
do {
|
||||
atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
|
||||
wq->do_work(work);
|
||||
work = wq->free_work(work);
|
||||
io_wq_submit_work(work);
|
||||
work = io_wq_free_work(work);
|
||||
} while (work);
|
||||
}
|
||||
|
||||
|
@ -1195,8 +1222,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
|
|||
int ret, i;
|
||||
struct io_wq *wq;
|
||||
|
||||
if (WARN_ON_ONCE(!data->free_work || !data->do_work))
|
||||
return ERR_PTR(-EINVAL);
|
||||
if (WARN_ON_ONCE(!bounded))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
|
@ -1206,8 +1231,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
|
|||
|
||||
refcount_inc(&data->hash->refs);
|
||||
wq->hash = data->hash;
|
||||
wq->free_work = data->free_work;
|
||||
wq->do_work = data->do_work;
|
||||
|
||||
ret = -ENOMEM;
|
||||
|
||||
|
|
|
@ -21,9 +21,6 @@ enum io_wq_cancel {
|
|||
IO_WQ_CANCEL_NOTFOUND, /* work not found */
|
||||
};
|
||||
|
||||
typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
|
||||
typedef void (io_wq_work_fn)(struct io_wq_work *);
|
||||
|
||||
struct io_wq_hash {
|
||||
refcount_t refs;
|
||||
unsigned long map;
|
||||
|
@ -39,8 +36,6 @@ static inline void io_wq_put_hash(struct io_wq_hash *hash)
|
|||
struct io_wq_data {
|
||||
struct io_wq_hash *hash;
|
||||
struct task_struct *task;
|
||||
io_wq_work_fn *do_work;
|
||||
free_work_fn *free_work;
|
||||
};
|
||||
|
||||
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
|
||||
|
|
|
@ -129,7 +129,6 @@
|
|||
struct io_defer_entry {
|
||||
struct list_head list;
|
||||
struct io_kiocb *req;
|
||||
u32 seq;
|
||||
};
|
||||
|
||||
/* requests with any of those set should undergo io_disarm_next() */
|
||||
|
@ -149,6 +148,7 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
|
|||
bool is_sqpoll_thread);
|
||||
|
||||
static void io_queue_sqe(struct io_kiocb *req);
|
||||
static void __io_req_caches_free(struct io_ring_ctx *ctx);
|
||||
|
||||
static __read_mostly DEFINE_STATIC_KEY_FALSE(io_key_has_sqarray);
|
||||
|
||||
|
@ -359,6 +359,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
|
|||
INIT_LIST_HEAD(&ctx->tctx_list);
|
||||
ctx->submit_state.free_list.next = NULL;
|
||||
INIT_HLIST_HEAD(&ctx->waitid_list);
|
||||
xa_init_flags(&ctx->zcrx_ctxs, XA_FLAGS_ALLOC);
|
||||
#ifdef CONFIG_FUTEX
|
||||
INIT_HLIST_HEAD(&ctx->futex_list);
|
||||
#endif
|
||||
|
@ -380,25 +381,6 @@ err:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static void io_account_cq_overflow(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_rings *r = ctx->rings;
|
||||
|
||||
WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1);
|
||||
ctx->cq_extra--;
|
||||
}
|
||||
|
||||
static bool req_need_defer(struct io_kiocb *req, u32 seq)
|
||||
{
|
||||
if (unlikely(req->flags & REQ_F_IO_DRAIN)) {
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
|
||||
return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void io_clean_op(struct io_kiocb *req)
|
||||
{
|
||||
if (unlikely(req->flags & REQ_F_BUFFER_SELECTED))
|
||||
|
@ -537,20 +519,37 @@ void io_req_queue_iowq(struct io_kiocb *req)
|
|||
io_req_task_work_add(req);
|
||||
}
|
||||
|
||||
static unsigned io_linked_nr(struct io_kiocb *req)
|
||||
{
|
||||
struct io_kiocb *tmp;
|
||||
unsigned nr = 0;
|
||||
|
||||
io_for_each_link(tmp, req)
|
||||
nr++;
|
||||
return nr;
|
||||
}
|
||||
|
||||
static __cold noinline void io_queue_deferred(struct io_ring_ctx *ctx)
|
||||
{
|
||||
spin_lock(&ctx->completion_lock);
|
||||
bool drain_seen = false, first = true;
|
||||
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
__io_req_caches_free(ctx);
|
||||
|
||||
while (!list_empty(&ctx->defer_list)) {
|
||||
struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
|
||||
struct io_defer_entry, list);
|
||||
|
||||
if (req_need_defer(de->req, de->seq))
|
||||
break;
|
||||
drain_seen |= de->req->flags & REQ_F_IO_DRAIN;
|
||||
if ((drain_seen || first) && ctx->nr_req_allocated != ctx->nr_drained)
|
||||
return;
|
||||
|
||||
list_del_init(&de->list);
|
||||
ctx->nr_drained -= io_linked_nr(de->req);
|
||||
io_req_task_queue(de->req);
|
||||
kfree(de);
|
||||
first = false;
|
||||
}
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
}
|
||||
|
||||
void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
|
||||
|
@ -559,10 +558,8 @@ void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
|
|||
io_poll_wq_wake(ctx);
|
||||
if (ctx->off_timeout_used)
|
||||
io_flush_timeouts(ctx);
|
||||
if (ctx->drain_active)
|
||||
io_queue_deferred(ctx);
|
||||
if (ctx->has_evfd)
|
||||
io_eventfd_flush_signal(ctx);
|
||||
io_eventfd_signal(ctx, true);
|
||||
}
|
||||
|
||||
static inline void __io_cq_lock(struct io_ring_ctx *ctx)
|
||||
|
@ -701,27 +698,20 @@ static __cold void io_uring_drop_tctx_refs(struct task_struct *task)
|
|||
}
|
||||
}
|
||||
|
||||
static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
|
||||
s32 res, u32 cflags, u64 extra1, u64 extra2)
|
||||
static __cold bool io_cqring_add_overflow(struct io_ring_ctx *ctx,
|
||||
struct io_overflow_cqe *ocqe)
|
||||
{
|
||||
struct io_overflow_cqe *ocqe;
|
||||
size_t ocq_size = sizeof(struct io_overflow_cqe);
|
||||
bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
|
||||
|
||||
lockdep_assert_held(&ctx->completion_lock);
|
||||
|
||||
if (is_cqe32)
|
||||
ocq_size += sizeof(struct io_uring_cqe);
|
||||
|
||||
ocqe = kmalloc(ocq_size, GFP_ATOMIC | __GFP_ACCOUNT);
|
||||
trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe);
|
||||
if (!ocqe) {
|
||||
struct io_rings *r = ctx->rings;
|
||||
|
||||
/*
|
||||
* If we're in ring overflow flush mode, or in task cancel mode,
|
||||
* or cannot allocate an overflow entry, then we need to drop it
|
||||
* on the floor.
|
||||
*/
|
||||
io_account_cq_overflow(ctx);
|
||||
WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1);
|
||||
set_bit(IO_CHECK_CQ_DROPPED_BIT, &ctx->check_cq);
|
||||
return false;
|
||||
}
|
||||
|
@ -730,23 +720,35 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
|
|||
atomic_or(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags);
|
||||
|
||||
}
|
||||
ocqe->cqe.user_data = user_data;
|
||||
ocqe->cqe.res = res;
|
||||
ocqe->cqe.flags = cflags;
|
||||
if (is_cqe32) {
|
||||
ocqe->cqe.big_cqe[0] = extra1;
|
||||
ocqe->cqe.big_cqe[1] = extra2;
|
||||
}
|
||||
list_add_tail(&ocqe->list, &ctx->cq_overflow_list);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void io_req_cqe_overflow(struct io_kiocb *req)
|
||||
static struct io_overflow_cqe *io_alloc_ocqe(struct io_ring_ctx *ctx,
|
||||
struct io_cqe *cqe,
|
||||
struct io_big_cqe *big_cqe, gfp_t gfp)
|
||||
{
|
||||
io_cqring_event_overflow(req->ctx, req->cqe.user_data,
|
||||
req->cqe.res, req->cqe.flags,
|
||||
req->big_cqe.extra1, req->big_cqe.extra2);
|
||||
memset(&req->big_cqe, 0, sizeof(req->big_cqe));
|
||||
struct io_overflow_cqe *ocqe;
|
||||
size_t ocq_size = sizeof(struct io_overflow_cqe);
|
||||
bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32);
|
||||
|
||||
if (is_cqe32)
|
||||
ocq_size += sizeof(struct io_uring_cqe);
|
||||
|
||||
ocqe = kzalloc(ocq_size, gfp | __GFP_ACCOUNT);
|
||||
trace_io_uring_cqe_overflow(ctx, cqe->user_data, cqe->res, cqe->flags, ocqe);
|
||||
if (ocqe) {
|
||||
ocqe->cqe.user_data = cqe->user_data;
|
||||
ocqe->cqe.res = cqe->res;
|
||||
ocqe->cqe.flags = cqe->flags;
|
||||
if (is_cqe32 && big_cqe) {
|
||||
ocqe->cqe.big_cqe[0] = big_cqe->extra1;
|
||||
ocqe->cqe.big_cqe[1] = big_cqe->extra2;
|
||||
}
|
||||
}
|
||||
if (big_cqe)
|
||||
big_cqe->extra1 = big_cqe->extra2 = 0;
|
||||
return ocqe;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -791,13 +793,6 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
|
|||
{
|
||||
struct io_uring_cqe *cqe;
|
||||
|
||||
ctx->cq_extra++;
|
||||
|
||||
/*
|
||||
* If we can't get a cq entry, userspace overflowed the
|
||||
* submission (by quite a lot). Increment the overflow count in
|
||||
* the ring.
|
||||
*/
|
||||
if (likely(io_get_cqe(ctx, &cqe))) {
|
||||
WRITE_ONCE(cqe->user_data, user_data);
|
||||
WRITE_ONCE(cqe->res, res);
|
||||
|
@ -814,14 +809,43 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline struct io_cqe io_init_cqe(u64 user_data, s32 res, u32 cflags)
|
||||
{
|
||||
return (struct io_cqe) { .user_data = user_data, .res = res, .flags = cflags };
|
||||
}
|
||||
|
||||
static __cold void io_cqe_overflow(struct io_ring_ctx *ctx, struct io_cqe *cqe,
|
||||
struct io_big_cqe *big_cqe)
|
||||
{
|
||||
struct io_overflow_cqe *ocqe;
|
||||
|
||||
ocqe = io_alloc_ocqe(ctx, cqe, big_cqe, GFP_KERNEL);
|
||||
spin_lock(&ctx->completion_lock);
|
||||
io_cqring_add_overflow(ctx, ocqe);
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
}
|
||||
|
||||
static __cold bool io_cqe_overflow_locked(struct io_ring_ctx *ctx,
|
||||
struct io_cqe *cqe,
|
||||
struct io_big_cqe *big_cqe)
|
||||
{
|
||||
struct io_overflow_cqe *ocqe;
|
||||
|
||||
ocqe = io_alloc_ocqe(ctx, cqe, big_cqe, GFP_ATOMIC);
|
||||
return io_cqring_add_overflow(ctx, ocqe);
|
||||
}
|
||||
|
||||
bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
|
||||
{
|
||||
bool filled;
|
||||
|
||||
io_cq_lock(ctx);
|
||||
filled = io_fill_cqe_aux(ctx, user_data, res, cflags);
|
||||
if (!filled)
|
||||
filled = io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
|
||||
if (unlikely(!filled)) {
|
||||
struct io_cqe cqe = io_init_cqe(user_data, res, cflags);
|
||||
|
||||
filled = io_cqe_overflow_locked(ctx, &cqe, NULL);
|
||||
}
|
||||
io_cq_unlock_post(ctx);
|
||||
return filled;
|
||||
}
|
||||
|
@ -832,10 +856,13 @@ bool io_post_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags
|
|||
*/
|
||||
void io_add_aux_cqe(struct io_ring_ctx *ctx, u64 user_data, s32 res, u32 cflags)
|
||||
{
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
lockdep_assert(ctx->lockless_cq);
|
||||
|
||||
if (!io_fill_cqe_aux(ctx, user_data, res, cflags)) {
|
||||
spin_lock(&ctx->completion_lock);
|
||||
io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
struct io_cqe cqe = io_init_cqe(user_data, res, cflags);
|
||||
|
||||
io_cqe_overflow(ctx, &cqe, NULL);
|
||||
}
|
||||
ctx->submit_state.cq_flush = true;
|
||||
}
|
||||
|
@ -924,22 +951,6 @@ void io_req_defer_failed(struct io_kiocb *req, s32 res)
|
|||
io_req_complete_defer(req);
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't initialise the fields below on every allocation, but do that in
|
||||
* advance and keep them valid across allocations.
|
||||
*/
|
||||
static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
|
||||
{
|
||||
req->ctx = ctx;
|
||||
req->buf_node = NULL;
|
||||
req->file_node = NULL;
|
||||
req->link = NULL;
|
||||
req->async_data = NULL;
|
||||
/* not necessary, but safer to zero */
|
||||
memset(&req->cqe, 0, sizeof(req->cqe));
|
||||
memset(&req->big_cqe, 0, sizeof(req->big_cqe));
|
||||
}
|
||||
|
||||
/*
|
||||
* A request might get retired back into the request caches even before opcode
|
||||
* handlers and io_issue_sqe() are done with it, e.g. inline completion path.
|
||||
|
@ -949,7 +960,7 @@ static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx)
|
|||
__cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
|
||||
__must_hold(&ctx->uring_lock)
|
||||
{
|
||||
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;
|
||||
gfp_t gfp = GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO;
|
||||
void *reqs[IO_REQ_ALLOC_BATCH];
|
||||
int ret;
|
||||
|
||||
|
@ -967,10 +978,11 @@ __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx)
|
|||
}
|
||||
|
||||
percpu_ref_get_many(&ctx->refs, ret);
|
||||
ctx->nr_req_allocated += ret;
|
||||
|
||||
while (ret--) {
|
||||
struct io_kiocb *req = reqs[ret];
|
||||
|
||||
io_preinit_req(req, ctx);
|
||||
io_req_add_to_cache(req, ctx);
|
||||
}
|
||||
return true;
|
||||
|
@ -1192,7 +1204,7 @@ static void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
|
|||
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG)
|
||||
atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
|
||||
if (ctx->has_evfd)
|
||||
io_eventfd_signal(ctx);
|
||||
io_eventfd_signal(ctx, false);
|
||||
}
|
||||
|
||||
nr_wait = atomic_read(&ctx->cq_wait_nr);
|
||||
|
@ -1384,6 +1396,16 @@ void io_queue_next(struct io_kiocb *req)
|
|||
io_req_task_queue(nxt);
|
||||
}
|
||||
|
||||
static inline void io_req_put_rsrc_nodes(struct io_kiocb *req)
|
||||
{
|
||||
if (req->file_node) {
|
||||
io_put_rsrc_node(req->ctx, req->file_node);
|
||||
req->file_node = NULL;
|
||||
}
|
||||
if (req->flags & REQ_F_BUF_NODE)
|
||||
io_put_rsrc_node(req->ctx, req->buf_node);
|
||||
}
|
||||
|
||||
static void io_free_batch_list(struct io_ring_ctx *ctx,
|
||||
struct io_wq_work_node *node)
|
||||
__must_hold(&ctx->uring_lock)
|
||||
|
@ -1444,13 +1466,10 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
|
|||
*/
|
||||
if (!(req->flags & (REQ_F_CQE_SKIP | REQ_F_REISSUE)) &&
|
||||
unlikely(!io_fill_cqe_req(ctx, req))) {
|
||||
if (ctx->lockless_cq) {
|
||||
spin_lock(&ctx->completion_lock);
|
||||
io_req_cqe_overflow(req);
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
} else {
|
||||
io_req_cqe_overflow(req);
|
||||
}
|
||||
if (ctx->lockless_cq)
|
||||
io_cqe_overflow(ctx, &req->cqe, &req->big_cqe);
|
||||
else
|
||||
io_cqe_overflow_locked(ctx, &req->cqe, &req->big_cqe);
|
||||
}
|
||||
}
|
||||
__io_cq_unlock_post(ctx);
|
||||
|
@ -1459,6 +1478,10 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
|
|||
io_free_batch_list(ctx, state->compl_reqs.first);
|
||||
INIT_WQ_LIST(&state->compl_reqs);
|
||||
}
|
||||
|
||||
if (unlikely(ctx->drain_active))
|
||||
io_queue_deferred(ctx);
|
||||
|
||||
ctx->submit_state.cq_flush = false;
|
||||
}
|
||||
|
||||
|
@ -1646,56 +1669,28 @@ io_req_flags_t io_file_get_flags(struct file *file)
|
|||
return res;
|
||||
}
|
||||
|
||||
static u32 io_get_sequence(struct io_kiocb *req)
|
||||
{
|
||||
u32 seq = req->ctx->cached_sq_head;
|
||||
struct io_kiocb *cur;
|
||||
|
||||
/* need original cached_sq_head, but it was increased for each req */
|
||||
io_for_each_link(cur, req)
|
||||
seq--;
|
||||
return seq;
|
||||
}
|
||||
|
||||
static __cold void io_drain_req(struct io_kiocb *req)
|
||||
__must_hold(&ctx->uring_lock)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
bool drain = req->flags & IOSQE_IO_DRAIN;
|
||||
struct io_defer_entry *de;
|
||||
int ret;
|
||||
u32 seq = io_get_sequence(req);
|
||||
|
||||
/* Still need defer if there is pending req in defer list. */
|
||||
spin_lock(&ctx->completion_lock);
|
||||
if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list)) {
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
queue:
|
||||
ctx->drain_active = false;
|
||||
io_req_task_queue(req);
|
||||
de = kmalloc(sizeof(*de), GFP_KERNEL_ACCOUNT);
|
||||
if (!de) {
|
||||
io_req_defer_failed(req, -ENOMEM);
|
||||
return;
|
||||
}
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
|
||||
io_prep_async_link(req);
|
||||
de = kmalloc(sizeof(*de), GFP_KERNEL);
|
||||
if (!de) {
|
||||
ret = -ENOMEM;
|
||||
io_req_defer_failed(req, ret);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock(&ctx->completion_lock);
|
||||
if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) {
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
kfree(de);
|
||||
goto queue;
|
||||
}
|
||||
|
||||
trace_io_uring_defer(req);
|
||||
de->req = req;
|
||||
de->seq = seq;
|
||||
|
||||
ctx->nr_drained += io_linked_nr(req);
|
||||
list_add_tail(&de->list, &ctx->defer_list);
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
io_queue_deferred(ctx);
|
||||
if (!drain && list_empty(&ctx->defer_list))
|
||||
ctx->drain_active = false;
|
||||
}
|
||||
|
||||
static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
|
||||
|
@ -1757,7 +1752,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
|
|||
|
||||
ret = __io_issue_sqe(req, issue_flags, def);
|
||||
|
||||
if (ret == IOU_OK) {
|
||||
if (ret == IOU_COMPLETE) {
|
||||
if (issue_flags & IO_URING_F_COMPLETE_DEFER)
|
||||
io_req_complete_defer(req);
|
||||
else
|
||||
|
@ -1816,7 +1811,7 @@ void io_wq_submit_work(struct io_wq_work *work)
|
|||
bool needs_poll = false;
|
||||
int ret = 0, err = -ECANCELED;
|
||||
|
||||
/* one will be dropped by ->io_wq_free_work() after returning to io-wq */
|
||||
/* one will be dropped by io_wq_free_work() after returning to io-wq */
|
||||
if (!(req->flags & REQ_F_REFCOUNT))
|
||||
__io_req_set_refcount(req, 2);
|
||||
else
|
||||
|
@ -1914,7 +1909,8 @@ inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
|
|||
io_ring_submit_lock(ctx, issue_flags);
|
||||
node = io_rsrc_node_lookup(&ctx->file_table.data, fd);
|
||||
if (node) {
|
||||
io_req_assign_rsrc_node(&req->file_node, node);
|
||||
node->refs++;
|
||||
req->file_node = node;
|
||||
req->flags |= io_slot_flags(node);
|
||||
file = io_slot_file(node);
|
||||
}
|
||||
|
@ -2047,7 +2043,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
|
|||
int personality;
|
||||
u8 opcode;
|
||||
|
||||
/* req is partially pre-initialised, see io_preinit_req() */
|
||||
req->ctx = ctx;
|
||||
req->opcode = opcode = READ_ONCE(sqe->opcode);
|
||||
/* same numerical values with corresponding REQ_F_*, safe to copy */
|
||||
sqe_flags = READ_ONCE(sqe->flags);
|
||||
|
@ -2278,10 +2274,6 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
|
|||
(!(ctx->flags & IORING_SETUP_NO_SQARRAY))) {
|
||||
head = READ_ONCE(ctx->sq_array[head]);
|
||||
if (unlikely(head >= ctx->sq_entries)) {
|
||||
/* drop invalid entries */
|
||||
spin_lock(&ctx->completion_lock);
|
||||
ctx->cq_extra--;
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
WRITE_ONCE(ctx->rings->sq_dropped,
|
||||
READ_ONCE(ctx->rings->sq_dropped) + 1);
|
||||
return false;
|
||||
|
@ -2699,21 +2691,26 @@ unsigned long rings_size(unsigned int flags, unsigned int sq_entries,
|
|||
return off;
|
||||
}
|
||||
|
||||
static void io_req_caches_free(struct io_ring_ctx *ctx)
|
||||
static __cold void __io_req_caches_free(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_kiocb *req;
|
||||
int nr = 0;
|
||||
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
|
||||
while (!io_req_cache_empty(ctx)) {
|
||||
req = io_extract_req(ctx);
|
||||
kmem_cache_free(req_cachep, req);
|
||||
nr++;
|
||||
}
|
||||
if (nr)
|
||||
if (nr) {
|
||||
ctx->nr_req_allocated -= nr;
|
||||
percpu_ref_put_many(&ctx->refs, nr);
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static __cold void io_req_caches_free(struct io_ring_ctx *ctx)
|
||||
{
|
||||
guard(mutex)(&ctx->uring_lock);
|
||||
__io_req_caches_free(ctx);
|
||||
}
|
||||
|
||||
static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
|
||||
|
@ -2749,6 +2746,9 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
|
|||
percpu_ref_exit(&ctx->refs);
|
||||
free_uid(ctx->user);
|
||||
io_req_caches_free(ctx);
|
||||
|
||||
WARN_ON_ONCE(ctx->nr_req_allocated);
|
||||
|
||||
if (ctx->hash_map)
|
||||
io_wq_put_hash(ctx->hash_map);
|
||||
io_napi_free(ctx);
|
||||
|
@ -2883,7 +2883,7 @@ static __cold void io_ring_exit_work(struct work_struct *work)
|
|||
io_cqring_overflow_kill(ctx);
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
}
|
||||
if (ctx->ifq) {
|
||||
if (!xa_empty(&ctx->zcrx_ctxs)) {
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
io_shutdown_zcrx_ifqs(ctx);
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
|
@ -3015,20 +3015,19 @@ static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
|
|||
struct io_defer_entry *de;
|
||||
LIST_HEAD(list);
|
||||
|
||||
spin_lock(&ctx->completion_lock);
|
||||
list_for_each_entry_reverse(de, &ctx->defer_list, list) {
|
||||
if (io_match_task_safe(de->req, tctx, cancel_all)) {
|
||||
list_cut_position(&list, &ctx->defer_list, &de->list);
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
if (list_empty(&list))
|
||||
return false;
|
||||
|
||||
while (!list_empty(&list)) {
|
||||
de = list_first_entry(&list, struct io_defer_entry, list);
|
||||
list_del_init(&de->list);
|
||||
ctx->nr_drained -= io_linked_nr(de->req);
|
||||
io_req_task_queue_fail(de->req, -ECANCELED);
|
||||
kfree(de);
|
||||
}
|
||||
|
@ -3103,8 +3102,8 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
|
|||
if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
|
||||
io_allowed_defer_tw_run(ctx))
|
||||
ret |= io_run_local_work(ctx, INT_MAX, INT_MAX) > 0;
|
||||
ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
|
||||
ret |= io_poll_remove_all(ctx, tctx, cancel_all);
|
||||
ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
|
||||
ret |= io_futex_remove_all(ctx, tctx, cancel_all);
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
#endif
|
||||
|
||||
enum {
|
||||
IOU_OK = 0, /* deprecated, use IOU_COMPLETE */
|
||||
IOU_COMPLETE = 0,
|
||||
|
||||
IOU_ISSUE_SKIP_COMPLETE = -EIOCBQUEUED,
|
||||
|
@ -196,7 +195,6 @@ static inline bool io_defer_get_uncommited_cqe(struct io_ring_ctx *ctx,
|
|||
{
|
||||
io_lockdep_assert_cq_locked(ctx);
|
||||
|
||||
ctx->cq_extra++;
|
||||
ctx->submit_state.cq_flush = true;
|
||||
return io_get_cqe(ctx, cqe_ret);
|
||||
}
|
||||
|
@ -414,7 +412,7 @@ static inline void io_req_complete_defer(struct io_kiocb *req)
|
|||
|
||||
static inline void io_commit_cqring_flush(struct io_ring_ctx *ctx)
|
||||
{
|
||||
if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
|
||||
if (unlikely(ctx->off_timeout_used ||
|
||||
ctx->has_evfd || ctx->poll_activated))
|
||||
__io_commit_cqring_flush(ctx);
|
||||
}
|
||||
|
|
io_uring/kbuf.c (154 changed lines)
|
@ -92,7 +92,6 @@ void io_kbuf_drop_legacy(struct io_kiocb *req)
|
|||
{
|
||||
if (WARN_ON_ONCE(!(req->flags & REQ_F_BUFFER_SELECTED)))
|
||||
return;
|
||||
req->buf_index = req->kbuf->bgid;
|
||||
req->flags &= ~REQ_F_BUFFER_SELECTED;
|
||||
kfree(req->kbuf);
|
||||
req->kbuf = NULL;
|
||||
|
@ -110,7 +109,6 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
|
|||
bl = io_buffer_get_list(ctx, buf->bgid);
|
||||
list_add(&buf->list, &bl->buf_list);
|
||||
req->flags &= ~REQ_F_BUFFER_SELECTED;
|
||||
req->buf_index = buf->bgid;
|
||||
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
return true;
|
||||
|
@ -193,7 +191,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
|
|||
}
|
||||
|
||||
void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
|
||||
unsigned int issue_flags)
|
||||
unsigned buf_group, unsigned int issue_flags)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_buffer_list *bl;
|
||||
|
@ -201,7 +199,7 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
|
|||
|
||||
io_ring_submit_lock(req->ctx, issue_flags);
|
||||
|
||||
bl = io_buffer_get_list(ctx, req->buf_index);
|
||||
bl = io_buffer_get_list(ctx, buf_group);
|
||||
if (likely(bl)) {
|
||||
if (bl->flags & IOBL_BUF_RING)
|
||||
ret = io_ring_buffer_select(req, len, bl, issue_flags);
|
||||
|
@ -302,7 +300,7 @@ int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
|
|||
int ret = -ENOENT;
|
||||
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
bl = io_buffer_get_list(ctx, req->buf_index);
|
||||
bl = io_buffer_get_list(ctx, arg->buf_group);
|
||||
if (unlikely(!bl))
|
||||
goto out_unlock;
|
||||
|
||||
|
@ -335,7 +333,7 @@ int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg)
|
|||
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
|
||||
bl = io_buffer_get_list(ctx, req->buf_index);
|
||||
bl = io_buffer_get_list(ctx, arg->buf_group);
|
||||
if (unlikely(!bl))
|
||||
return -ENOENT;
|
||||
|
||||
|
@ -355,10 +353,9 @@ static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr)
|
|||
struct io_buffer_list *bl = req->buf_list;
|
||||
bool ret = true;
|
||||
|
||||
if (bl) {
|
||||
if (bl)
|
||||
ret = io_kbuf_commit(req, bl, len, nr);
|
||||
req->buf_index = bl->bgid;
|
||||
}
|
||||
|
||||
req->flags &= ~REQ_F_BUFFER_RING;
|
||||
return ret;
|
||||
}
|
||||
|
@ -379,45 +376,33 @@ unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs)
|
|||
return ret;
|
||||
}
|
||||
|
||||
static int __io_remove_buffers(struct io_ring_ctx *ctx,
|
||||
struct io_buffer_list *bl, unsigned nbufs)
|
||||
static int io_remove_buffers_legacy(struct io_ring_ctx *ctx,
|
||||
struct io_buffer_list *bl,
|
||||
unsigned long nbufs)
|
||||
{
|
||||
unsigned i = 0;
|
||||
|
||||
/* shouldn't happen */
|
||||
if (!nbufs)
|
||||
return 0;
|
||||
|
||||
if (bl->flags & IOBL_BUF_RING) {
|
||||
i = bl->buf_ring->tail - bl->head;
|
||||
io_free_region(ctx, &bl->region);
|
||||
/* make sure it's seen as empty */
|
||||
INIT_LIST_HEAD(&bl->buf_list);
|
||||
bl->flags &= ~IOBL_BUF_RING;
|
||||
return i;
|
||||
}
|
||||
unsigned long i = 0;
|
||||
struct io_buffer *nxt;
|
||||
|
||||
/* protects io_buffers_cache */
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
WARN_ON_ONCE(bl->flags & IOBL_BUF_RING);
|
||||
|
||||
while (!list_empty(&bl->buf_list)) {
|
||||
struct io_buffer *nxt;
|
||||
|
||||
for (i = 0; i < nbufs && !list_empty(&bl->buf_list); i++) {
|
||||
nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
|
||||
list_del(&nxt->list);
|
||||
kfree(nxt);
|
||||
|
||||
if (++i == nbufs)
|
||||
return i;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
|
||||
{
|
||||
__io_remove_buffers(ctx, bl, -1U);
|
||||
if (bl->flags & IOBL_BUF_RING)
|
||||
io_free_region(ctx, &bl->region);
|
||||
else
|
||||
io_remove_buffers_legacy(ctx, bl, -1U);
|
||||
|
||||
kfree(bl);
|
||||
}
|
||||
|
||||
|
@ -465,30 +450,6 @@ int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_buffer_list *bl;
|
||||
int ret = 0;
|
||||
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
|
||||
ret = -ENOENT;
|
||||
bl = io_buffer_get_list(ctx, p->bgid);
|
||||
if (bl) {
|
||||
ret = -EINVAL;
|
||||
/* can't use provide/remove buffers command on mapped buffers */
|
||||
if (!(bl->flags & IOBL_BUF_RING))
|
||||
ret = __io_remove_buffers(ctx, bl, p->nbufs);
|
||||
}
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
if (ret < 0)
|
||||
req_set_fail(req);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
}
|
||||
|
||||
int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
unsigned long size, tmp_check;
|
||||
|
@ -512,8 +473,6 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
|
|||
return -EOVERFLOW;
|
||||
if (check_add_overflow((unsigned long)p->addr, size, &tmp_check))
|
||||
return -EOVERFLOW;
|
||||
|
||||
size = (unsigned long)p->len * p->nbufs;
|
||||
if (!access_ok(u64_to_user_ptr(p->addr), size))
|
||||
return -EFAULT;
|
||||
|
||||
|
@ -552,49 +511,56 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
|
|||
return i ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
|
||||
static int __io_manage_buffers_legacy(struct io_kiocb *req,
|
||||
struct io_buffer_list *bl)
|
||||
{
|
||||
struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
|
||||
int ret;
|
||||
|
||||
if (!bl) {
|
||||
if (req->opcode != IORING_OP_PROVIDE_BUFFERS)
|
||||
return -ENOENT;
|
||||
bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
|
||||
if (!bl)
|
||||
return -ENOMEM;
|
||||
|
||||
INIT_LIST_HEAD(&bl->buf_list);
|
||||
ret = io_buffer_add_list(req->ctx, bl, p->bgid);
|
||||
if (ret) {
|
||||
kfree(bl);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
/* can't use provide/remove buffers command on mapped buffers */
|
||||
if (bl->flags & IOBL_BUF_RING)
|
||||
return -EINVAL;
|
||||
if (req->opcode == IORING_OP_PROVIDE_BUFFERS)
|
||||
return io_add_buffers(req->ctx, p, bl);
|
||||
return io_remove_buffers_legacy(req->ctx, bl, p->nbufs);
|
||||
}
|
||||
|
||||
int io_manage_buffers_legacy(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf);
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_buffer_list *bl;
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
|
||||
bl = io_buffer_get_list(ctx, p->bgid);
|
||||
if (unlikely(!bl)) {
|
||||
bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
|
||||
if (!bl) {
|
||||
ret = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
INIT_LIST_HEAD(&bl->buf_list);
|
||||
ret = io_buffer_add_list(ctx, bl, p->bgid);
|
||||
if (ret) {
|
||||
kfree(bl);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
/* can't add buffers via this command for a mapped buffer ring */
|
||||
if (bl->flags & IOBL_BUF_RING) {
|
||||
ret = -EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = io_add_buffers(ctx, p, bl);
|
||||
err:
|
||||
ret = __io_manage_buffers_legacy(req, bl);
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
|
||||
if (ret < 0)
|
||||
req_set_fail(req);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
|
||||
{
|
||||
 	struct io_uring_buf_reg reg;
-	struct io_buffer_list *bl, *free_bl = NULL;
+	struct io_buffer_list *bl;
 	struct io_uring_region_desc rd;
 	struct io_uring_buf_ring *br;
 	unsigned long mmap_offset;

@@ -605,8 +571,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)

 	if (copy_from_user(&reg, arg, sizeof(reg)))
 		return -EFAULT;
-	if (reg.resv[0] || reg.resv[1] || reg.resv[2])
+	if (!mem_is_zero(reg.resv, sizeof(reg.resv)))
 		return -EINVAL;
 	if (reg.flags & ~(IOU_PBUF_RING_MMAP | IOU_PBUF_RING_INC))
 		return -EINVAL;

@@ -624,7 +589,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 		io_destroy_bl(ctx, bl);
 	}

-	free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+	bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
 	if (!bl)
 		return -ENOMEM;

@@ -669,7 +634,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	return 0;
 fail:
 	io_free_region(ctx, &bl->region);
-	kfree(free_bl);
+	kfree(bl);
 	return ret;
 }

@@ -682,9 +647,7 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)

 	if (copy_from_user(&reg, arg, sizeof(reg)))
 		return -EFAULT;
-	if (reg.resv[0] || reg.resv[1] || reg.resv[2])
-		return -EINVAL;
-	if (reg.flags)
+	if (!mem_is_zero(reg.resv, sizeof(reg.resv)) || reg.flags)
 		return -EINVAL;

 	bl = io_buffer_get_list(ctx, reg.bgid);

@@ -704,14 +667,11 @@ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
 {
 	struct io_uring_buf_status buf_status;
 	struct io_buffer_list *bl;
-	int i;

 	if (copy_from_user(&buf_status, arg, sizeof(buf_status)))
 		return -EFAULT;

-	for (i = 0; i < ARRAY_SIZE(buf_status.resv); i++)
-		if (buf_status.resv[i])
-			return -EINVAL;
+	if (!mem_is_zero(buf_status.resv, sizeof(buf_status.resv)))
+		return -EINVAL;

 	bl = io_buffer_get_list(ctx, buf_status.buf_group);
 	if (!bl)

@@ -55,20 +55,19 @@ struct buf_sel_arg {
 	size_t max_len;
 	unsigned short nr_iovs;
 	unsigned short mode;
+	unsigned buf_group;
 };

 void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
-			      unsigned int issue_flags);
+			      unsigned buf_group, unsigned int issue_flags);
 int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg,
 		      unsigned int issue_flags);
 int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg);
 void io_destroy_buffers(struct io_ring_ctx *ctx);

 int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
-int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags);
-
 int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
-int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags);
+int io_manage_buffers_legacy(struct io_kiocb *req, unsigned int issue_flags);

 int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
 int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);

@@ -94,7 +93,6 @@ static inline bool io_kbuf_recycle_ring(struct io_kiocb *req)
 	 * to monopolize the buffer.
 	 */
 	if (req->buf_list) {
-		req->buf_index = req->buf_list->bgid;
 		req->flags &= ~(REQ_F_BUFFER_RING|REQ_F_BUFFERS_COMMIT);
 		return true;
 	}
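As context for the provided-buffer-ring checks above (the reserved fields now validated with mem_is_zero()), here is a minimal userspace sketch of registering a buffer ring through liburing. It is not part of this patch; the group id, entry count and buffer size are arbitrary, and liburing's io_uring_setup_buf_ring() zeroes the reserved registration fields for you:

/* Sketch: register a provided buffer ring via liburing (group id and sizes arbitrary). */
#include <liburing.h>

#define BGID     0
#define ENTRIES  64
#define BUF_SZ   4096

int setup_pbuf_ring(struct io_uring *ring)
{
	struct io_uring_buf_ring *br;
	static char bufs[ENTRIES][BUF_SZ];
	int i, ret;

	/* allocates, mmaps and registers the ring in one call */
	br = io_uring_setup_buf_ring(ring, ENTRIES, BGID, 0, &ret);
	if (!br)
		return ret;

	for (i = 0; i < ENTRIES; i++)
		io_uring_buf_ring_add(br, bufs[i], BUF_SZ, i,
				      io_uring_buf_ring_mask(ENTRIES), i);
	io_uring_buf_ring_advance(br, ENTRIES);
	return 0;
}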
@@ -13,6 +13,7 @@
 #include "memmap.h"
 #include "kbuf.h"
 #include "rsrc.h"
+#include "zcrx.h"

 static void *io_mem_alloc_compound(struct page **pages, int nr_pages,
 				   size_t size, gfp_t gfp)

@@ -258,7 +259,8 @@ static struct io_mapped_region *io_mmap_get_region(struct io_ring_ctx *ctx,
 						   loff_t pgoff)
 {
 	loff_t offset = pgoff << PAGE_SHIFT;
-	unsigned int bgid;
+	unsigned int id;

 	switch (offset & IORING_OFF_MMAP_MASK) {
 	case IORING_OFF_SQ_RING:

@@ -267,12 +269,13 @@ static struct io_mapped_region *io_mmap_get_region(struct io_ring_ctx *ctx,
 	case IORING_OFF_SQES:
 		return &ctx->sq_region;
 	case IORING_OFF_PBUF_RING:
-		bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
-		return io_pbuf_get_region(ctx, bgid);
+		id = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT;
+		return io_pbuf_get_region(ctx, id);
 	case IORING_MAP_OFF_PARAM_REGION:
 		return &ctx->param_region;
 	case IORING_MAP_OFF_ZCRX_REGION:
-		return &ctx->zcrx_region;
+		id = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_ZCRX_SHIFT;
+		return io_zcrx_get_region(ctx, id);
 	}
 	return NULL;
 }

@@ -4,7 +4,9 @@
 #define IORING_MAP_OFF_PARAM_REGION		0x20000000ULL
 #define IORING_MAP_OFF_ZCRX_REGION		0x30000000ULL

-struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages);
+#define IORING_OFF_ZCRX_SHIFT	16
+
+struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages);

 #ifndef CONFIG_MMU
 unsigned int io_uring_nommu_mmap_capabilities(struct file *file);
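To illustrate the lookup above: the mmap offset packs a region type plus an id, which the kernel decodes in io_mmap_get_region(); with this series the zcrx case gains the same id-in-offset scheme (IORING_MAP_OFF_ZCRX_REGION with the new IORING_OFF_ZCRX_SHIFT) instead of a single per-ring region. A hedged sketch of the long-standing provided-buffer-ring case, which is established uapi (the zcrx offset itself is kernel-internal and is communicated to userspace via the registration's region descriptor):

/* Sketch: mmap offset for a pbuf ring registered with IOU_PBUF_RING_MMAP;
 * the buffer group id is shifted into the offset, mirroring the kernel decode. */
#include <sys/mman.h>
#include <liburing.h>

static void *map_pbuf_ring(int ring_fd, unsigned bgid, size_t ring_sz)
{
	off_t off = IORING_OFF_PBUF_RING |
		    ((off_t)bgid << IORING_OFF_PBUF_SHIFT);

	return mmap(NULL, ring_sz, PROT_READ | PROT_WRITE,
		    MAP_SHARED | MAP_POPULATE, ring_fd, off);
}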
@@ -328,7 +328,7 @@ done:
 		req_set_fail(req);
 	}
 	io_req_set_res(req, ret, 0);
-	return IOU_OK;
+	return IOU_COMPLETE;
 }

 int io_uring_sync_msg_ring(struct io_uring_sqe *sqe)
@@ -18,7 +18,6 @@
 #include "rsrc.h"
 #include "zcrx.h"

-#if defined(CONFIG_NET)
 struct io_shutdown {
 	struct file *file;
 	int how;

@@ -129,7 +128,7 @@ int io_shutdown(struct io_kiocb *req, unsigned int issue_flags)

 	ret = __sys_shutdown_sock(sock, shutdown->how);
 	io_req_set_res(req, ret, 0);
-	return IOU_OK;
+	return IOU_COMPLETE;
 }

 static bool io_net_retry(struct socket *sock, int flags)

@@ -190,7 +189,6 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
 	sr->done_io = 0;
 	sr->retry = false;
 	sr->len = 0; /* get from the provided buffer */
-	req->buf_index = sr->buf_group;
 }

 static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,

@@ -359,15 +357,13 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		kmsg->msg.msg_name = &kmsg->addr;
 		kmsg->msg.msg_namelen = addr_len;
 	}
-	if (sr->flags & IORING_RECVSEND_FIXED_BUF)
+	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
+		req->flags |= REQ_F_IMPORT_BUFFER;
 		return 0;
-	if (!io_do_buffer_select(req)) {
-		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
-				  &kmsg->msg.msg_iter);
-		if (unlikely(ret < 0))
-			return ret;
 	}
-	return 0;
+	if (req->flags & REQ_F_BUFFER_SELECT)
+		return 0;
+	return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
 }

 static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)

@@ -409,13 +405,12 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
 	if (sr->msg_flags & MSG_DONTWAIT)
 		req->flags |= REQ_F_NOWAIT;
+	if (req->flags & REQ_F_BUFFER_SELECT)
+		sr->buf_group = req->buf_index;
 	if (sr->flags & IORING_RECVSEND_BUNDLE) {
 		if (req->opcode == IORING_OP_SENDMSG)
 			return -EINVAL;
 		if (!(req->flags & REQ_F_BUFFER_SELECT))
 			return -EINVAL;
 		sr->msg_flags |= MSG_WAITALL;
-		sr->buf_group = req->buf_index;
 		req->buf_list = NULL;
 		req->flags |= REQ_F_MULTISHOT;
 	}

@@ -507,7 +502,7 @@ static inline bool io_send_finish(struct io_kiocb *req, int *ret,
 	/* Otherwise stop bundle and use the current result. */
 finish:
 	io_req_set_res(req, *ret, cflags);
-	*ret = IOU_OK;
+	*ret = IOU_COMPLETE;
 	return true;
 }

@@ -558,7 +553,7 @@ int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
 	else if (sr->done_io)
 		ret = sr->done_io;
 	io_req_set_res(req, ret, 0);
-	return IOU_OK;
+	return IOU_COMPLETE;
 }

 static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,

@@ -571,6 +566,7 @@ static int io_send_select_buffer(struct io_kiocb *req, unsigned int issue_flags,
 		.iovs = &kmsg->fast_iov,
 		.max_len = min_not_zero(sr->len, INT_MAX),
 		.nr_iovs = 1,
+		.buf_group = sr->buf_group,
 	};

 	if (kmsg->vec.iovec) {

@@ -723,7 +719,6 @@ static int io_recvmsg_prep_setup(struct io_kiocb *req)
 {
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 	struct io_async_msghdr *kmsg;
-	int ret;

 	kmsg = io_msg_alloc_async(req);
 	if (unlikely(!kmsg))

@@ -739,13 +734,10 @@ static int io_recvmsg_prep_setup(struct io_kiocb *req)
 		kmsg->msg.msg_iocb = NULL;
 		kmsg->msg.msg_ubuf = NULL;

-		if (!io_do_buffer_select(req)) {
-			ret = import_ubuf(ITER_DEST, sr->buf, sr->len,
-					  &kmsg->msg.msg_iter);
-			if (unlikely(ret))
-				return ret;
-		}
-		return 0;
+		if (req->flags & REQ_F_BUFFER_SELECT)
+			return 0;
+		return import_ubuf(ITER_DEST, sr->buf, sr->len,
+				   &kmsg->msg.msg_iter);
 	}

 	return io_recvmsg_copy_hdr(req, kmsg);

@@ -991,7 +983,7 @@ retry_multishot:
 		void __user *buf;
 		size_t len = sr->len;

-		buf = io_buffer_select(req, &len, issue_flags);
+		buf = io_buffer_select(req, &len, sr->buf_group, issue_flags);
 		if (!buf)
 			return -ENOBUFS;

@@ -1069,6 +1061,7 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
 		.iovs = &kmsg->fast_iov,
 		.nr_iovs = 1,
 		.mode = KBUF_MODE_EXPAND,
+		.buf_group = sr->buf_group,
 	};

 	if (kmsg->vec.iovec) {

@@ -1101,7 +1094,7 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
 		void __user *buf;

 		*len = sr->len;
-		buf = io_buffer_select(req, len, issue_flags);
+		buf = io_buffer_select(req, len, sr->buf_group, issue_flags);
 		if (!buf)
 			return -ENOBUFS;
 		sr->buf = buf;

@@ -1197,16 +1190,14 @@ int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc);
 	unsigned ifq_idx;

-	if (unlikely(sqe->file_index || sqe->addr2 || sqe->addr ||
-		     sqe->addr3))
+	if (unlikely(sqe->addr2 || sqe->addr || sqe->addr3))
 		return -EINVAL;

 	ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx);
-	if (ifq_idx != 0)
-		return -EINVAL;
-	zc->ifq = req->ctx->ifq;
+	zc->ifq = xa_load(&req->ctx->zcrx_ctxs, ifq_idx);
 	if (!zc->ifq)
 		return -EINVAL;

 	zc->len = READ_ONCE(sqe->len);
 	zc->flags = READ_ONCE(sqe->ioprio);
 	zc->msg_flags = READ_ONCE(sqe->msg_flags);

@@ -1327,8 +1318,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return -ENOMEM;

 	if (req->opcode == IORING_OP_SEND_ZC) {
-		if (zc->flags & IORING_RECVSEND_FIXED_BUF)
-			req->flags |= REQ_F_IMPORT_BUFFER;
 		ret = io_send_setup(req, sqe);
 	} else {
 		if (unlikely(sqe->addr2 || sqe->file_index))

@@ -1476,7 +1465,7 @@ int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
 		io_req_msg_cleanup(req, 0);
 	}
 	io_req_set_res(req, ret, IORING_CQE_F_MORE);
-	return IOU_OK;
+	return IOU_COMPLETE;
 }

 int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)

@@ -1547,7 +1536,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
 		io_req_msg_cleanup(req, 0);
 	}
 	io_req_set_res(req, ret, IORING_CQE_F_MORE);
-	return IOU_OK;
+	return IOU_COMPLETE;
 }

 void io_sendrecv_fail(struct io_kiocb *req)

@@ -1711,7 +1700,7 @@ int io_socket(struct io_kiocb *req, unsigned int issue_flags)
 					    sock->file_slot);
 	}
 	io_req_set_res(req, ret, 0);
-	return IOU_OK;
+	return IOU_COMPLETE;
 }

 int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)

@@ -1778,7 +1767,7 @@ out:
 		req_set_fail(req);
 	io_req_msg_cleanup(req, issue_flags);
 	io_req_set_res(req, ret, 0);
-	return IOU_OK;
+	return IOU_COMPLETE;
 }

 int io_bind_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)

@@ -1852,4 +1841,3 @@ void io_netmsg_cache_free(const void *entry)
 	io_vec_free(&kmsg->vec);
 	kfree(kmsg);
 }
-#endif
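The buf_group plumbing above is what buffer selection consumes at receive time. As a reference point only (not part of the patch), a minimal liburing sketch of a multishot receive that picks from a provided-buffer group; the group id is an assumption and the ring/socket setup is elided:

/* Sketch: queue a multishot recv selecting from provided buffer group BGID. */
#include <liburing.h>

#define BGID 0

static void queue_recv_multishot(struct io_uring *ring, int sockfd)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	/* len == 0: the size comes from the buffer picked out of the group */
	io_uring_prep_recv_multishot(sqe, sockfd, NULL, 0, 0);
	sqe->flags |= IOSQE_BUFFER_SELECT;	/* pick from a provided buffer group */
	sqe->buf_group = BGID;			/* which group to pick from */
}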
@@ -68,5 +68,5 @@ done:
 	if (ret < 0)
 		req_set_fail(req);
 	io_req_set_res(req, nop->result, 0);
-	return IOU_OK;
+	return IOU_COMPLETE;
 }
@@ -112,6 +112,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)

 	if (unlikely(!io_alloc_req(ctx, &notif)))
 		return NULL;
+	notif->ctx = ctx;
 	notif->opcode = IORING_OP_NOP;
 	notif->flags = 0;
 	notif->file = NULL;
@@ -333,13 +333,13 @@ const struct io_issue_def io_issue_defs[] = {
 		.audit_skip		= 1,
 		.iopoll			= 1,
 		.prep			= io_provide_buffers_prep,
-		.issue			= io_provide_buffers,
+		.issue			= io_manage_buffers_legacy,
 	},
 	[IORING_OP_REMOVE_BUFFERS] = {
 		.audit_skip		= 1,
 		.iopoll			= 1,
 		.prep			= io_remove_buffers_prep,
-		.issue			= io_remove_buffers,
+		.issue			= io_manage_buffers_legacy,
 	},
 	[IORING_OP_TEE] = {
 		.needs_file		= 1,

@@ -569,6 +569,10 @@ const struct io_issue_def io_issue_defs[] = {
 		.prep			= io_prep_writev_fixed,
 		.issue			= io_write,
 	},
+	[IORING_OP_PIPE] = {
+		.prep			= io_pipe_prep,
+		.issue			= io_pipe,
+	},
 };

 const struct io_cold_def io_cold_defs[] = {

@@ -815,6 +819,9 @@ const struct io_cold_def io_cold_defs[] = {
 		.cleanup		= io_readv_writev_cleanup,
 		.fail			= io_rw_fail,
 	},
+	[IORING_OP_PIPE] = {
+		.name			= "PIPE",
+	},
 };

 const char *io_uring_get_opcode(u8 opcode)
@@ -6,6 +6,8 @@
 #include <linux/fdtable.h>
 #include <linux/fsnotify.h>
 #include <linux/namei.h>
+#include <linux/pipe_fs_i.h>
+#include <linux/watch_queue.h>
 #include <linux/io_uring.h>

 #include <uapi/linux/io_uring.h>

@@ -169,7 +171,7 @@ err:
 	if (ret < 0)
 		req_set_fail(req);
 	io_req_set_res(req, ret, 0);
-	return IOU_OK;
+	return IOU_COMPLETE;
 }

 int io_openat(struct io_kiocb *req, unsigned int issue_flags)

@@ -257,7 +259,7 @@ err:
 	if (ret < 0)
 		req_set_fail(req);
 	io_req_set_res(req, ret, 0);
-	return IOU_OK;
+	return IOU_COMPLETE;
 }

 int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)

@@ -300,5 +302,136 @@ int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags)
 	if (ret < 0)
 		req_set_fail(req);
 	io_req_set_res(req, ret, 0);
-	return IOU_OK;
+	return IOU_COMPLETE;
 }
+
+struct io_pipe {
+	struct file *file;
+	int __user *fds;
+	int flags;
+	int file_slot;
+	unsigned long nofile;
+};
+
+int io_pipe_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+	struct io_pipe *p = io_kiocb_to_cmd(req, struct io_pipe);
+
+	if (sqe->fd || sqe->off || sqe->addr3)
+		return -EINVAL;
+
+	p->fds = u64_to_user_ptr(READ_ONCE(sqe->addr));
+	p->flags = READ_ONCE(sqe->pipe_flags);
+	if (p->flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
+		return -EINVAL;
+
+	p->file_slot = READ_ONCE(sqe->file_index);
+	p->nofile = rlimit(RLIMIT_NOFILE);
+	return 0;
+}
+
+static int io_pipe_fixed(struct io_kiocb *req, struct file **files,
+			 unsigned int issue_flags)
+{
+	struct io_pipe *p = io_kiocb_to_cmd(req, struct io_pipe);
+	struct io_ring_ctx *ctx = req->ctx;
+	int ret, fds[2] = { -1, -1 };
+	int slot = p->file_slot;
+
+	if (p->flags & O_CLOEXEC)
+		return -EINVAL;
+
+	io_ring_submit_lock(ctx, issue_flags);
+
+	ret = __io_fixed_fd_install(ctx, files[0], slot);
+	if (ret < 0)
+		goto err;
+	fds[0] = ret;
+	files[0] = NULL;
+
+	/*
+	 * If a specific slot is given, next one will be used for
+	 * the write side.
+	 */
+	if (slot != IORING_FILE_INDEX_ALLOC)
+		slot++;
+
+	ret = __io_fixed_fd_install(ctx, files[1], slot);
+	if (ret < 0)
+		goto err;
+	fds[1] = ret;
+	files[1] = NULL;
+
+	io_ring_submit_unlock(ctx, issue_flags);
+
+	if (!copy_to_user(p->fds, fds, sizeof(fds)))
+		return 0;
+
+	ret = -EFAULT;
+	io_ring_submit_lock(ctx, issue_flags);
+err:
+	if (fds[0] != -1)
+		io_fixed_fd_remove(ctx, fds[0]);
+	if (fds[1] != -1)
+		io_fixed_fd_remove(ctx, fds[1]);
+	io_ring_submit_unlock(ctx, issue_flags);
+	return ret;
+}
+
+static int io_pipe_fd(struct io_kiocb *req, struct file **files)
+{
+	struct io_pipe *p = io_kiocb_to_cmd(req, struct io_pipe);
+	int ret, fds[2] = { -1, -1 };
+
+	ret = __get_unused_fd_flags(p->flags, p->nofile);
+	if (ret < 0)
+		goto err;
+	fds[0] = ret;
+
+	ret = __get_unused_fd_flags(p->flags, p->nofile);
+	if (ret < 0)
+		goto err;
+	fds[1] = ret;
+
+	if (!copy_to_user(p->fds, fds, sizeof(fds))) {
+		fd_install(fds[0], files[0]);
+		fd_install(fds[1], files[1]);
+		return 0;
+	}
+	ret = -EFAULT;
+err:
+	if (fds[0] != -1)
+		put_unused_fd(fds[0]);
+	if (fds[1] != -1)
+		put_unused_fd(fds[1]);
+	return ret;
+}
+
+int io_pipe(struct io_kiocb *req, unsigned int issue_flags)
+{
+	struct io_pipe *p = io_kiocb_to_cmd(req, struct io_pipe);
+	struct file *files[2];
+	int ret;
+
+	ret = create_pipe_files(files, p->flags);
+	if (ret)
+		return ret;
+	files[0]->f_mode |= FMODE_NOWAIT;
+	files[1]->f_mode |= FMODE_NOWAIT;
+
+	if (!!p->file_slot)
+		ret = io_pipe_fixed(req, files, issue_flags);
+	else
+		ret = io_pipe_fd(req, files);
+
+	io_req_set_res(req, ret, 0);
+	if (!ret)
+		return IOU_COMPLETE;
+
+	req_set_fail(req);
+	if (files[0])
+		fput(files[0]);
+	if (files[1])
+		fput(files[1]);
+	return ret;
+}
@@ -13,5 +13,8 @@ int io_openat2(struct io_kiocb *req, unsigned int issue_flags);
 int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_close(struct io_kiocb *req, unsigned int issue_flags);

+int io_pipe_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
+int io_pipe(struct io_kiocb *req, unsigned int issue_flags);
+
 int io_install_fixed_fd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
 int io_install_fixed_fd(struct io_kiocb *req, unsigned int issue_flags);
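For orientation, a hedged userspace sketch of driving the new IORING_OP_PIPE through a raw SQE. This is not from the patch: it assumes 6.16+ uapi headers (which define IORING_OP_PIPE) and uses liburing only for ring setup; a dedicated liburing prep helper may appear later but is not assumed. Flags and file_index are left at zero, i.e. a plain blocking pipe returned as normal (non-fixed) descriptors.

/* Sketch: create a pipe with IORING_OP_PIPE (kernel 6.16+ assumed). */
#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int fds[2];

	if (io_uring_queue_init(8, &ring, 0))
		return 1;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);			/* start from a fully initialized SQE */
	sqe->opcode = IORING_OP_PIPE;		/* new opcode in this series */
	sqe->fd = 0;				/* io_pipe_prep() requires fd/off/addr3 == 0 */
	sqe->addr = (unsigned long)fds;		/* kernel writes the two fds here */

	io_uring_submit(&ring);
	io_uring_wait_cqe(&ring, &cqe);
	printf("res=%d read=%d write=%d\n", cqe->res, fds[0], fds[1]);
	io_uring_cqe_seen(&ring, cqe);
	io_uring_queue_exit(&ring);
	return 0;
}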
@@ -893,7 +893,7 @@ int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
 	ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags);
 	if (ret > 0) {
 		io_req_set_res(req, ipt.result_mask, 0);
-		return IOU_OK;
+		return IOU_COMPLETE;
 	}
 	return ret ?: IOU_ISSUE_SKIP_COMPLETE;
 }

@@ -948,5 +948,5 @@ out:
 	}
 	/* complete update request, we're done with it */
 	io_req_set_res(req, ret, 0);
-	return IOU_OK;
+	return IOU_COMPLETE;
 }
@ -80,10 +80,21 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int io_buffer_validate(struct iovec *iov)
|
||||
int io_validate_user_buf_range(u64 uaddr, u64 ulen)
|
||||
{
|
||||
unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1);
|
||||
unsigned long tmp, base = (unsigned long)uaddr;
|
||||
unsigned long acct_len = (unsigned long)PAGE_ALIGN(ulen);
|
||||
|
||||
/* arbitrary limit, but we need something */
|
||||
if (ulen > SZ_1G || !ulen)
|
||||
return -EFAULT;
|
||||
if (check_add_overflow(base, acct_len, &tmp))
|
||||
return -EOVERFLOW;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int io_buffer_validate(struct iovec *iov)
|
||||
{
|
||||
/*
|
||||
* Don't impose further limits on the size and buffer
|
||||
* constraints here, we'll -EINVAL later when IO is
|
||||
|
@ -91,17 +102,9 @@ int io_buffer_validate(struct iovec *iov)
|
|||
*/
|
||||
if (!iov->iov_base)
|
||||
return iov->iov_len ? -EFAULT : 0;
|
||||
if (!iov->iov_len)
|
||||
return -EFAULT;
|
||||
|
||||
/* arbitrary limit, but we need something */
|
||||
if (iov->iov_len > SZ_1G)
|
||||
return -EFAULT;
|
||||
|
||||
if (check_add_overflow((unsigned long)iov->iov_base, acct_len, &tmp))
|
||||
return -EOVERFLOW;
|
||||
|
||||
return 0;
|
||||
return io_validate_user_buf_range((unsigned long)iov->iov_base,
|
||||
iov->iov_len);
|
||||
}
|
||||
|
||||
static void io_release_ubuf(void *priv)
|
||||
|
@ -497,7 +500,7 @@ int io_files_update(struct io_kiocb *req, unsigned int issue_flags)
|
|||
if (ret < 0)
|
||||
req_set_fail(req);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
|
||||
|
@ -685,38 +688,34 @@ static bool io_coalesce_buffer(struct page ***pages, int *nr_pages,
|
|||
struct io_imu_folio_data *data)
|
||||
{
|
||||
struct page **page_array = *pages, **new_array = NULL;
|
||||
int nr_pages_left = *nr_pages, i, j;
|
||||
int nr_folios = data->nr_folios;
|
||||
unsigned nr_pages_left = *nr_pages;
|
||||
unsigned nr_folios = data->nr_folios;
|
||||
unsigned i, j;
|
||||
|
||||
/* Store head pages only*/
|
||||
new_array = kvmalloc_array(nr_folios, sizeof(struct page *),
|
||||
GFP_KERNEL);
|
||||
new_array = kvmalloc_array(nr_folios, sizeof(struct page *), GFP_KERNEL);
|
||||
if (!new_array)
|
||||
return false;
|
||||
|
||||
new_array[0] = compound_head(page_array[0]);
|
||||
/*
|
||||
* The pages are bound to the folio, it doesn't
|
||||
* actually unpin them but drops all but one reference,
|
||||
* which is usually put down by io_buffer_unmap().
|
||||
* Note, needs a better helper.
|
||||
*/
|
||||
if (data->nr_pages_head > 1)
|
||||
unpin_user_pages(&page_array[1], data->nr_pages_head - 1);
|
||||
for (i = 0, j = 0; i < nr_folios; i++) {
|
||||
struct page *p = compound_head(page_array[j]);
|
||||
struct folio *folio = page_folio(p);
|
||||
unsigned int nr;
|
||||
|
||||
j = data->nr_pages_head;
|
||||
nr_pages_left -= data->nr_pages_head;
|
||||
for (i = 1; i < nr_folios; i++) {
|
||||
unsigned int nr_unpin;
|
||||
WARN_ON_ONCE(i > 0 && p != page_array[j]);
|
||||
|
||||
new_array[i] = page_array[j];
|
||||
nr_unpin = min_t(unsigned int, nr_pages_left - 1,
|
||||
data->nr_pages_mid - 1);
|
||||
if (nr_unpin)
|
||||
unpin_user_pages(&page_array[j+1], nr_unpin);
|
||||
j += data->nr_pages_mid;
|
||||
nr_pages_left -= data->nr_pages_mid;
|
||||
nr = i ? data->nr_pages_mid : data->nr_pages_head;
|
||||
nr = min(nr, nr_pages_left);
|
||||
/* Drop all but one ref, the entire folio will remain pinned. */
|
||||
if (nr > 1)
|
||||
unpin_user_folio(folio, nr - 1);
|
||||
j += nr;
|
||||
nr_pages_left -= nr;
|
||||
new_array[i] = p;
|
||||
}
|
||||
|
||||
WARN_ON_ONCE(j != *nr_pages);
|
||||
|
||||
kvfree(page_array);
|
||||
*pages = new_array;
|
||||
*nr_pages = nr_folios;
|
||||
|
@ -1062,8 +1061,6 @@ static int io_import_fixed(int ddir, struct iov_iter *iter,
|
|||
size_t offset;
|
||||
int ret;
|
||||
|
||||
if (WARN_ON_ONCE(!imu))
|
||||
return -EFAULT;
|
||||
ret = validate_fixed_range(buf_addr, len, imu);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
@ -1110,13 +1107,19 @@ inline struct io_rsrc_node *io_find_buf_node(struct io_kiocb *req,
|
|||
|
||||
if (req->flags & REQ_F_BUF_NODE)
|
||||
return req->buf_node;
|
||||
req->flags |= REQ_F_BUF_NODE;
|
||||
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
|
||||
if (node)
|
||||
io_req_assign_buf_node(req, node);
|
||||
if (node) {
|
||||
node->refs++;
|
||||
req->buf_node = node;
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
return node;
|
||||
}
|
||||
req->flags &= ~REQ_F_BUF_NODE;
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
return node;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int io_import_reg_buf(struct io_kiocb *req, struct iov_iter *iter,
|
||||
|
|
|
@ -83,7 +83,7 @@ int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
|
|||
unsigned size, unsigned type);
|
||||
int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
|
||||
unsigned int size, unsigned int type);
|
||||
int io_buffer_validate(struct iovec *iov);
|
||||
int io_validate_user_buf_range(u64 uaddr, u64 ulen);
|
||||
|
||||
bool io_check_coalesce_buffer(struct page **page_array, int nr_pages,
|
||||
struct io_imu_folio_data *data);
|
||||
|
@ -115,32 +115,6 @@ static inline bool io_reset_rsrc_node(struct io_ring_ctx *ctx,
|
|||
return true;
|
||||
}
|
||||
|
||||
static inline void io_req_put_rsrc_nodes(struct io_kiocb *req)
|
||||
{
|
||||
if (req->file_node) {
|
||||
io_put_rsrc_node(req->ctx, req->file_node);
|
||||
req->file_node = NULL;
|
||||
}
|
||||
if (req->flags & REQ_F_BUF_NODE) {
|
||||
io_put_rsrc_node(req->ctx, req->buf_node);
|
||||
req->buf_node = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void io_req_assign_rsrc_node(struct io_rsrc_node **dst_node,
|
||||
struct io_rsrc_node *node)
|
||||
{
|
||||
node->refs++;
|
||||
*dst_node = node;
|
||||
}
|
||||
|
||||
static inline void io_req_assign_buf_node(struct io_kiocb *req,
|
||||
struct io_rsrc_node *node)
|
||||
{
|
||||
io_req_assign_rsrc_node(&req->buf_node, node);
|
||||
req->flags |= REQ_F_BUF_NODE;
|
||||
}
|
||||
|
||||
int io_files_update(struct io_kiocb *req, unsigned int issue_flags);
|
||||
int io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
|
||||
|
|
|
@@ -119,7 +119,7 @@ static int __io_import_rw_buffer(int ddir, struct io_kiocb *req,
 		return io_import_vec(ddir, req, io, buf, sqe_len);

 	if (io_do_buffer_select(req)) {
-		buf = io_buffer_select(req, &sqe_len, issue_flags);
+		buf = io_buffer_select(req, &sqe_len, io->buf_group, issue_flags);
 		if (!buf)
 			return -ENOBUFS;
 		rw->addr = (unsigned long) buf;

@@ -253,16 +253,19 @@ static int __io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 				      int ddir)
 {
 	struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+	struct io_async_rw *io;
 	unsigned ioprio;
 	u64 attr_type_mask;
 	int ret;

 	if (io_rw_alloc_async(req))
 		return -ENOMEM;
+	io = req->async_data;

 	rw->kiocb.ki_pos = READ_ONCE(sqe->off);
 	/* used for fixed read/write too - just read unconditionally */
 	req->buf_index = READ_ONCE(sqe->buf_index);
+	io->buf_group = req->buf_index;

 	ioprio = READ_ONCE(sqe->ioprio);
 	if (ioprio) {

@@ -658,7 +661,7 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret,
 		io_req_io_end(req);
 		io_req_set_res(req, final_ret, io_put_kbuf(req, ret, issue_flags));
 		io_req_rw_cleanup(req, issue_flags);
-		return IOU_OK;
+		return IOU_COMPLETE;
 	} else {
 		io_rw_done(req, ret);
 	}
@@ -16,6 +16,8 @@ struct io_async_rw {
 	struct iov_iter			iter;
 	struct iov_iter_state		iter_state;
 	struct iovec			fast_iov;
+	unsigned			buf_group;
+
 	/*
 	 * wpq is for buffered io, while meta fields are used with
 	 * direct io
@ -103,7 +103,7 @@ done:
|
|||
if (ret != sp->len)
|
||||
req_set_fail(req);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
|
@ -144,5 +144,5 @@ done:
|
|||
if (ret != sp->len)
|
||||
req_set_fail(req);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
|
|
@ -59,7 +59,7 @@ int io_statx(struct io_kiocb *req, unsigned int issue_flags)
|
|||
|
||||
ret = do_statx(sx->dfd, sx->filename, sx->flags, sx->mask, sx->buffer);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
void io_statx_cleanup(struct io_kiocb *req)
|
||||
|
|
|
@ -47,7 +47,7 @@ int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
|
|||
|
||||
ret = sync_file_range(req->file, sync->off, sync->len, sync->flags);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
|
@ -79,7 +79,7 @@ int io_fsync(struct io_kiocb *req, unsigned int issue_flags)
|
|||
ret = vfs_fsync_range(req->file, sync->off, end > 0 ? end : LLONG_MAX,
|
||||
sync->flags & IORING_FSYNC_DATASYNC);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
int io_fallocate_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
|
@ -108,5 +108,5 @@ int io_fallocate(struct io_kiocb *req, unsigned int issue_flags)
|
|||
if (ret >= 0)
|
||||
fsnotify_modify(req->file);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
|
|
@ -35,8 +35,6 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
|
|||
|
||||
data.hash = hash;
|
||||
data.task = task;
|
||||
data.free_work = io_wq_free_work;
|
||||
data.do_work = io_wq_submit_work;
|
||||
|
||||
/* Do QD, or 4 * CPUS, whatever is smallest */
|
||||
concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
|
||||
|
|
|
@ -35,6 +35,9 @@ struct io_timeout_rem {
|
|||
bool ltimeout;
|
||||
};
|
||||
|
||||
static struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
|
||||
struct io_kiocb *link);
|
||||
|
||||
static inline bool io_is_timeout_noseq(struct io_kiocb *req)
|
||||
{
|
||||
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
|
||||
|
@ -218,7 +221,9 @@ void io_disarm_next(struct io_kiocb *req)
|
|||
struct io_ring_ctx *ctx = req->ctx;
|
||||
|
||||
raw_spin_lock_irq(&ctx->timeout_lock);
|
||||
link = io_disarm_linked_timeout(req);
|
||||
if (req->link && req->link->opcode == IORING_OP_LINK_TIMEOUT)
|
||||
link = __io_disarm_linked_timeout(req, req->link);
|
||||
|
||||
raw_spin_unlock_irq(&ctx->timeout_lock);
|
||||
if (link)
|
||||
io_req_queue_tw_complete(link, -ECANCELED);
|
||||
|
@ -228,8 +233,8 @@ void io_disarm_next(struct io_kiocb *req)
|
|||
io_fail_links(req);
|
||||
}
|
||||
|
||||
struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
|
||||
struct io_kiocb *link)
|
||||
static struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
|
||||
struct io_kiocb *link)
|
||||
__must_hold(&req->ctx->completion_lock)
|
||||
__must_hold(&req->ctx->timeout_lock)
|
||||
{
|
||||
|
@ -500,7 +505,7 @@ int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags)
|
|||
if (ret < 0)
|
||||
req_set_fail(req);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
static int __io_timeout_prep(struct io_kiocb *req,
|
||||
|
|
|
@ -8,19 +8,6 @@ struct io_timeout_data {
|
|||
u32 flags;
|
||||
};
|
||||
|
||||
struct io_kiocb *__io_disarm_linked_timeout(struct io_kiocb *req,
|
||||
struct io_kiocb *link);
|
||||
|
||||
static inline struct io_kiocb *io_disarm_linked_timeout(struct io_kiocb *req)
|
||||
{
|
||||
struct io_kiocb *link = req->link;
|
||||
|
||||
if (link && link->opcode == IORING_OP_LINK_TIMEOUT)
|
||||
return __io_disarm_linked_timeout(req, link);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
__cold void io_flush_timeouts(struct io_ring_ctx *ctx);
|
||||
struct io_cancel_data;
|
||||
int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd);
|
||||
|
|
|
@ -44,5 +44,5 @@ int io_ftruncate(struct io_kiocb *req, unsigned int issue_flags)
|
|||
ret = do_ftruncate(req->file, ft->len, 1);
|
||||
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
|
|
@ -3,13 +3,10 @@
|
|||
#include <linux/errno.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/io_uring/cmd.h>
|
||||
#include <linux/io_uring/net.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/nospec.h>
|
||||
#include <net/sock.h>
|
||||
|
||||
#include <uapi/linux/io_uring.h>
|
||||
#include <asm/ioctls.h>
|
||||
|
||||
#include "io_uring.h"
|
||||
#include "alloc_cache.h"
|
||||
|
@ -268,7 +265,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
|
|||
req_set_fail(req);
|
||||
io_req_uring_cleanup(req, issue_flags);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
|
@ -278,6 +275,9 @@ int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
|||
{
|
||||
struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
|
||||
|
||||
if (WARN_ON_ONCE(!(ioucmd->flags & IORING_URING_CMD_FIXED)))
|
||||
return -EINVAL;
|
||||
|
||||
return io_import_reg_buf(req, iter, ubuf, len, rw, issue_flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);
|
||||
|
@ -292,6 +292,9 @@ int io_uring_cmd_import_fixed_vec(struct io_uring_cmd *ioucmd,
|
|||
struct io_async_cmd *ac = req->async_data;
|
||||
int ret;
|
||||
|
||||
if (WARN_ON_ONCE(!(ioucmd->flags & IORING_URING_CMD_FIXED)))
|
||||
return -EINVAL;
|
||||
|
||||
ret = io_prep_reg_iovec(req, &ac->vec, uvec, uvec_segs);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@ -307,83 +310,3 @@ void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd)
|
|||
|
||||
io_req_queue_iowq(req);
|
||||
}
|
||||
|
||||
static inline int io_uring_cmd_getsockopt(struct socket *sock,
|
||||
struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
const struct io_uring_sqe *sqe = cmd->sqe;
|
||||
bool compat = !!(issue_flags & IO_URING_F_COMPAT);
|
||||
int optlen, optname, level, err;
|
||||
void __user *optval;
|
||||
|
||||
level = READ_ONCE(sqe->level);
|
||||
if (level != SOL_SOCKET)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
optval = u64_to_user_ptr(READ_ONCE(sqe->optval));
|
||||
optname = READ_ONCE(sqe->optname);
|
||||
optlen = READ_ONCE(sqe->optlen);
|
||||
|
||||
err = do_sock_getsockopt(sock, compat, level, optname,
|
||||
USER_SOCKPTR(optval),
|
||||
KERNEL_SOCKPTR(&optlen));
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* On success, return optlen */
|
||||
return optlen;
|
||||
}
|
||||
|
||||
static inline int io_uring_cmd_setsockopt(struct socket *sock,
|
||||
struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
const struct io_uring_sqe *sqe = cmd->sqe;
|
||||
bool compat = !!(issue_flags & IO_URING_F_COMPAT);
|
||||
int optname, optlen, level;
|
||||
void __user *optval;
|
||||
sockptr_t optval_s;
|
||||
|
||||
optval = u64_to_user_ptr(READ_ONCE(sqe->optval));
|
||||
optname = READ_ONCE(sqe->optname);
|
||||
optlen = READ_ONCE(sqe->optlen);
|
||||
level = READ_ONCE(sqe->level);
|
||||
optval_s = USER_SOCKPTR(optval);
|
||||
|
||||
return do_sock_setsockopt(sock, compat, level, optname, optval_s,
|
||||
optlen);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_NET)
|
||||
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
{
|
||||
struct socket *sock = cmd->file->private_data;
|
||||
struct sock *sk = sock->sk;
|
||||
struct proto *prot = READ_ONCE(sk->sk_prot);
|
||||
int ret, arg = 0;
|
||||
|
||||
if (!prot || !prot->ioctl)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
switch (cmd->cmd_op) {
|
||||
case SOCKET_URING_OP_SIOCINQ:
|
||||
ret = prot->ioctl(sk, SIOCINQ, &arg);
|
||||
if (ret)
|
||||
return ret;
|
||||
return arg;
|
||||
case SOCKET_URING_OP_SIOCOUTQ:
|
||||
ret = prot->ioctl(sk, SIOCOUTQ, &arg);
|
||||
if (ret)
|
||||
return ret;
|
||||
return arg;
|
||||
case SOCKET_URING_OP_GETSOCKOPT:
|
||||
return io_uring_cmd_getsockopt(sock, cmd, issue_flags);
|
||||
case SOCKET_URING_OP_SETSOCKOPT:
|
||||
return io_uring_cmd_setsockopt(sock, cmd, issue_flags);
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(io_uring_cmd_sock);
|
||||
#endif
|
||||
|
|
|
@ -323,5 +323,5 @@ done:
|
|||
if (ret < 0)
|
||||
req_set_fail(req);
|
||||
io_req_set_res(req, ret, 0);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
|
|
@ -109,7 +109,7 @@ int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags)
|
|||
|
||||
ret = file_getxattr(req->file, &ix->ctx);
|
||||
io_xattr_finish(req, ret);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
|
||||
|
@ -122,7 +122,7 @@ int io_getxattr(struct io_kiocb *req, unsigned int issue_flags)
|
|||
ret = filename_getxattr(AT_FDCWD, ix->filename, LOOKUP_FOLLOW, &ix->ctx);
|
||||
ix->filename = NULL;
|
||||
io_xattr_finish(req, ret);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
static int __io_setxattr_prep(struct io_kiocb *req,
|
||||
|
@ -190,7 +190,7 @@ int io_fsetxattr(struct io_kiocb *req, unsigned int issue_flags)
|
|||
|
||||
ret = file_setxattr(req->file, &ix->ctx);
|
||||
io_xattr_finish(req, ret);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
||||
int io_setxattr(struct io_kiocb *req, unsigned int issue_flags)
|
||||
|
@ -203,5 +203,5 @@ int io_setxattr(struct io_kiocb *req, unsigned int issue_flags)
|
|||
ret = filename_setxattr(AT_FDCWD, ix->filename, LOOKUP_FOLLOW, &ix->ctx);
|
||||
ix->filename = NULL;
|
||||
io_xattr_finish(req, ret);
|
||||
return IOU_OK;
|
||||
return IOU_COMPLETE;
|
||||
}
|
||||
|
|
io_uring/zcrx.c | 386
|
@ -26,27 +26,205 @@
|
|||
#include "zcrx.h"
|
||||
#include "rsrc.h"
|
||||
|
||||
#define IO_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
|
||||
|
||||
static inline struct io_zcrx_ifq *io_pp_to_ifq(struct page_pool *pp)
|
||||
{
|
||||
return pp->mp_priv;
|
||||
}
|
||||
|
||||
#define IO_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING)
|
||||
static inline struct io_zcrx_area *io_zcrx_iov_to_area(const struct net_iov *niov)
|
||||
{
|
||||
struct net_iov_area *owner = net_iov_owner(niov);
|
||||
|
||||
return container_of(owner, struct io_zcrx_area, nia);
|
||||
}
|
||||
|
||||
static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
|
||||
{
|
||||
struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
|
||||
|
||||
return area->mem.pages[net_iov_idx(niov)];
|
||||
}
|
||||
|
||||
static void io_release_dmabuf(struct io_zcrx_mem *mem)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
|
||||
return;
|
||||
|
||||
if (mem->sgt)
|
||||
dma_buf_unmap_attachment_unlocked(mem->attach, mem->sgt,
|
||||
DMA_FROM_DEVICE);
|
||||
if (mem->attach)
|
||||
dma_buf_detach(mem->dmabuf, mem->attach);
|
||||
if (mem->dmabuf)
|
||||
dma_buf_put(mem->dmabuf);
|
||||
|
||||
mem->sgt = NULL;
|
||||
mem->attach = NULL;
|
||||
mem->dmabuf = NULL;
|
||||
}
|
||||
|
||||
static int io_import_dmabuf(struct io_zcrx_ifq *ifq,
|
||||
struct io_zcrx_mem *mem,
|
||||
struct io_uring_zcrx_area_reg *area_reg)
|
||||
{
|
||||
unsigned long off = (unsigned long)area_reg->addr;
|
||||
unsigned long len = (unsigned long)area_reg->len;
|
||||
unsigned long total_size = 0;
|
||||
struct scatterlist *sg;
|
||||
int dmabuf_fd = area_reg->dmabuf_fd;
|
||||
int i, ret;
|
||||
|
||||
if (WARN_ON_ONCE(!ifq->dev))
|
||||
return -EFAULT;
|
||||
if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
|
||||
return -EINVAL;
|
||||
|
||||
mem->is_dmabuf = true;
|
||||
mem->dmabuf = dma_buf_get(dmabuf_fd);
|
||||
if (IS_ERR(mem->dmabuf)) {
|
||||
ret = PTR_ERR(mem->dmabuf);
|
||||
mem->dmabuf = NULL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
mem->attach = dma_buf_attach(mem->dmabuf, ifq->dev);
|
||||
if (IS_ERR(mem->attach)) {
|
||||
ret = PTR_ERR(mem->attach);
|
||||
mem->attach = NULL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
mem->sgt = dma_buf_map_attachment_unlocked(mem->attach, DMA_FROM_DEVICE);
|
||||
if (IS_ERR(mem->sgt)) {
|
||||
ret = PTR_ERR(mem->sgt);
|
||||
mem->sgt = NULL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
for_each_sgtable_dma_sg(mem->sgt, sg, i)
|
||||
total_size += sg_dma_len(sg);
|
||||
|
||||
if (total_size < off + len)
|
||||
return -EINVAL;
|
||||
|
||||
mem->dmabuf_offset = off;
|
||||
mem->size = len;
|
||||
return 0;
|
||||
err:
|
||||
io_release_dmabuf(mem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int io_zcrx_map_area_dmabuf(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
|
||||
{
|
||||
unsigned long off = area->mem.dmabuf_offset;
|
||||
struct scatterlist *sg;
|
||||
unsigned i, niov_idx = 0;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_DMA_SHARED_BUFFER))
|
||||
return -EINVAL;
|
||||
|
||||
for_each_sgtable_dma_sg(area->mem.sgt, sg, i) {
|
||||
dma_addr_t dma = sg_dma_address(sg);
|
||||
unsigned long sg_len = sg_dma_len(sg);
|
||||
unsigned long sg_off = min(sg_len, off);
|
||||
|
||||
off -= sg_off;
|
||||
sg_len -= sg_off;
|
||||
dma += sg_off;
|
||||
|
||||
while (sg_len && niov_idx < area->nia.num_niovs) {
|
||||
struct net_iov *niov = &area->nia.niovs[niov_idx];
|
||||
|
||||
if (net_mp_niov_set_dma_addr(niov, dma))
|
||||
return 0;
|
||||
sg_len -= PAGE_SIZE;
|
||||
dma += PAGE_SIZE;
|
||||
niov_idx++;
|
||||
}
|
||||
}
|
||||
return niov_idx;
|
||||
}
|
||||
|
||||
static int io_import_umem(struct io_zcrx_ifq *ifq,
|
||||
struct io_zcrx_mem *mem,
|
||||
struct io_uring_zcrx_area_reg *area_reg)
|
||||
{
|
||||
struct page **pages;
|
||||
int nr_pages;
|
||||
|
||||
if (area_reg->dmabuf_fd)
|
||||
return -EINVAL;
|
||||
if (!area_reg->addr)
|
||||
return -EFAULT;
|
||||
pages = io_pin_pages((unsigned long)area_reg->addr, area_reg->len,
|
||||
&nr_pages);
|
||||
if (IS_ERR(pages))
|
||||
return PTR_ERR(pages);
|
||||
|
||||
mem->pages = pages;
|
||||
mem->nr_folios = nr_pages;
|
||||
mem->size = area_reg->len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void io_release_area_mem(struct io_zcrx_mem *mem)
|
||||
{
|
||||
if (mem->is_dmabuf) {
|
||||
io_release_dmabuf(mem);
|
||||
return;
|
||||
}
|
||||
if (mem->pages) {
|
||||
unpin_user_pages(mem->pages, mem->nr_folios);
|
||||
kvfree(mem->pages);
|
||||
}
|
||||
}
|
||||
|
||||
static int io_import_area(struct io_zcrx_ifq *ifq,
|
||||
struct io_zcrx_mem *mem,
|
||||
struct io_uring_zcrx_area_reg *area_reg)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = io_validate_user_buf_range(area_reg->addr, area_reg->len);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (area_reg->addr & ~PAGE_MASK || area_reg->len & ~PAGE_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
if (area_reg->flags & IORING_ZCRX_AREA_DMABUF)
|
||||
return io_import_dmabuf(ifq, mem, area_reg);
|
||||
return io_import_umem(ifq, mem, area_reg);
|
||||
}
|
||||
|
||||
static void io_zcrx_unmap_umem(struct io_zcrx_ifq *ifq,
|
||||
struct io_zcrx_area *area, int nr_mapped)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr_mapped; i++) {
|
||||
netmem_ref netmem = net_iov_to_netmem(&area->nia.niovs[i]);
|
||||
dma_addr_t dma = page_pool_get_dma_addr_netmem(netmem);
|
||||
|
||||
dma_unmap_page_attrs(ifq->dev, dma, PAGE_SIZE,
|
||||
DMA_FROM_DEVICE, IO_DMA_ATTR);
|
||||
}
|
||||
}
|
||||
|
||||
static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq,
|
||||
struct io_zcrx_area *area, int nr_mapped)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr_mapped; i++) {
|
||||
struct net_iov *niov = &area->nia.niovs[i];
|
||||
dma_addr_t dma;
|
||||
if (area->mem.is_dmabuf)
|
||||
io_release_dmabuf(&area->mem);
|
||||
else
|
||||
io_zcrx_unmap_umem(ifq, area, nr_mapped);
|
||||
|
||||
dma = page_pool_get_dma_addr_netmem(net_iov_to_netmem(niov));
|
||||
dma_unmap_page_attrs(ifq->dev, dma, PAGE_SIZE,
|
||||
DMA_FROM_DEVICE, IO_DMA_ATTR);
|
||||
net_mp_niov_set_dma_addr(niov, 0);
|
||||
}
|
||||
for (i = 0; i < area->nia.num_niovs; i++)
|
||||
net_mp_niov_set_dma_addr(&area->nia.niovs[i], 0);
|
||||
}
|
||||
|
||||
static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
|
||||
|
@ -58,20 +236,16 @@ static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *are
|
|||
area->is_mapped = false;
|
||||
}
|
||||
|
||||
static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
|
||||
static int io_zcrx_map_area_umem(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
|
||||
{
|
||||
int i;
|
||||
|
||||
guard(mutex)(&ifq->dma_lock);
|
||||
if (area->is_mapped)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < area->nia.num_niovs; i++) {
|
||||
struct net_iov *niov = &area->nia.niovs[i];
|
||||
dma_addr_t dma;
|
||||
|
||||
dma = dma_map_page_attrs(ifq->dev, area->pages[i], 0, PAGE_SIZE,
|
||||
DMA_FROM_DEVICE, IO_DMA_ATTR);
|
||||
dma = dma_map_page_attrs(ifq->dev, area->mem.pages[i], 0,
|
||||
PAGE_SIZE, DMA_FROM_DEVICE, IO_DMA_ATTR);
|
||||
if (dma_mapping_error(ifq->dev, dma))
|
||||
break;
|
||||
if (net_mp_niov_set_dma_addr(niov, dma)) {
|
||||
|
@ -80,9 +254,24 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
|
|||
break;
|
||||
}
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
if (i != area->nia.num_niovs) {
|
||||
__io_zcrx_unmap_area(ifq, area, i);
|
||||
static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area)
|
||||
{
|
||||
unsigned nr;
|
||||
|
||||
guard(mutex)(&ifq->dma_lock);
|
||||
if (area->is_mapped)
|
||||
return 0;
|
||||
|
||||
if (area->mem.is_dmabuf)
|
||||
nr = io_zcrx_map_area_dmabuf(ifq, area);
|
||||
else
|
||||
nr = io_zcrx_map_area_umem(ifq, area);
|
||||
|
||||
if (nr != area->nia.num_niovs) {
|
||||
__io_zcrx_unmap_area(ifq, area, nr);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -118,13 +307,6 @@ struct io_zcrx_args {
|
|||
|
||||
static const struct memory_provider_ops io_uring_pp_zc_ops;
|
||||
|
||||
static inline struct io_zcrx_area *io_zcrx_iov_to_area(const struct net_iov *niov)
|
||||
{
|
||||
struct net_iov_area *owner = net_iov_owner(niov);
|
||||
|
||||
return container_of(owner, struct io_zcrx_area, nia);
|
||||
}
|
||||
|
||||
static inline atomic_t *io_get_user_counter(struct net_iov *niov)
|
||||
{
|
||||
struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
|
||||
|
@ -147,17 +329,12 @@ static void io_zcrx_get_niov_uref(struct net_iov *niov)
|
|||
atomic_inc(io_get_user_counter(niov));
|
||||
}
|
||||
|
||||
static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
|
||||
{
|
||||
struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
|
||||
|
||||
return area->pages[net_iov_idx(niov)];
|
||||
}
|
||||
|
||||
static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
|
||||
struct io_uring_zcrx_ifq_reg *reg,
|
||||
struct io_uring_region_desc *rd)
|
||||
struct io_uring_region_desc *rd,
|
||||
u32 id)
|
||||
{
|
||||
u64 mmap_offset;
|
||||
size_t off, size;
|
||||
void *ptr;
|
||||
int ret;
|
||||
|
@ -167,12 +344,14 @@ static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
|
|||
if (size > rd->size)
|
||||
return -EINVAL;
|
||||
|
||||
ret = io_create_region_mmap_safe(ifq->ctx, &ifq->ctx->zcrx_region, rd,
|
||||
IORING_MAP_OFF_ZCRX_REGION);
|
||||
mmap_offset = IORING_MAP_OFF_ZCRX_REGION;
|
||||
mmap_offset += id << IORING_OFF_PBUF_SHIFT;
|
||||
|
||||
ret = io_create_region(ifq->ctx, &ifq->region, rd, mmap_offset);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ptr = io_region_get_ptr(&ifq->ctx->zcrx_region);
|
||||
ptr = io_region_get_ptr(&ifq->region);
|
||||
ifq->rq_ring = (struct io_uring *)ptr;
|
||||
ifq->rqes = (struct io_uring_zcrx_rqe *)(ptr + off);
|
||||
return 0;
|
||||
|
@ -180,7 +359,7 @@ static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
|
|||
|
||||
static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
|
||||
{
|
||||
io_free_region(ifq->ctx, &ifq->ctx->zcrx_region);
|
||||
io_free_region(ifq->ctx, &ifq->region);
|
||||
ifq->rq_ring = NULL;
|
||||
ifq->rqes = NULL;
|
||||
}
|
||||
|
@ -188,53 +367,44 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
|
|||
static void io_zcrx_free_area(struct io_zcrx_area *area)
|
||||
{
|
||||
io_zcrx_unmap_area(area->ifq, area);
|
||||
io_release_area_mem(&area->mem);
|
||||
|
||||
kvfree(area->freelist);
|
||||
kvfree(area->nia.niovs);
|
||||
kvfree(area->user_refs);
|
||||
if (area->pages) {
|
||||
unpin_user_pages(area->pages, area->nr_folios);
|
||||
kvfree(area->pages);
|
||||
}
|
||||
kfree(area);
|
||||
}
|
||||
|
||||
#define IO_ZCRX_AREA_SUPPORTED_FLAGS (IORING_ZCRX_AREA_DMABUF)
|
||||
|
||||
static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
|
||||
struct io_zcrx_area **res,
|
||||
struct io_uring_zcrx_area_reg *area_reg)
|
||||
{
|
||||
struct io_zcrx_area *area;
|
||||
int i, ret, nr_pages, nr_iovs;
|
||||
struct iovec iov;
|
||||
unsigned nr_iovs;
|
||||
int i, ret;
|
||||
|
||||
if (area_reg->flags || area_reg->rq_area_token)
|
||||
if (area_reg->flags & ~IO_ZCRX_AREA_SUPPORTED_FLAGS)
|
||||
return -EINVAL;
|
||||
if (area_reg->__resv1 || area_reg->__resv2[0] || area_reg->__resv2[1])
|
||||
if (area_reg->rq_area_token)
|
||||
return -EINVAL;
|
||||
if (area_reg->addr & ~PAGE_MASK || area_reg->len & ~PAGE_MASK)
|
||||
if (area_reg->__resv2[0] || area_reg->__resv2[1])
|
||||
return -EINVAL;
|
||||
|
||||
iov.iov_base = u64_to_user_ptr(area_reg->addr);
|
||||
iov.iov_len = area_reg->len;
|
||||
ret = io_buffer_validate(&iov);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = -ENOMEM;
|
||||
area = kzalloc(sizeof(*area), GFP_KERNEL);
|
||||
if (!area)
|
||||
goto err;
|
||||
|
||||
area->pages = io_pin_pages((unsigned long)area_reg->addr, area_reg->len,
|
||||
&nr_pages);
|
||||
if (IS_ERR(area->pages)) {
|
||||
ret = PTR_ERR(area->pages);
|
||||
area->pages = NULL;
|
||||
ret = io_import_area(ifq, &area->mem, area_reg);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
area->nr_folios = nr_iovs = nr_pages;
|
||||
|
||||
nr_iovs = area->mem.size >> PAGE_SHIFT;
|
||||
area->nia.num_niovs = nr_iovs;
|
||||
|
||||
ret = -ENOMEM;
|
||||
area->nia.niovs = kvmalloc_array(nr_iovs, sizeof(area->nia.niovs[0]),
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (!area->nia.niovs)
|
||||
|
@ -245,9 +415,6 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq,
|
|||
if (!area->freelist)
|
||||
goto err;
|
||||
|
||||
for (i = 0; i < nr_iovs; i++)
|
||||
area->freelist[i] = i;
|
||||
|
||||
area->user_refs = kvmalloc_array(nr_iovs, sizeof(area->user_refs[0]),
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
if (!area->user_refs)
|
||||
|
@ -341,6 +508,16 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
|
|||
kfree(ifq);
|
||||
}
|
||||
|
||||
struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx,
|
||||
unsigned int id)
|
||||
{
|
||||
struct io_zcrx_ifq *ifq = xa_load(&ctx->zcrx_ctxs, id);
|
||||
|
||||
lockdep_assert_held(&ctx->mmap_lock);
|
||||
|
||||
return ifq ? &ifq->region : NULL;
|
||||
}
|
||||
|
||||
int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
|
||||
struct io_uring_zcrx_ifq_reg __user *arg)
|
||||
{
|
||||
|
@ -350,6 +527,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
|
|||
struct io_uring_region_desc rd;
|
||||
struct io_zcrx_ifq *ifq;
|
||||
int ret;
|
||||
u32 id;
|
||||
|
||||
/*
|
||||
* 1. Interface queue allocation.
|
||||
|
@ -362,8 +540,6 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
|
|||
if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN &&
|
||||
ctx->flags & IORING_SETUP_CQE32))
|
||||
return -EINVAL;
|
||||
if (ctx->ifq)
|
||||
return -EBUSY;
|
||||
if (copy_from_user(®, arg, sizeof(reg)))
|
||||
return -EFAULT;
|
||||
if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd)))
|
||||
|
@ -386,29 +562,37 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
|
|||
ifq = io_zcrx_ifq_alloc(ctx);
|
||||
if (!ifq)
|
||||
return -ENOMEM;
|
||||
ifq->rq_entries = reg.rq_entries;
|
||||
|
||||
ret = io_allocate_rbuf_ring(ifq, ®, &rd);
|
||||
scoped_guard(mutex, &ctx->mmap_lock) {
|
||||
/* preallocate id */
|
||||
ret = xa_alloc(&ctx->zcrx_ctxs, &id, NULL, xa_limit_31b, GFP_KERNEL);
|
||||
if (ret)
|
||||
goto ifq_free;
|
||||
}
|
||||
|
||||
ret = io_allocate_rbuf_ring(ifq, ®, &rd, id);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ifq->netdev = netdev_get_by_index(current->nsproxy->net_ns, reg.if_idx,
|
||||
&ifq->netdev_tracker, GFP_KERNEL);
|
||||
if (!ifq->netdev) {
|
||||
ret = -ENODEV;
|
||||
goto err;
|
||||
}
|
||||
|
||||
ifq->dev = ifq->netdev->dev.parent;
|
||||
if (!ifq->dev) {
|
||||
ret = -EOPNOTSUPP;
|
||||
goto err;
|
||||
}
|
||||
get_device(ifq->dev);
|
||||
|
||||
ret = io_zcrx_create_area(ifq, &ifq->area, &area);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
ifq->rq_entries = reg.rq_entries;
|
||||
|
||||
ret = -ENODEV;
|
||||
ifq->netdev = netdev_get_by_index(current->nsproxy->net_ns, reg.if_idx,
|
||||
&ifq->netdev_tracker, GFP_KERNEL);
|
||||
if (!ifq->netdev)
|
||||
goto err;
|
||||
|
||||
ifq->dev = ifq->netdev->dev.parent;
|
||||
ret = -EOPNOTSUPP;
|
||||
if (!ifq->dev)
|
||||
goto err;
|
||||
get_device(ifq->dev);
|
||||
|
||||
mp_param.mp_ops = &io_uring_pp_zc_ops;
|
||||
mp_param.mp_priv = ifq;
|
||||
ret = net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param);
|
||||
|
@ -419,6 +603,14 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
|
|||
reg.offsets.rqes = sizeof(struct io_uring);
|
||||
reg.offsets.head = offsetof(struct io_uring, head);
|
||||
reg.offsets.tail = offsetof(struct io_uring, tail);
|
||||
reg.zcrx_id = id;
|
||||
|
||||
scoped_guard(mutex, &ctx->mmap_lock) {
|
||||
/* publish ifq */
|
||||
ret = -ENOMEM;
|
||||
if (xa_store(&ctx->zcrx_ctxs, id, ifq, GFP_KERNEL))
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (copy_to_user(arg, ®, sizeof(reg)) ||
|
||||
copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd)) ||
|
||||
|
@ -426,24 +618,34 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
|
|||
ret = -EFAULT;
|
||||
goto err;
|
||||
}
|
||||
ctx->ifq = ifq;
|
||||
return 0;
|
||||
err:
|
||||
scoped_guard(mutex, &ctx->mmap_lock)
|
||||
xa_erase(&ctx->zcrx_ctxs, id);
|
||||
ifq_free:
|
||||
io_zcrx_ifq_free(ifq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_zcrx_ifq *ifq = ctx->ifq;
|
||||
struct io_zcrx_ifq *ifq;
|
||||
unsigned long id;
|
||||
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
|
||||
if (!ifq)
|
||||
return;
|
||||
while (1) {
|
||||
scoped_guard(mutex, &ctx->mmap_lock) {
|
||||
ifq = xa_find(&ctx->zcrx_ctxs, &id, ULONG_MAX, XA_PRESENT);
|
||||
if (ifq)
|
||||
xa_erase(&ctx->zcrx_ctxs, id);
|
||||
}
|
||||
if (!ifq)
|
||||
break;
|
||||
io_zcrx_ifq_free(ifq);
|
||||
}
|
||||
|
||||
ctx->ifq = NULL;
|
||||
io_zcrx_ifq_free(ifq);
|
||||
xa_destroy(&ctx->zcrx_ctxs);
|
||||
}
|
||||
|
||||
static struct net_iov *__io_zcrx_get_free_niov(struct io_zcrx_area *area)
|
||||
|
@ -500,12 +702,15 @@ static void io_zcrx_scrub(struct io_zcrx_ifq *ifq)
|
|||
|
||||
void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_zcrx_ifq *ifq;
|
||||
unsigned long index;
|
||||
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
|
||||
if (!ctx->ifq)
|
||||
return;
|
||||
io_zcrx_scrub(ctx->ifq);
|
||||
io_close_queue(ctx->ifq);
|
||||
xa_for_each(&ctx->zcrx_ctxs, index, ifq) {
|
||||
io_zcrx_scrub(ifq);
|
||||
io_close_queue(ifq);
|
||||
}
|
||||
}
|
||||
|
||||
static inline u32 io_zcrx_rqring_entries(struct io_zcrx_ifq *ifq)
|
||||
|
@ -742,6 +947,9 @@ static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
|
|||
size_t copied = 0;
|
||||
int ret = 0;
|
||||
|
||||
if (area->mem.is_dmabuf)
|
||||
return -EFAULT;
|
||||
|
||||
while (len) {
|
||||
size_t copy_size = min_t(size_t, PAGE_SIZE, len);
|
||||
const int dst_off = 0;
|
||||
|
|
|
@ -3,10 +3,24 @@
|
|||
#define IOU_ZC_RX_H
|
||||
|
||||
#include <linux/io_uring_types.h>
|
||||
#include <linux/dma-buf.h>
|
||||
#include <linux/socket.h>
|
||||
#include <net/page_pool/types.h>
|
||||
#include <net/net_trackers.h>
|
||||
|
||||
struct io_zcrx_mem {
|
||||
unsigned long size;
|
||||
bool is_dmabuf;
|
||||
|
||||
struct page **pages;
|
||||
unsigned long nr_folios;
|
||||
|
||||
struct dma_buf_attachment *attach;
|
||||
struct dma_buf *dmabuf;
|
||||
struct sg_table *sgt;
|
||||
unsigned long dmabuf_offset;
|
||||
};
|
||||
|
||||
struct io_zcrx_area {
|
||||
struct net_iov_area nia;
|
||||
struct io_zcrx_ifq *ifq;
|
||||
|
@ -14,13 +28,13 @@ struct io_zcrx_area {
|
|||
|
||||
bool is_mapped;
|
||||
u16 area_id;
|
||||
struct page **pages;
|
||||
unsigned long nr_folios;
|
||||
|
||||
/* freelist */
|
||||
spinlock_t freelist_lock ____cacheline_aligned_in_smp;
|
||||
u32 free_count;
|
||||
u32 *freelist;
|
||||
|
||||
struct io_zcrx_mem mem;
|
||||
};
|
||||
|
||||
struct io_zcrx_ifq {
|
||||
|
@ -39,6 +53,7 @@ struct io_zcrx_ifq {
|
|||
netdevice_tracker netdev_tracker;
|
||||
spinlock_t lock;
|
||||
struct mutex dma_lock;
|
||||
struct io_mapped_region region;
|
||||
};
|
||||
|
||||
#if defined(CONFIG_IO_URING_ZCRX)
|
||||
|
@ -49,6 +64,8 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx);
|
|||
int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
|
||||
struct socket *sock, unsigned int flags,
|
||||
unsigned issue_flags, unsigned int *len);
|
||||
struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx,
|
||||
unsigned int id);
|
||||
#else
|
||||
static inline int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
|
||||
struct io_uring_zcrx_ifq_reg __user *arg)
|
||||
|
@ -67,6 +84,11 @@ static inline int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
|
|||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
static inline struct io_mapped_region *io_zcrx_get_region(struct io_ring_ctx *ctx,
|
||||
unsigned int id)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
int io_recvzc(struct io_kiocb *req, unsigned int issue_flags);
|
||||
|
|