mirror of
git://git.yoctoproject.org/linux-yocto.git
synced 2025-10-22 23:13:01 +02:00
io_uring/rsrc: Simplify buffer cloning by locking both rings
The locking in the buffer cloning code is somewhat complex because it goes back and forth between locking the source ring and the destination ring. Make it easier to reason about by locking both rings at the same time. To avoid ABBA deadlocks, lock the rings in ascending kernel address order, just like in lock_two_nondirectories(). Signed-off-by: Jann Horn <jannh@google.com> Link: https://lore.kernel.org/r/20250115-uring-clone-refactor-v2-1-7289ba50776d@google.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
95ec54a420
commit
5719e28235
|
@ -921,6 +921,16 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Lock two rings at once. The rings must be different! */
|
||||||
|
static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2)
|
||||||
|
{
|
||||||
|
if (ctx1 > ctx2)
|
||||||
|
swap(ctx1, ctx2);
|
||||||
|
mutex_lock(&ctx1->uring_lock);
|
||||||
|
mutex_lock_nested(&ctx2->uring_lock, SINGLE_DEPTH_NESTING);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Both rings are locked by the caller. */
|
||||||
static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
|
static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx,
|
||||||
struct io_uring_clone_buffers *arg)
|
struct io_uring_clone_buffers *arg)
|
||||||
{
|
{
|
||||||
|
@ -928,6 +938,9 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
|
||||||
int i, ret, off, nr;
|
int i, ret, off, nr;
|
||||||
unsigned int nbufs;
|
unsigned int nbufs;
|
||||||
|
|
||||||
|
lockdep_assert_held(&ctx->uring_lock);
|
||||||
|
lockdep_assert_held(&src_ctx->uring_lock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Accounting state is shared between the two rings; that only works if
|
* Accounting state is shared between the two rings; that only works if
|
||||||
* both rings are accounted towards the same counters.
|
* both rings are accounted towards the same counters.
|
||||||
|
@ -942,7 +955,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
|
||||||
if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE))
|
if (ctx->buf_table.nr && !(arg->flags & IORING_REGISTER_DST_REPLACE))
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
|
|
||||||
nbufs = READ_ONCE(src_ctx->buf_table.nr);
|
nbufs = src_ctx->buf_table.nr;
|
||||||
if (!arg->nr)
|
if (!arg->nr)
|
||||||
arg->nr = nbufs;
|
arg->nr = nbufs;
|
||||||
else if (arg->nr > nbufs)
|
else if (arg->nr > nbufs)
|
||||||
|
@ -966,27 +979,20 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Drop our own lock here. We'll setup the data we need and reference
|
|
||||||
* the source buffers, then re-grab, check, and assign at the end.
|
|
||||||
*/
|
|
||||||
mutex_unlock(&ctx->uring_lock);
|
|
||||||
|
|
||||||
mutex_lock(&src_ctx->uring_lock);
|
|
||||||
ret = -ENXIO;
|
ret = -ENXIO;
|
||||||
nbufs = src_ctx->buf_table.nr;
|
nbufs = src_ctx->buf_table.nr;
|
||||||
if (!nbufs)
|
if (!nbufs)
|
||||||
goto out_unlock;
|
goto out_free;
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
if (!arg->nr)
|
if (!arg->nr)
|
||||||
arg->nr = nbufs;
|
arg->nr = nbufs;
|
||||||
else if (arg->nr > nbufs)
|
else if (arg->nr > nbufs)
|
||||||
goto out_unlock;
|
goto out_free;
|
||||||
ret = -EOVERFLOW;
|
ret = -EOVERFLOW;
|
||||||
if (check_add_overflow(arg->nr, arg->src_off, &off))
|
if (check_add_overflow(arg->nr, arg->src_off, &off))
|
||||||
goto out_unlock;
|
goto out_free;
|
||||||
if (off > nbufs)
|
if (off > nbufs)
|
||||||
goto out_unlock;
|
goto out_free;
|
||||||
|
|
||||||
off = arg->dst_off;
|
off = arg->dst_off;
|
||||||
i = arg->src_off;
|
i = arg->src_off;
|
||||||
|
@ -1001,7 +1007,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
|
||||||
dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
|
dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
|
||||||
if (!dst_node) {
|
if (!dst_node) {
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
goto out_unlock;
|
goto out_free;
|
||||||
}
|
}
|
||||||
|
|
||||||
refcount_inc(&src_node->buf->refs);
|
refcount_inc(&src_node->buf->refs);
|
||||||
|
@ -1011,10 +1017,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Have a ref on the bufs now, drop src lock and re-grab our own lock */
|
|
||||||
mutex_unlock(&src_ctx->uring_lock);
|
|
||||||
mutex_lock(&ctx->uring_lock);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If asked for replace, put the old table. data->nodes[] holds both
|
* If asked for replace, put the old table. data->nodes[] holds both
|
||||||
* old and new nodes at this point.
|
* old and new nodes at this point.
|
||||||
|
@ -1023,24 +1025,17 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
|
||||||
io_rsrc_data_free(ctx, &ctx->buf_table);
|
io_rsrc_data_free(ctx, &ctx->buf_table);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ctx->buf_table should be empty now - either the contents are being
|
* ctx->buf_table must be empty now - either the contents are being
|
||||||
* replaced and we just freed the table, or someone raced setting up
|
* replaced and we just freed the table, or the contents are being
|
||||||
* a buffer table while the clone was happening. If not empty, fall
|
* copied to a ring that does not have buffers yet (checked at function
|
||||||
* through to failure handling.
|
* entry).
|
||||||
*/
|
*/
|
||||||
if (!ctx->buf_table.nr) {
|
WARN_ON_ONCE(ctx->buf_table.nr);
|
||||||
ctx->buf_table = data;
|
ctx->buf_table = data;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
|
||||||
|
|
||||||
mutex_unlock(&ctx->uring_lock);
|
out_free:
|
||||||
mutex_lock(&src_ctx->uring_lock);
|
|
||||||
/* someone raced setting up buffers, dump ours */
|
|
||||||
ret = -EBUSY;
|
|
||||||
out_unlock:
|
|
||||||
io_rsrc_data_free(ctx, &data);
|
io_rsrc_data_free(ctx, &data);
|
||||||
mutex_unlock(&src_ctx->uring_lock);
|
|
||||||
mutex_lock(&ctx->uring_lock);
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1054,6 +1049,7 @@ out_unlock:
|
||||||
int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
|
int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
|
||||||
{
|
{
|
||||||
struct io_uring_clone_buffers buf;
|
struct io_uring_clone_buffers buf;
|
||||||
|
struct io_ring_ctx *src_ctx;
|
||||||
bool registered_src;
|
bool registered_src;
|
||||||
struct file *file;
|
struct file *file;
|
||||||
int ret;
|
int ret;
|
||||||
|
@ -1071,7 +1067,18 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
|
||||||
file = io_uring_register_get_file(buf.src_fd, registered_src);
|
file = io_uring_register_get_file(buf.src_fd, registered_src);
|
||||||
if (IS_ERR(file))
|
if (IS_ERR(file))
|
||||||
return PTR_ERR(file);
|
return PTR_ERR(file);
|
||||||
ret = io_clone_buffers(ctx, file->private_data, &buf);
|
|
||||||
|
src_ctx = file->private_data;
|
||||||
|
if (src_ctx != ctx) {
|
||||||
|
mutex_unlock(&ctx->uring_lock);
|
||||||
|
lock_two_rings(ctx, src_ctx);
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = io_clone_buffers(ctx, src_ctx, &buf);
|
||||||
|
|
||||||
|
if (src_ctx != ctx)
|
||||||
|
mutex_unlock(&src_ctx->uring_lock);
|
||||||
|
|
||||||
if (!registered_src)
|
if (!registered_src)
|
||||||
fput(file);
|
fput(file);
|
||||||
return ret;
|
return ret;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user