
Add a new object called an interface queue (ifq) that represents a net rx queue
that has been configured for zero copy. Each ifq is registered using a new
registration opcode IORING_REGISTER_ZCRX_IFQ.

The refill queue is allocated by the kernel and mapped by userspace using a new
offset IORING_OFF_RQ_RING, in a similar fashion to the main SQ/CQ. It is used
by userspace to return buffers that it is done with, which will then be re-used
by the netdev again.

The main CQ ring is used to notify userspace of received data by using the
upper 16 bytes of a big CQE as a new struct io_uring_zcrx_cqe. Each entry
contains the offset + len to the data.

For now, each io_uring instance only has a single ifq.

Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: David Wei <dw@davidwei.uk>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20250215000947.789731-2-dw@davidwei.uk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
150 lines
3.2 KiB
C
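
Before the file itself, a hedged userspace sketch of the registration flow the commit message describes: filling in struct io_uring_zcrx_ifq_reg and invoking the new IORING_REGISTER_ZCRX_IFQ opcode through io_uring_register(2). This is not part of the kernel file below; the if_idx field, the handling of the region descriptor, and the follow-up mmap of the refill ring are assumptions made for illustration only.

/*
 * Hedged userspace sketch (not part of the kernel file below): registering a
 * zero copy rx interface queue. The io_uring instance is assumed to have been
 * created with IORING_SETUP_DEFER_TASKRUN | IORING_SETUP_CQE32, which
 * io_register_zcrx_ifq() requires. Struct and field names follow the uapi
 * used by this patch; the exact contents of the region descriptor and any
 * later mmap of the refill ring are assumptions, not shown in this file.
 */
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/io_uring.h>

static int register_zcrx_ifq(int ring_fd, unsigned int ifindex,
			     unsigned int rxq_id,
			     struct io_uring_region_desc *rd,
			     struct io_uring_zcrx_ifq_reg *reg)
{
	memset(reg, 0, sizeof(*reg));
	reg->if_idx = ifindex;		/* netdev ifindex (assumed uapi field) */
	reg->if_rxq = rxq_id;		/* hw rx queue to bind */
	reg->rq_entries = 4096;		/* refill ring size, <= IO_RQ_MAX_ENTRIES */
	reg->region_ptr = (uintptr_t)rd; /* rd->size must cover the refill ring */
	/*
	 * reg->area_ptr must point at the rx buffer area registration; it has
	 * to be non-NULL here, but the area itself is added by a later patch.
	 */

	/* raw io_uring_register(2) call with the new opcode */
	if (syscall(__NR_io_uring_register, ring_fd,
		    IORING_REGISTER_ZCRX_IFQ, reg, 1) < 0)
		return -1;

	/*
	 * On success the kernel has written back reg->rq_entries (possibly
	 * clamped and rounded up to a power of two) and
	 * reg->offsets.{head,tail,rqes}, which locate the refill ring inside
	 * the mapped region.
	 */
	return 0;
}
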
// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "kbuf.h"
#include "memmap.h"
#include "zcrx.h"

#define IO_RQ_MAX_ENTRIES		32768

static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
				 struct io_uring_zcrx_ifq_reg *reg,
				 struct io_uring_region_desc *rd)
{
	size_t off, size;
	void *ptr;
	int ret;

	off = sizeof(struct io_uring);
	size = off + sizeof(struct io_uring_zcrx_rqe) * reg->rq_entries;
	if (size > rd->size)
		return -EINVAL;

	ret = io_create_region_mmap_safe(ifq->ctx, &ifq->ctx->zcrx_region, rd,
					 IORING_MAP_OFF_ZCRX_REGION);
	if (ret < 0)
		return ret;

	ptr = io_region_get_ptr(&ifq->ctx->zcrx_region);
	ifq->rq_ring = (struct io_uring *)ptr;
	ifq->rqes = (struct io_uring_zcrx_rqe *)(ptr + off);
	return 0;
}

static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
{
	io_free_region(ifq->ctx, &ifq->ctx->zcrx_region);
	ifq->rq_ring = NULL;
	ifq->rqes = NULL;
}

static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
{
	struct io_zcrx_ifq *ifq;

	ifq = kzalloc(sizeof(*ifq), GFP_KERNEL);
	if (!ifq)
		return NULL;

	ifq->if_rxq = -1;
	ifq->ctx = ctx;
	return ifq;
}

static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
{
	io_free_rbuf_ring(ifq);
	kfree(ifq);
}

int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
			 struct io_uring_zcrx_ifq_reg __user *arg)
{
	struct io_uring_zcrx_ifq_reg reg;
	struct io_uring_region_desc rd;
	struct io_zcrx_ifq *ifq;
	int ret;

	/*
	 * 1. Interface queue allocation.
	 * 2. It can observe data destined for sockets of other tasks.
	 */
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* mandatory io_uring features for zc rx */
	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN &&
	      ctx->flags & IORING_SETUP_CQE32))
		return -EINVAL;
	if (ctx->ifq)
		return -EBUSY;
	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;
	if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd)))
		return -EFAULT;
	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;
	if (reg.if_rxq == -1 || !reg.rq_entries || reg.flags)
		return -EINVAL;
	if (reg.rq_entries > IO_RQ_MAX_ENTRIES) {
		if (!(ctx->flags & IORING_SETUP_CLAMP))
			return -EINVAL;
		reg.rq_entries = IO_RQ_MAX_ENTRIES;
	}
	reg.rq_entries = roundup_pow_of_two(reg.rq_entries);

	if (!reg.area_ptr)
		return -EFAULT;

	ifq = io_zcrx_ifq_alloc(ctx);
	if (!ifq)
		return -ENOMEM;

	ret = io_allocate_rbuf_ring(ifq, &reg, &rd);
	if (ret)
		goto err;

	ifq->rq_entries = reg.rq_entries;
	ifq->if_rxq = reg.if_rxq;

	reg.offsets.rqes = sizeof(struct io_uring);
	reg.offsets.head = offsetof(struct io_uring, head);
	reg.offsets.tail = offsetof(struct io_uring, tail);

	if (copy_to_user(arg, &reg, sizeof(reg)) ||
	    copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd))) {
		ret = -EFAULT;
		goto err;
	}

	ctx->ifq = ifq;
	return 0;
err:
	io_zcrx_ifq_free(ifq);
	return ret;
}

void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
{
	struct io_zcrx_ifq *ifq = ctx->ifq;

	lockdep_assert_held(&ctx->uring_lock);

	if (!ifq)
		return;

	ctx->ifq = NULL;
	io_zcrx_ifq_free(ifq);
}

void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
{
	lockdep_assert_held(&ctx->uring_lock);
}
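
For completeness, a hedged sketch of the userspace side of the refill ring that io_register_zcrx_ifq() exports through reg.offsets: the mapped region starts with a struct io_uring head/tail pair, followed by reg.rq_entries entries of struct io_uring_zcrx_rqe. The rqe field names ("off", "len") follow the commit message and are assumptions here, as are the memory-ordering choices; none of this code is part of the kernel file above.

/*
 * Hedged userspace sketch (not part of the file above): returning a finished
 * buffer through the refill ring so the netdev can reuse it. Pointers are
 * derived from the mapped region plus reg.offsets as written back by
 * io_register_zcrx_ifq().
 */
#include <stdint.h>
#include <stdatomic.h>
#include <linux/io_uring.h>

struct zcrx_refill_ring {
	_Atomic uint32_t *khead;		/* region + reg.offsets.head */
	_Atomic uint32_t *ktail;		/* region + reg.offsets.tail */
	struct io_uring_zcrx_rqe *rqes;		/* region + reg.offsets.rqes */
	uint32_t entries;			/* reg.rq_entries, a power of two */
};

/*
 * Hand one consumed buffer (identified by its offset and length, as reported
 * in the zero copy completion) back to the kernel.
 */
static int zcrx_recycle_buf(struct zcrx_refill_ring *rq, uint64_t off,
			    uint32_t len)
{
	uint32_t mask = rq->entries - 1;
	uint32_t head = atomic_load_explicit(rq->khead, memory_order_acquire);
	uint32_t tail = atomic_load_explicit(rq->ktail, memory_order_relaxed);
	struct io_uring_zcrx_rqe *rqe;

	if (tail - head >= rq->entries)
		return -1;			/* refill ring full, retry later */

	rqe = &rq->rqes[tail & mask];
	rqe->off = off;				/* assumed rqe field names */
	rqe->len = len;

	/* publish the entry before the kernel can observe the new tail */
	atomic_store_explicit(rq->ktail, tail + 1, memory_order_release);
	return 0;
}

The receive notifications themselves arrive on the main CQ as big (32-byte) CQEs; per the commit message, the upper 16 bytes form a struct io_uring_zcrx_cqe giving the offset and length of the received data, which is what an application would eventually feed back into a helper like the one sketched above.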