linux-yocto/io_uring/zcrx.c
David Wei 6f377873cb io_uring/zcrx: add interface queue and refill queue
Add a new object called an interface queue (ifq) that represents a net
rx queue that has been configured for zero copy. Each ifq is registered
using a new registration opcode IORING_REGISTER_ZCRX_IFQ.

The refill queue is allocated by the kernel and mapped by userspace
using a new offset IORING_OFF_RQ_RING, in a similar fashion to the main
SQ/CQ. It is used by userspace to return buffers that it is done with,
which will then be re-used by the netdev again.

The main CQ ring is used to notify userspace of received data by using
the upper 16 bytes of a big CQE as a new struct io_uring_zcrx_cqe. Each
entry contains the offset + len to the data.

For now, each io_uring instance only has a single ifq.

Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: David Wei <dw@davidwei.uk>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20250215000947.789731-2-dw@davidwei.uk
Signed-off-by: Jens Axboe <axboe@kernel.dk>
2025-02-17 05:41:03 -07:00

150 lines
3.2 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/io_uring.h>
#include <uapi/linux/io_uring.h>
#include "io_uring.h"
#include "kbuf.h"
#include "memmap.h"
#include "zcrx.h"
/*
 * Largest number of refill queue entries accepted at registration time.
 * io_register_zcrx_ifq() clamps bigger requests to this value when the ring
 * was set up with IORING_SETUP_CLAMP and rejects them with -EINVAL otherwise.
 */
#define IO_RQ_MAX_ENTRIES 32768
/*
 * Create the zcrx memory region for @ifq and lay the refill queue out in it.
 *
 * The layout is a struct io_uring header immediately followed by
 * @reg->rq_entries entries of struct io_uring_zcrx_rqe. The region described
 * by @rd has to be at least that large.
 *
 * Returns 0 on success with ifq->rq_ring and ifq->rqes pointing into the
 * memory obtained from io_region_get_ptr(), -EINVAL if @rd is too small, or
 * the negative error reported by io_create_region_mmap_safe().
 */
static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq,
				 struct io_uring_zcrx_ifq_reg *reg,
				 struct io_uring_region_desc *rd)
{
	size_t hdr_len = sizeof(struct io_uring);
	size_t needed = hdr_len +
			sizeof(struct io_uring_zcrx_rqe) * reg->rq_entries;
	void *base;
	int err;

	/* Refuse before creating anything if the ring would not fit. */
	if (rd->size < needed)
		return -EINVAL;

	err = io_create_region_mmap_safe(ifq->ctx, &ifq->ctx->zcrx_region, rd,
					 IORING_MAP_OFF_ZCRX_REGION);
	if (err < 0)
		return err;

	/* Header sits at the start of the region, entries right behind it. */
	base = io_region_get_ptr(&ifq->ctx->zcrx_region);
	ifq->rq_ring = base;
	ifq->rqes = base + hdr_len;
	return 0;
}
/*
 * Release the zcrx region held by the ifq's ring context and reset the
 * refill ring pointers in @ifq, which pointed into that region.
 */
static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq)
{
	io_free_region(ifq->ctx, &ifq->ctx->zcrx_region);

	/* Both pointers referred to the region that was just released. */
	ifq->rqes = NULL;
	ifq->rq_ring = NULL;
}
/*
 * Allocate a zero-initialised interface queue bound to @ctx.
 *
 * The rx queue index starts out as -1 until the caller assigns one.
 * Returns the new ifq, or NULL if the allocation failed.
 */
static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
{
	struct io_zcrx_ifq *new_ifq = kzalloc(sizeof(*new_ifq), GFP_KERNEL);

	if (new_ifq) {
		new_ifq->ctx = ctx;
		new_ifq->if_rxq = -1;
	}
	return new_ifq;
}
/*
 * Tear down @ifq: release its refill ring first, then free the ifq itself.
 */
static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
{
	/* The ring teardown dereferences ifq, so it must precede kfree(). */
	io_free_rbuf_ring(ifq);
	kfree(ifq);
}
/*
 * Register a zero-copy rx interface queue on @ctx from the userspace
 * description at @arg.
 *
 * On success the (possibly clamped and rounded) registration, including the
 * refill queue offsets, and the region descriptor are copied back to
 * userspace and the new ifq is installed in ctx->ifq.
 *
 * Returns 0 on success, or:
 *   -EPERM   caller lacks CAP_NET_ADMIN
 *   -EINVAL  ring lacks DEFER_TASKRUN/CQE32, reserved fields or flags are
 *            set, if_rxq is -1, rq_entries is 0, or rq_entries exceeds
 *            IO_RQ_MAX_ENTRIES without IORING_SETUP_CLAMP
 *   -EBUSY   an ifq is already registered on this ring
 *   -EFAULT  a user copy failed or area_ptr is 0
 *   -ENOMEM  the ifq could not be allocated
 *   any error returned by io_allocate_rbuf_ring()
 */
int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
			 struct io_uring_zcrx_ifq_reg __user *arg)
{
	struct io_uring_region_desc rd;
	struct io_uring_zcrx_ifq_reg reg;
	struct io_zcrx_ifq *ifq;
	int ret;

	/*
	 * CAP_NET_ADMIN is required for two reasons:
	 * 1. Interface queue allocation.
	 * 2. It can observe data destined for sockets of other tasks.
	 */
	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	/* zc rx cannot work without both of these io_uring features */
	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN) ||
	    !(ctx->flags & IORING_SETUP_CQE32))
		return -EINVAL;

	/* one ifq per ring */
	if (ctx->ifq)
		return -EBUSY;

	if (copy_from_user(&reg, arg, sizeof(reg)))
		return -EFAULT;
	if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd)))
		return -EFAULT;

	if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv)))
		return -EINVAL;
	if (reg.flags || !reg.rq_entries || reg.if_rxq == -1)
		return -EINVAL;

	/* Oversized requests are clamped only if the ring opted in. */
	if (reg.rq_entries > IO_RQ_MAX_ENTRIES) {
		if (ctx->flags & IORING_SETUP_CLAMP)
			reg.rq_entries = IO_RQ_MAX_ENTRIES;
		else
			return -EINVAL;
	}
	reg.rq_entries = roundup_pow_of_two(reg.rq_entries);

	if (!reg.area_ptr)
		return -EFAULT;

	ifq = io_zcrx_ifq_alloc(ctx);
	if (!ifq)
		return -ENOMEM;

	ret = io_allocate_rbuf_ring(ifq, &reg, &rd);
	if (ret)
		goto free_ifq;

	ifq->if_rxq = reg.if_rxq;
	ifq->rq_entries = reg.rq_entries;

	/* Tell userspace where head, tail and the entries live in the region. */
	reg.offsets.head = offsetof(struct io_uring, head);
	reg.offsets.tail = offsetof(struct io_uring, tail);
	reg.offsets.rqes = sizeof(struct io_uring);

	/* Either copy failing aborts the registration with -EFAULT. */
	ret = -EFAULT;
	if (copy_to_user(arg, &reg, sizeof(reg)))
		goto free_ifq;
	if (copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd)))
		goto free_ifq;

	ctx->ifq = ifq;
	return 0;

free_ifq:
	io_zcrx_ifq_free(ifq);
	return ret;
}
/*
 * Detach and free the interface queue registered on @ctx, if there is one.
 * The caller is expected to hold ctx->uring_lock (checked via lockdep).
 */
void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx)
{
	struct io_zcrx_ifq *old;

	lockdep_assert_held(&ctx->uring_lock);

	old = ctx->ifq;
	if (old) {
		/* Unpublish the ifq from the ring before destroying it. */
		ctx->ifq = NULL;
		io_zcrx_ifq_free(old);
	}
}
/*
 * Shutdown entry point for the ring's interface queues. It performs no
 * teardown work here; it only asserts (via lockdep) that the caller holds
 * ctx->uring_lock.
 */
void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
{
	lockdep_assert_held(&ctx->uring_lock);
}