mirror of
git://git.yoctoproject.org/linux-yocto.git
synced 2025-10-22 15:03:53 +02:00

Many nvme metadata formats can not strip or generate the metadata on the controller side. For these formats, a host provided integrity buffer is mandatory even if it isn't checked. The block integrity read_verify and write_generate attributes prevent allocating the metadata buffer, but we need it when the format requires it, otherwise reads and writes will be rejected by the driver with IO errors. Assume the integrity buffer can be offloaded to the controller if the metadata size is the same as the protection information size. Otherwise provide an unchecked host buffer when the read verify or write generation attributes are disabled. This fixes the following nvme warning: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 371 at drivers/nvme/host/core.c:1036 nvme_setup_rw+0x122/0x210 ... RIP: 0010:nvme_setup_rw+0x122/0x210 ... Call Trace: <TASK> nvme_setup_cmd+0x1b4/0x280 nvme_queue_rqs+0xc4/0x1f0 [nvme] blk_mq_dispatch_queue_requests+0x24a/0x430 blk_mq_flush_plug_list+0x50/0x140 __blk_flush_plug+0xc1/0x100 __submit_bio+0x1c1/0x360 ? submit_bio_noacct_nocheck+0x2d6/0x3c0 submit_bio_noacct_nocheck+0x2d6/0x3c0 ? submit_bio_noacct+0x47/0x4c0 submit_bio_wait+0x48/0xa0 __blkdev_direct_IO_simple+0xee/0x210 ? current_time+0x1d/0x100 ? current_time+0x1d/0x100 ? __bio_clone+0xb0/0xb0 blkdev_read_iter+0xbb/0x140 vfs_read+0x239/0x310 ksys_read+0x58/0xc0 do_syscall_64+0x6c/0x180 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Signed-off-by: Keith Busch <kbusch@kernel.org> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Link: https://lore.kernel.org/r/20250509153802.3482493-1-kbusch@meta.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
224 lines
5.9 KiB
C
224 lines
5.9 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (C) 2007, 2008, 2009 Oracle Corporation
|
|
* Written by: Martin K. Petersen <martin.petersen@oracle.com>
|
|
*
|
|
* Automatically generate and verify integrity data on PI capable devices if the
|
|
* bio submitter didn't provide PI itself. This ensures that kernel verifies
|
|
* data integrity even if the file system (or other user of the block device) is
|
|
* not aware of PI.
|
|
*/
|
|
#include <linux/blk-integrity.h>
|
|
#include <linux/t10-pi.h>
|
|
#include <linux/workqueue.h>
|
|
#include "blk.h"
|
|
|
|
struct bio_integrity_data {
|
|
struct bio *bio;
|
|
struct bvec_iter saved_bio_iter;
|
|
struct work_struct work;
|
|
struct bio_integrity_payload bip;
|
|
struct bio_vec bvec;
|
|
};
|
|
|
|
static struct kmem_cache *bid_slab;
|
|
static mempool_t bid_pool;
|
|
static struct workqueue_struct *kintegrityd_wq;
|
|
|
|
static void bio_integrity_finish(struct bio_integrity_data *bid)
|
|
{
|
|
bid->bio->bi_integrity = NULL;
|
|
bid->bio->bi_opf &= ~REQ_INTEGRITY;
|
|
kfree(bvec_virt(bid->bip.bip_vec));
|
|
mempool_free(bid, &bid_pool);
|
|
}
|
|
|
|
static void bio_integrity_verify_fn(struct work_struct *work)
|
|
{
|
|
struct bio_integrity_data *bid =
|
|
container_of(work, struct bio_integrity_data, work);
|
|
struct bio *bio = bid->bio;
|
|
|
|
blk_integrity_verify_iter(bio, &bid->saved_bio_iter);
|
|
bio_integrity_finish(bid);
|
|
bio_endio(bio);
|
|
}
|
|
|
|
#define BIP_CHECK_FLAGS (BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG)
|
|
static bool bip_should_check(struct bio_integrity_payload *bip)
|
|
{
|
|
return bip->bip_flags & BIP_CHECK_FLAGS;
|
|
}
|
|
|
|
static bool bi_offload_capable(struct blk_integrity *bi)
|
|
{
|
|
switch (bi->csum_type) {
|
|
case BLK_INTEGRITY_CSUM_CRC64:
|
|
return bi->tuple_size == sizeof(struct crc64_pi_tuple);
|
|
case BLK_INTEGRITY_CSUM_CRC:
|
|
case BLK_INTEGRITY_CSUM_IP:
|
|
return bi->tuple_size == sizeof(struct t10_pi_tuple);
|
|
default:
|
|
pr_warn_once("%s: unknown integrity checksum type:%d\n",
|
|
__func__, bi->csum_type);
|
|
fallthrough;
|
|
case BLK_INTEGRITY_CSUM_NONE:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* __bio_integrity_endio - Integrity I/O completion function
|
|
* @bio: Protected bio
|
|
*
|
|
* Normally I/O completion is done in interrupt context. However, verifying I/O
|
|
* integrity is a time-consuming task which must be run in process context.
|
|
*
|
|
* This function postpones completion accordingly.
|
|
*/
|
|
bool __bio_integrity_endio(struct bio *bio)
|
|
{
|
|
struct bio_integrity_payload *bip = bio_integrity(bio);
|
|
struct bio_integrity_data *bid =
|
|
container_of(bip, struct bio_integrity_data, bip);
|
|
|
|
if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
|
|
bip_should_check(bip)) {
|
|
INIT_WORK(&bid->work, bio_integrity_verify_fn);
|
|
queue_work(kintegrityd_wq, &bid->work);
|
|
return false;
|
|
}
|
|
|
|
bio_integrity_finish(bid);
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* bio_integrity_prep - Prepare bio for integrity I/O
|
|
* @bio: bio to prepare
|
|
*
|
|
* Checks if the bio already has an integrity payload attached. If it does, the
|
|
* payload has been generated by another kernel subsystem, and we just pass it
|
|
* through.
|
|
* Otherwise allocates integrity payload and for writes the integrity metadata
|
|
* will be generated. For reads, the completion handler will verify the
|
|
* metadata.
|
|
*/
|
|
bool bio_integrity_prep(struct bio *bio)
|
|
{
|
|
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
|
|
struct bio_integrity_data *bid;
|
|
bool set_flags = true;
|
|
gfp_t gfp = GFP_NOIO;
|
|
unsigned int len;
|
|
void *buf;
|
|
|
|
if (!bi)
|
|
return true;
|
|
|
|
if (!bio_sectors(bio))
|
|
return true;
|
|
|
|
/* Already protected? */
|
|
if (bio_integrity(bio))
|
|
return true;
|
|
|
|
switch (bio_op(bio)) {
|
|
case REQ_OP_READ:
|
|
if (bi->flags & BLK_INTEGRITY_NOVERIFY) {
|
|
if (bi_offload_capable(bi))
|
|
return true;
|
|
set_flags = false;
|
|
}
|
|
break;
|
|
case REQ_OP_WRITE:
|
|
/*
|
|
* Zero the memory allocated to not leak uninitialized kernel
|
|
* memory to disk for non-integrity metadata where nothing else
|
|
* initializes the memory.
|
|
*/
|
|
if (bi->flags & BLK_INTEGRITY_NOGENERATE) {
|
|
if (bi_offload_capable(bi))
|
|
return true;
|
|
set_flags = false;
|
|
gfp |= __GFP_ZERO;
|
|
} else if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
|
|
gfp |= __GFP_ZERO;
|
|
break;
|
|
default:
|
|
return true;
|
|
}
|
|
|
|
if (WARN_ON_ONCE(bio_has_crypt_ctx(bio)))
|
|
return true;
|
|
|
|
/* Allocate kernel buffer for protection data */
|
|
len = bio_integrity_bytes(bi, bio_sectors(bio));
|
|
buf = kmalloc(len, gfp);
|
|
if (!buf)
|
|
goto err_end_io;
|
|
bid = mempool_alloc(&bid_pool, GFP_NOIO);
|
|
if (!bid)
|
|
goto err_free_buf;
|
|
bio_integrity_init(bio, &bid->bip, &bid->bvec, 1);
|
|
|
|
bid->bio = bio;
|
|
|
|
bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
|
|
bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
|
|
|
|
if (set_flags) {
|
|
if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
|
|
bid->bip.bip_flags |= BIP_IP_CHECKSUM;
|
|
if (bi->csum_type)
|
|
bid->bip.bip_flags |= BIP_CHECK_GUARD;
|
|
if (bi->flags & BLK_INTEGRITY_REF_TAG)
|
|
bid->bip.bip_flags |= BIP_CHECK_REFTAG;
|
|
}
|
|
|
|
if (bio_integrity_add_page(bio, virt_to_page(buf), len,
|
|
offset_in_page(buf)) < len)
|
|
goto err_end_io;
|
|
|
|
/* Auto-generate integrity metadata if this is a write */
|
|
if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip))
|
|
blk_integrity_generate(bio);
|
|
else
|
|
bid->saved_bio_iter = bio->bi_iter;
|
|
return true;
|
|
|
|
err_free_buf:
|
|
kfree(buf);
|
|
err_end_io:
|
|
bio->bi_status = BLK_STS_RESOURCE;
|
|
bio_endio(bio);
|
|
return false;
|
|
}
|
|
EXPORT_SYMBOL(bio_integrity_prep);
|
|
|
|
void blk_flush_integrity(void)
|
|
{
|
|
flush_workqueue(kintegrityd_wq);
|
|
}
|
|
|
|
/*
 * Boot-time setup: create the slab cache and mempool for
 * struct bio_integrity_data and the kintegrityd workqueue.  Any failure
 * here panics; on success 0 is returned.
 */
static int __init blk_integrity_auto_init(void)
{
	/*
	 * kintegrityd won't block much but may burn a lot of CPU cycles.
	 * Make it highpri CPU intensive wq with max concurrency of 1.
	 */
	const unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI |
				      WQ_CPU_INTENSIVE;

	bid_slab = kmem_cache_create("bio_integrity_data",
				     sizeof(struct bio_integrity_data), 0,
				     SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);

	if (mempool_init_slab_pool(&bid_pool, BIO_POOL_SIZE, bid_slab) != 0)
		panic("bio: can't create integrity pool\n");

	kintegrityd_wq = alloc_workqueue("kintegrityd", wq_flags, 1);
	if (kintegrityd_wq == NULL)
		panic("Failed to create kintegrityd\n");

	return 0;
}
subsys_initcall(blk_integrity_auto_init);
|