mirror of
git://git.yoctoproject.org/linux-yocto.git
synced 2025-07-06 13:55:22 +02:00

Now that we've created the skeleton of a log intent item to track and restart file mapping exchange operations, add the upper level logic to commit intent items and turn them into concrete work recorded in the log. This builds on the existing bmap update intent items that have been around for a while now. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
389 lines
10 KiB
C
389 lines
10 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* Copyright (c) 2020-2024 Oracle. All Rights Reserved.
|
|
* Author: Darrick J. Wong <djwong@kernel.org>
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_shared.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_log_format.h"
|
|
#include "xfs_trans_resv.h"
|
|
#include "xfs_mount.h"
|
|
#include "xfs_defer.h"
|
|
#include "xfs_inode.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_exchrange.h"
|
|
#include "xfs_exchmaps.h"
|
|
#include <linux/fsnotify.h>
|
|
|
|
/* Lock (and optionally join) two inodes for a file range exchange. */
|
|
void
|
|
xfs_exchrange_ilock(
|
|
struct xfs_trans *tp,
|
|
struct xfs_inode *ip1,
|
|
struct xfs_inode *ip2)
|
|
{
|
|
if (ip1 != ip2)
|
|
xfs_lock_two_inodes(ip1, XFS_ILOCK_EXCL,
|
|
ip2, XFS_ILOCK_EXCL);
|
|
else
|
|
xfs_ilock(ip1, XFS_ILOCK_EXCL);
|
|
if (tp) {
|
|
xfs_trans_ijoin(tp, ip1, 0);
|
|
if (ip2 != ip1)
|
|
xfs_trans_ijoin(tp, ip2, 0);
|
|
}
|
|
|
|
}
|
|
|
|
/* Unlock two inodes after a file range exchange operation. */
|
|
void
|
|
xfs_exchrange_iunlock(
|
|
struct xfs_inode *ip1,
|
|
struct xfs_inode *ip2)
|
|
{
|
|
if (ip2 != ip1)
|
|
xfs_iunlock(ip2, XFS_ILOCK_EXCL);
|
|
xfs_iunlock(ip1, XFS_ILOCK_EXCL);
|
|
}
|
|
|
|
/*
|
|
* Estimate the resource requirements to exchange file contents between the two
|
|
* files. The caller is required to hold the IOLOCK and the MMAPLOCK and to
|
|
* have flushed both inodes' pagecache and active direct-ios.
|
|
*/
|
|
int
|
|
xfs_exchrange_estimate(
|
|
struct xfs_exchmaps_req *req)
|
|
{
|
|
int error;
|
|
|
|
xfs_exchrange_ilock(NULL, req->ip1, req->ip2);
|
|
error = xfs_exchmaps_estimate(req);
|
|
xfs_exchrange_iunlock(req->ip1, req->ip2);
|
|
return error;
|
|
}
|
|
|
|
/*
 * Generic code for exchanging ranges of two files via XFS_IOC_EXCHANGE_RANGE.
 * This part deals with struct file objects and byte ranges and does not deal
 * with XFS-specific data structures such as xfs_inodes and block ranges.  This
 * separation may some day facilitate porting to another filesystem.
 *
 * The goal is to exchange fxr.length bytes starting at fxr.file1_offset in
 * file1 with the same number of bytes starting at fxr.file2_offset in file2.
 * Implementations must call xfs_exchange_range_prep to prepare the two
 * files prior to taking locks; and they must update the inode change and mod
 * times of both files as part of the metadata update.  The timestamp update
 * and freshness checks must be done atomically as part of the data exchange
 * operation to ensure correctness of the freshness check.
 * xfs_exchange_range_finish must be called after the operation completes
 * successfully but before locks are dropped.
 */
|
|
|
|
/* Verify that we have security clearance to perform this operation. */
|
|
static int
|
|
xfs_exchange_range_verify_area(
|
|
struct xfs_exchrange *fxr)
|
|
{
|
|
int ret;
|
|
|
|
ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length,
|
|
true);
|
|
if (ret)
|
|
return ret;
|
|
|
|
return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length,
|
|
true);
|
|
}
|
|
|
|
/*
|
|
* Performs necessary checks before doing a range exchange, having stabilized
|
|
* mutable inode attributes via i_rwsem.
|
|
*/
|
|
static inline int
|
|
xfs_exchange_range_checks(
|
|
struct xfs_exchrange *fxr,
|
|
unsigned int alloc_unit)
|
|
{
|
|
struct inode *inode1 = file_inode(fxr->file1);
|
|
struct inode *inode2 = file_inode(fxr->file2);
|
|
uint64_t allocmask = alloc_unit - 1;
|
|
int64_t test_len;
|
|
uint64_t blen;
|
|
loff_t size1, size2, tmp;
|
|
int error;
|
|
|
|
/* Don't touch certain kinds of inodes */
|
|
if (IS_IMMUTABLE(inode1) || IS_IMMUTABLE(inode2))
|
|
return -EPERM;
|
|
if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2))
|
|
return -ETXTBSY;
|
|
|
|
size1 = i_size_read(inode1);
|
|
size2 = i_size_read(inode2);
|
|
|
|
/* Ranges cannot start after EOF. */
|
|
if (fxr->file1_offset > size1 || fxr->file2_offset > size2)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* If the caller said to exchange to EOF, we set the length of the
|
|
* request large enough to cover everything to the end of both files.
|
|
*/
|
|
if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
|
|
fxr->length = max_t(int64_t, size1 - fxr->file1_offset,
|
|
size2 - fxr->file2_offset);
|
|
|
|
error = xfs_exchange_range_verify_area(fxr);
|
|
if (error)
|
|
return error;
|
|
}
|
|
|
|
/*
|
|
* The start of both ranges must be aligned to the file allocation
|
|
* unit.
|
|
*/
|
|
if (!IS_ALIGNED(fxr->file1_offset, alloc_unit) ||
|
|
!IS_ALIGNED(fxr->file2_offset, alloc_unit))
|
|
return -EINVAL;
|
|
|
|
/* Ensure offsets don't wrap. */
|
|
if (check_add_overflow(fxr->file1_offset, fxr->length, &tmp) ||
|
|
check_add_overflow(fxr->file2_offset, fxr->length, &tmp))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* We require both ranges to end within EOF, unless we're exchanging
|
|
* to EOF.
|
|
*/
|
|
if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) &&
|
|
(fxr->file1_offset + fxr->length > size1 ||
|
|
fxr->file2_offset + fxr->length > size2))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* Make sure we don't hit any file size limits. If we hit any size
|
|
* limits such that test_length was adjusted, we abort the whole
|
|
* operation.
|
|
*/
|
|
test_len = fxr->length;
|
|
error = generic_write_check_limits(fxr->file2, fxr->file2_offset,
|
|
&test_len);
|
|
if (error)
|
|
return error;
|
|
error = generic_write_check_limits(fxr->file1, fxr->file1_offset,
|
|
&test_len);
|
|
if (error)
|
|
return error;
|
|
if (test_len != fxr->length)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* If the user wanted us to exchange up to the infile's EOF, round up
|
|
* to the next allocation unit boundary for this check. Do the same
|
|
* for the outfile.
|
|
*
|
|
* Otherwise, reject the range length if it's not aligned to an
|
|
* allocation unit.
|
|
*/
|
|
if (fxr->file1_offset + fxr->length == size1)
|
|
blen = ALIGN(size1, alloc_unit) - fxr->file1_offset;
|
|
else if (fxr->file2_offset + fxr->length == size2)
|
|
blen = ALIGN(size2, alloc_unit) - fxr->file2_offset;
|
|
else if (!IS_ALIGNED(fxr->length, alloc_unit))
|
|
return -EINVAL;
|
|
else
|
|
blen = fxr->length;
|
|
|
|
/* Don't allow overlapped exchanges within the same file. */
|
|
if (inode1 == inode2 &&
|
|
fxr->file2_offset + blen > fxr->file1_offset &&
|
|
fxr->file1_offset + blen > fxr->file2_offset)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* Ensure that we don't exchange a partial EOF block into the middle of
|
|
* another file.
|
|
*/
|
|
if ((fxr->length & allocmask) == 0)
|
|
return 0;
|
|
|
|
blen = fxr->length;
|
|
if (fxr->file2_offset + blen < size2)
|
|
blen &= ~allocmask;
|
|
|
|
if (fxr->file1_offset + blen < size1)
|
|
blen &= ~allocmask;
|
|
|
|
return blen == fxr->length ? 0 : -EINVAL;
|
|
}
|
|
|
|
/*
|
|
* Check that the two inodes are eligible for range exchanges, the ranges make
|
|
* sense, and then flush all dirty data. Caller must ensure that the inodes
|
|
* have been locked against any other modifications.
|
|
*/
|
|
static inline int
|
|
xfs_exchange_range_prep(
|
|
struct xfs_exchrange *fxr,
|
|
unsigned int alloc_unit)
|
|
{
|
|
struct inode *inode1 = file_inode(fxr->file1);
|
|
struct inode *inode2 = file_inode(fxr->file2);
|
|
bool same_inode = (inode1 == inode2);
|
|
int error;
|
|
|
|
/* Check that we don't violate system file offset limits. */
|
|
error = xfs_exchange_range_checks(fxr, alloc_unit);
|
|
if (error || fxr->length == 0)
|
|
return error;
|
|
|
|
/* Wait for the completion of any pending IOs on both files */
|
|
inode_dio_wait(inode1);
|
|
if (!same_inode)
|
|
inode_dio_wait(inode2);
|
|
|
|
error = filemap_write_and_wait_range(inode1->i_mapping,
|
|
fxr->file1_offset,
|
|
fxr->file1_offset + fxr->length - 1);
|
|
if (error)
|
|
return error;
|
|
|
|
error = filemap_write_and_wait_range(inode2->i_mapping,
|
|
fxr->file2_offset,
|
|
fxr->file2_offset + fxr->length - 1);
|
|
if (error)
|
|
return error;
|
|
|
|
/*
|
|
* If the files or inodes involved require synchronous writes, amend
|
|
* the request to force the filesystem to flush all data and metadata
|
|
* to disk after the operation completes.
|
|
*/
|
|
if (((fxr->file1->f_flags | fxr->file2->f_flags) & O_SYNC) ||
|
|
IS_SYNC(inode1) || IS_SYNC(inode2))
|
|
fxr->flags |= XFS_EXCHANGE_RANGE_DSYNC;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Finish a range exchange operation, if it was successful. Caller must ensure
|
|
* that the inodes are still locked against any other modifications.
|
|
*/
|
|
static inline int
|
|
xfs_exchange_range_finish(
|
|
struct xfs_exchrange *fxr)
|
|
{
|
|
int error;
|
|
|
|
error = file_remove_privs(fxr->file1);
|
|
if (error)
|
|
return error;
|
|
if (file_inode(fxr->file1) == file_inode(fxr->file2))
|
|
return 0;
|
|
|
|
return file_remove_privs(fxr->file2);
|
|
}
|
|
|
|
/* Exchange parts of two files. */
|
|
static int
|
|
xfs_exchange_range(
|
|
struct xfs_exchrange *fxr)
|
|
{
|
|
struct inode *inode1 = file_inode(fxr->file1);
|
|
struct inode *inode2 = file_inode(fxr->file2);
|
|
int ret;
|
|
|
|
BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS &
|
|
XFS_EXCHANGE_RANGE_PRIV_FLAGS);
|
|
|
|
/* Both files must be on the same mount/filesystem. */
|
|
if (fxr->file1->f_path.mnt != fxr->file2->f_path.mnt)
|
|
return -EXDEV;
|
|
|
|
if (fxr->flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
|
|
return -EINVAL;
|
|
|
|
/* Userspace requests only honored for regular files. */
|
|
if (S_ISDIR(inode1->i_mode) || S_ISDIR(inode2->i_mode))
|
|
return -EISDIR;
|
|
if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
|
|
return -EINVAL;
|
|
|
|
/* Both files must be opened for read and write. */
|
|
if (!(fxr->file1->f_mode & FMODE_READ) ||
|
|
!(fxr->file1->f_mode & FMODE_WRITE) ||
|
|
!(fxr->file2->f_mode & FMODE_READ) ||
|
|
!(fxr->file2->f_mode & FMODE_WRITE))
|
|
return -EBADF;
|
|
|
|
/* Neither file can be opened append-only. */
|
|
if ((fxr->file1->f_flags & O_APPEND) ||
|
|
(fxr->file2->f_flags & O_APPEND))
|
|
return -EBADF;
|
|
|
|
/*
|
|
* If we're not exchanging to EOF, we can check the areas before
|
|
* stabilizing both files' i_size.
|
|
*/
|
|
if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) {
|
|
ret = xfs_exchange_range_verify_area(fxr);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
/* Update cmtime if the fd/inode don't forbid it. */
|
|
if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1))
|
|
fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME1;
|
|
if (!(fxr->file2->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode2))
|
|
fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME2;
|
|
|
|
file_start_write(fxr->file2);
|
|
ret = -EOPNOTSUPP; /* XXX call out to lower level code */
|
|
file_end_write(fxr->file2);
|
|
if (ret)
|
|
return ret;
|
|
|
|
fsnotify_modify(fxr->file1);
|
|
if (fxr->file2 != fxr->file1)
|
|
fsnotify_modify(fxr->file2);
|
|
return 0;
|
|
}
|
|
|
|
/* Collect exchange-range arguments from userspace. */
|
|
long
|
|
xfs_ioc_exchange_range(
|
|
struct file *file,
|
|
struct xfs_exchange_range __user *argp)
|
|
{
|
|
struct xfs_exchrange fxr = {
|
|
.file2 = file,
|
|
};
|
|
struct xfs_exchange_range args;
|
|
struct fd file1;
|
|
int error;
|
|
|
|
if (copy_from_user(&args, argp, sizeof(args)))
|
|
return -EFAULT;
|
|
if (memchr_inv(&args.pad, 0, sizeof(args.pad)))
|
|
return -EINVAL;
|
|
if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
|
|
return -EINVAL;
|
|
|
|
fxr.file1_offset = args.file1_offset;
|
|
fxr.file2_offset = args.file2_offset;
|
|
fxr.length = args.length;
|
|
fxr.flags = args.flags;
|
|
|
|
file1 = fdget(args.file1_fd);
|
|
if (!file1.file)
|
|
return -EBADF;
|
|
fxr.file1 = file1.file;
|
|
|
|
error = xfs_exchange_range(&fxr);
|
|
fdput(file1);
|
|
return error;
|
|
}
|