mirror of
git://git.yoctoproject.org/linux-yocto.git
synced 2025-07-05 13:25:20 +02:00
ext4: remove writable userspace mappings before truncating page cache
[ Upstream commit 17207d0bb209e8b40f27d7f3f96e82a78af0bf2c ]

When zeroing a range of folios on a filesystem whose block size is less than the page size, the file's mapped blocks within one page will be marked as unwritten. We should remove writable userspace mappings to ensure that ext4_page_mkwrite() can be called during subsequent write access to these partial folios. Otherwise, data written by subsequent mmap writes may not be saved to disk.

 $mkfs.ext4 -b 1024 /dev/vdb
 $mount /dev/vdb /mnt
 $xfs_io -t -f -c "pwrite -S 0x58 0 4096" -c "mmap -rw 0 4096" \
               -c "mwrite -S 0x5a 2048 2048" -c "fzero 2048 2048" \
               -c "mwrite -S 0x59 2048 2048" -c "close" /mnt/foo

 $od -Ax -t x1z /mnt/foo
 000000 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58
 *
 000800 59 59 59 59 59 59 59 59 59 59 59 59 59 59 59 59
 *
 001000

 $umount /mnt && mount /dev/vdb /mnt
 $od -Ax -t x1z /mnt/foo
 000000 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58 58
 *
 000800 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 *
 001000

Fix this by introducing ext4_truncate_page_cache_block_range() to remove writable userspace mappings when truncating a partial folio range. Additionally, move the journal data mode-specific handlers and truncate_pagecache_range() into this function, allowing it to serve as a common helper that correctly manages the page cache in preparation for block range manipulations.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Ojaswin Mujoo <ojaswin@linux.ibm.com>
Link: https://patch.msgid.link/20241220011637.1157197-2-yi.zhang@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
73e7c65b21
commit
23fe8aa8cc
|
@ -2995,6 +2995,8 @@ extern int ext4_inode_attach_jinode(struct inode *inode);
|
||||||
extern int ext4_can_truncate(struct inode *inode);
|
extern int ext4_can_truncate(struct inode *inode);
|
||||||
extern int ext4_truncate(struct inode *);
|
extern int ext4_truncate(struct inode *);
|
||||||
extern int ext4_break_layouts(struct inode *);
|
extern int ext4_break_layouts(struct inode *);
|
||||||
|
extern int ext4_truncate_page_cache_block_range(struct inode *inode,
|
||||||
|
loff_t start, loff_t end);
|
||||||
extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
|
extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
|
||||||
extern void ext4_set_inode_flags(struct inode *, bool init);
|
extern void ext4_set_inode_flags(struct inode *, bool init);
|
||||||
extern int ext4_alloc_da_blocks(struct inode *inode);
|
extern int ext4_alloc_da_blocks(struct inode *inode);
|
||||||
|
|
|
@ -4660,22 +4660,13 @@ static long ext4_zero_range(struct file *file, loff_t offset,
|
||||||
goto out_mutex;
|
goto out_mutex;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/* Now release the pages and zero block aligned part of pages */
|
||||||
* For journalled data we need to write (and checkpoint) pages
|
ret = ext4_truncate_page_cache_block_range(inode, start, end);
|
||||||
* before discarding page cache to avoid inconsitent data on
|
if (ret) {
|
||||||
* disk in case of crash before zeroing trans is committed.
|
filemap_invalidate_unlock(mapping);
|
||||||
*/
|
goto out_mutex;
|
||||||
if (ext4_should_journal_data(inode)) {
|
|
||||||
ret = filemap_write_and_wait_range(mapping, start,
|
|
||||||
end - 1);
|
|
||||||
if (ret) {
|
|
||||||
filemap_invalidate_unlock(mapping);
|
|
||||||
goto out_mutex;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Now release the pages and zero block aligned part of pages */
|
|
||||||
truncate_pagecache_range(inode, start, end - 1);
|
|
||||||
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
|
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
|
||||||
|
|
||||||
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
|
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
#include <linux/writeback.h>
|
#include <linux/writeback.h>
|
||||||
#include <linux/pagevec.h>
|
#include <linux/pagevec.h>
|
||||||
#include <linux/mpage.h>
|
#include <linux/mpage.h>
|
||||||
|
#include <linux/rmap.h>
|
||||||
#include <linux/namei.h>
|
#include <linux/namei.h>
|
||||||
#include <linux/uio.h>
|
#include <linux/uio.h>
|
||||||
#include <linux/bio.h>
|
#include <linux/bio.h>
|
||||||
|
@ -3892,6 +3893,68 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Clean the mappings of the cached folio that contains @start, provided the
 * byte range [@start, @end) covers at least one complete filesystem block.
 *
 * The folio is looked up (and locked) in the inode's page cache; if the
 * lookup yields an error pointer the function silently does nothing.
 * When folio_mkclean() returns non-zero the folio is marked dirty again
 * (presumably so that data written through the previously writable
 * mappings still reaches disk - confirm against folio_mkclean() semantics).
 */
static inline void ext4_truncate_folio(struct inode *inode,
				       loff_t start, loff_t end)
{
	unsigned long bsize = i_blocksize(inode);
	loff_t first_full_block = round_up(start, bsize);
	loff_t last_full_block_end = round_down(end, bsize);
	struct folio *cached;

	/* Proceed only when a whole block lies inside the range. */
	if (first_full_block < last_full_block_end) {
		cached = filemap_lock_folio(inode->i_mapping,
					    start >> PAGE_SHIFT);
		if (!IS_ERR(cached)) {
			/* Re-dirty the folio if any mapping had to be cleaned. */
			if (folio_mkclean(cached))
				folio_mark_dirty(cached);

			/* Drop the lock and the reference taken by the lookup. */
			folio_unlock(cached);
			folio_put(cached);
		}
	}
}
|
||||||
|
|
||||||
|
/*
 * Get the page cache ready for a block range manipulation on the byte
 * range [@start, @end), then truncate the cached pages of that range.
 *
 * Returns 0 on success, or the error reported by
 * filemap_write_and_wait_range() in data journalling mode (in which case
 * the page cache is left untouched).
 */
int ext4_truncate_page_cache_block_range(struct inode *inode,
					 loff_t start, loff_t end)
{
	unsigned long bsize = i_blocksize(inode);
	int err;

	if (ext4_should_journal_data(inode)) {
		/*
		 * Journalled data: the pages have to be written out (and
		 * checkpointed) before the page cache is discarded, otherwise
		 * a crash before the freeing or unwritten-conversion
		 * transaction commits could leave inconsistent data on disk.
		 */
		err = filemap_write_and_wait_range(inode->i_mapping, start,
						   end - 1);
		if (err)
			return err;
	} else if (!IS_ALIGNED(start | end, PAGE_SIZE) &&
		   bsize < PAGE_SIZE && start < inode->i_size) {
		/*
		 * With a block size smaller than the page size, mapped blocks
		 * inside a single page may be freed or turned into unwritten
		 * ones.  Writable userspace mappings of the partial folios at
		 * the head and tail of the range are removed so that a later
		 * write access goes through ext4_page_mkwrite() again.
		 */
		loff_t head_page_end = round_up(start, PAGE_SIZE);
		loff_t head_end = min(head_page_end, end);

		/* Partial folio at the start of the range. */
		ext4_truncate_folio(inode, start, head_end);

		/* Partial folio at the end, if the range crosses a page. */
		if (head_page_end < end)
			ext4_truncate_folio(inode,
					    round_down(end, PAGE_SIZE), end);
	}

	/* @end is exclusive; the page cache helper takes an inclusive end. */
	truncate_pagecache_range(inode, start, end - 1);
	return 0;
}
|
||||||
|
|
||||||
static void ext4_wait_dax_page(struct inode *inode)
|
static void ext4_wait_dax_page(struct inode *inode)
|
||||||
{
|
{
|
||||||
filemap_invalidate_unlock(inode->i_mapping);
|
filemap_invalidate_unlock(inode->i_mapping);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user