From 095f627add86a6ddda2c2cfd563b0ee05d0172b2 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 27 May 2025 07:28:52 -0600
Subject: [PATCH 01/10] mm/filemap: gate dropbehind invalidate on folio !dirty && !writeback

It's possible for the folio to either get marked for writeback or
redirtied. Add a helper, filemap_end_dropbehind(), which guards the
folio_unmap_invalidate() call behind a check for the folio being both
non-dirty and not under writeback AFTER the folio lock has been
acquired. Use this helper in folio_end_dropbehind_write().

Cc: stable@vger.kernel.org
Reported-by: Al Viro
Fixes: fb7d3bc41493 ("mm/filemap: drop streaming/uncached pages when writeback completes")
Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/
Signed-off-by: Jens Axboe
Link: https://lore.kernel.org/20250527133255.452431-2-axboe@kernel.dk
Signed-off-by: Christian Brauner
---
 mm/filemap.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 7b90cbeb4a1a..008a55290f34 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1589,6 +1589,16 @@ int folio_wait_private_2_killable(struct folio *folio)
 }
 EXPORT_SYMBOL(folio_wait_private_2_killable);
 
+static void filemap_end_dropbehind(struct folio *folio)
+{
+	struct address_space *mapping = folio->mapping;
+
+	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+
+	if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio))
+		folio_unmap_invalidate(mapping, folio, 0);
+}
+
 /*
  * If folio was marked as dropbehind, then pages should be dropped when writeback
  * completes. Do that now. If we fail, it's likely because of a big folio -
@@ -1604,8 +1614,7 @@ static void folio_end_dropbehind_write(struct folio *folio)
 	 * invalidation in that case.
 	 */
 	if (in_task() && folio_trylock(folio)) {
-		if (folio->mapping)
-			folio_unmap_invalidate(folio->mapping, folio, 0);
+		filemap_end_dropbehind(folio);
 		folio_unlock(folio);
 	}
 }

From 25b065a744ff0c1099bb357be1c40030b5a14c07 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 27 May 2025 07:28:53 -0600
Subject: [PATCH 02/10] mm/filemap: use filemap_end_dropbehind() for read invalidation

Use the filemap_end_dropbehind() helper rather than calling
folio_unmap_invalidate() directly, as we need to check if the folio has
been redirtied or marked for writeback once the folio lock has been
re-acquired.
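For reference, the gating pattern both completion paths now funnel into
looks roughly like the sketch below. This is a simplified illustration of
the pattern, not the exact mm/filemap.c code (folio_unmap_invalidate() is
the mm-internal helper used by the patches):

	/*
	 * The folio may have been redirtied or queued for writeback while
	 * it was unlocked, so re-check both conditions under the folio
	 * lock before invalidating.
	 */
	static void dropbehind_invalidate(struct folio *folio)
	{
		if (!folio_trylock(folio))
			return;
		if (folio->mapping && !folio_test_writeback(folio) &&
		    !folio_test_dirty(folio))
			folio_unmap_invalidate(folio->mapping, folio, 0);
		folio_unlock(folio);
	}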
Cc: stable@vger.kernel.org
Reported-by: Trond Myklebust
Fixes: 8026e49bff9b ("mm/filemap: add read support for RWF_DONTCACHE")
Link: https://lore.kernel.org/linux-fsdevel/ba8a9805331ce258a622feaca266b163db681a10.camel@hammerspace.com/
Signed-off-by: Jens Axboe
Link: https://lore.kernel.org/20250527133255.452431-3-axboe@kernel.dk
Signed-off-by: Christian Brauner
---
 mm/filemap.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 008a55290f34..6af6d8f2929c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2644,8 +2644,7 @@ static inline bool pos_same_folio(loff_t pos1, loff_t pos2, struct folio *folio)
 	return (pos1 >> shift == pos2 >> shift);
 }
 
-static void filemap_end_dropbehind_read(struct address_space *mapping,
-					struct folio *folio)
+static void filemap_end_dropbehind_read(struct folio *folio)
 {
 	if (!folio_test_dropbehind(folio))
 		return;
@@ -2653,7 +2652,7 @@ static void filemap_end_dropbehind_read(struct address_space *mapping,
 		return;
 	if (folio_trylock(folio)) {
 		if (folio_test_clear_dropbehind(folio))
-			folio_unmap_invalidate(mapping, folio, 0);
+			filemap_end_dropbehind(folio);
 		folio_unlock(folio);
 	}
 }
@@ -2774,7 +2773,7 @@ put_folios:
 		for (i = 0; i < folio_batch_count(&fbatch); i++) {
 			struct folio *folio = fbatch.folios[i];
 
-			filemap_end_dropbehind_read(mapping, folio);
+			filemap_end_dropbehind_read(folio);
 			folio_put(folio);
 		}
 		folio_batch_init(&fbatch);

From 7b2b67dbd449afd00fc7279b1ab7ffa3d26fe0c9 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 27 May 2025 07:28:54 -0600
Subject: [PATCH 03/10] Revert "Disable FOP_DONTCACHE for now due to bugs"

This reverts commit 478ad02d6844217cc7568619aeb0809d93ade43d.

Both the read and write side dirty && writeback races should be
resolved now; revert the commit that disabled FOP_DONTCACHE for
filesystems.

Link: https://lore.kernel.org/linux-fsdevel/20250525083209.GS2023217@ZenIV/
Signed-off-by: Jens Axboe
Link: https://lore.kernel.org/20250527133255.452431-4-axboe@kernel.dk
Signed-off-by: Christian Brauner
---
 include/linux/fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0db87f8e676c..57c3db3ef6ad 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2207,7 +2207,7 @@ struct file_operations {
 /* Supports asynchronous lock callbacks */
 #define FOP_ASYNC_LOCK		((__force fop_flags_t)(1 << 6))
 /* File system supports uncached read/write buffered IO */
-#define FOP_DONTCACHE		0 /* ((__force fop_flags_t)(1 << 7)) */
+#define FOP_DONTCACHE		((__force fop_flags_t)(1 << 7))
 
 /* Wrap a directory iterator that needs exclusive inode access */
 int wrap_directory_iterator(struct file *, struct dir_context *,
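With the flag live again, a filesystem opts in by setting FOP_DONTCACHE
in the fop_flags field of its file_operations. A minimal sketch; "foofs"
and its use of the generic helpers are hypothetical, FOP_DONTCACHE and
fop_flags are the real interfaces re-enabled above:

	static const struct file_operations foofs_file_operations = {
		.read_iter	= generic_file_read_iter,
		.write_iter	= generic_file_write_iter,
		.mmap		= generic_file_mmap,
		/* advertise support for RWF_DONTCACHE buffered I/O */
		.fop_flags	= FOP_DONTCACHE,
	};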
From 1da7a06d9ce4edea3370945af8bfcc71b7744788 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 27 May 2025 07:28:55 -0600
Subject: [PATCH 04/10] mm/filemap: unify read/write dropbehind naming

The read side is filemap_end_dropbehind_read(), while the write side
used folio_ as the prefix rather than filemap_. The read side makes
more sense; unify the naming so that the write side follows it.

Signed-off-by: Jens Axboe
Link: https://lore.kernel.org/20250527133255.452431-5-axboe@kernel.dk
Signed-off-by: Christian Brauner
---
 mm/filemap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 6af6d8f2929c..2ba1ed116103 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1604,7 +1604,7 @@ static void filemap_end_dropbehind(struct folio *folio)
  * completes. Do that now. If we fail, it's likely because of a big folio -
  * just reset dropbehind for that case and latter completions should invalidate.
  */
-static void folio_end_dropbehind_write(struct folio *folio)
+static void filemap_end_dropbehind_write(struct folio *folio)
 {
 	/*
 	 * Hitting !in_task() should not happen off RWF_DONTCACHE writeback,
@@ -1659,7 +1659,7 @@ void folio_end_writeback(struct folio *folio)
 	acct_reclaim_writeback(folio);
 
 	if (folio_dropbehind)
-		folio_end_dropbehind_write(folio);
+		filemap_end_dropbehind_write(folio);
 	folio_put(folio);
 }
 EXPORT_SYMBOL(folio_end_writeback);

From a1d98e4ffb972ab007f5de850ef53c2a46cacf15 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 27 May 2025 07:28:56 -0600
Subject: [PATCH 05/10] mm/filemap: unify dropbehind flag testing and clearing

The read and write sides do this a bit differently; unify it so that
the _{read,write} helpers check the bit before locking, and the generic
handler is in charge of clearing the bit and invalidating, once under
the folio lock.

Signed-off-by: Jens Axboe
Link: https://lore.kernel.org/20250527133255.452431-6-axboe@kernel.dk
Signed-off-by: Christian Brauner
---
 mm/filemap.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 2ba1ed116103..eef44d7ea12e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1595,7 +1595,11 @@ static void filemap_end_dropbehind(struct folio *folio)
 
 	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 
-	if (mapping && !folio_test_writeback(folio) && !folio_test_dirty(folio))
+	if (folio_test_writeback(folio) || folio_test_dirty(folio))
+		return;
+	if (!folio_test_clear_dropbehind(folio))
+		return;
+	if (mapping)
 		folio_unmap_invalidate(mapping, folio, 0);
 }
 
@@ -1606,6 +1610,9 @@ static void filemap_end_dropbehind(struct folio *folio)
  */
 static void filemap_end_dropbehind_write(struct folio *folio)
 {
+	if (!folio_test_dropbehind(folio))
+		return;
+
 	/*
 	 * Hitting !in_task() should not happen off RWF_DONTCACHE writeback,
 	 * but can happen if normal writeback just happens to find dirty folios
@@ -1629,8 +1636,6 @@ static void filemap_end_dropbehind_write(struct folio *folio)
  */
 void folio_end_writeback(struct folio *folio)
 {
-	bool folio_dropbehind = false;
-
 	VM_BUG_ON_FOLIO(!folio_test_writeback(folio), folio);
 
 	/*
@@ -1652,14 +1657,11 @@ void folio_end_writeback(struct folio *folio)
 	 * reused before the folio_wake_bit().
 	 */
 	folio_get(folio);
-	if (!folio_test_dirty(folio))
-		folio_dropbehind = folio_test_clear_dropbehind(folio);
 	if (__folio_end_writeback(folio))
 		folio_wake_bit(folio, PG_writeback);
-	acct_reclaim_writeback(folio);
 
-	if (folio_dropbehind)
-		filemap_end_dropbehind_write(folio);
+	filemap_end_dropbehind_write(folio);
+	acct_reclaim_writeback(folio);
 	folio_put(folio);
 }
 EXPORT_SYMBOL(folio_end_writeback);
@@ -2651,8 +2653,7 @@ static void filemap_end_dropbehind_read(struct folio *folio)
 	if (folio_test_writeback(folio) || folio_test_dirty(folio))
 		return;
 	if (folio_trylock(folio)) {
-		if (folio_test_clear_dropbehind(folio))
-			filemap_end_dropbehind(folio);
+		filemap_end_dropbehind(folio);
 		folio_unlock(folio);
 	}
 }

From 34ecde3c56066ba79e5ec3d93c5b14ea83e3603e Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Tue, 27 May 2025 17:01:31 -0600
Subject: [PATCH 06/10] iomap: don't lose folio dropbehind state for overwrites

DONTCACHE I/O must have the completion punted to a workqueue, just like
what is done for unwritten extents, as the completion needs task context
to perform the invalidation of the folio(s).
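As background, deferring a bio completion from irq context to task
context follows the usual workqueue pattern; a minimal generic sketch,
not the fs/iomap or XFS code (handle_completion() is a hypothetical
stand-in for the task-context work):

	#include <linux/bio.h>
	#include <linux/slab.h>
	#include <linux/workqueue.h>

	struct deferred_completion {
		struct work_struct	work;
		struct bio		*bio;
	};

	/* hypothetical: free to lock folios, invalidate page cache, etc. */
	static void handle_completion(struct deferred_completion *dc);

	static void deferred_end_io_work(struct work_struct *work)
	{
		struct deferred_completion *dc =
			container_of(work, struct deferred_completion, work);

		handle_completion(dc);	/* runs with task context */
		kfree(dc);
	}

	/* ->bi_end_io handler: may run in irq context, so only queue work */
	static void my_bi_end_io(struct bio *bio)
	{
		struct deferred_completion *dc = bio->bi_private;

		INIT_WORK(&dc->work, deferred_end_io_work);
		queue_work(system_wq, &dc->work);
	}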
However, if writeback is started off filemap_fdatawrite_range() off
generic_sync() and it's an overwrite, then the DONTCACHE marking gets
lost as iomap_add_to_ioend() doesn't look at the folio being added and
no further state is passed down to help it know that this is a
dropbehind/DONTCACHE write.

Check if the folio being added is marked as dropbehind, and set
IOMAP_IOEND_DONTCACHE if that is the case. Then XFS can factor this
into the decision making of completion context in xfs_submit_ioend().

Additionally include this ioend flag in the NOMERGE flags, to avoid
mixing it with unrelated IO.

Since this is the 3rd flag that will cause XFS to punt the completion
to a workqueue, add a helper so that each one of them can get
appropriately commented.

This fixes extra page cache being instantiated when the write performed
is an overwrite, rather than newly instantiated blocks.

Fixes: b2cd5ae693a3 ("iomap: make buffered writes work with RWF_DONTCACHE")
Signed-off-by: Jens Axboe
Link: https://lore.kernel.org/5153f6e8-274d-4546-bf55-30a5018e0d03@kernel.dk
Reviewed-by: Dave Chinner
Signed-off-by: Christian Brauner
---
 fs/iomap/buffered-io.c |  2 ++
 fs/xfs/xfs_aops.c      | 22 ++++++++++++++++++++--
 include/linux/iomap.h  |  5 ++++-
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 233abf598f65..3729391a18f3 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1691,6 +1691,8 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
 		ioend_flags |= IOMAP_IOEND_UNWRITTEN;
 	if (wpc->iomap.flags & IOMAP_F_SHARED)
 		ioend_flags |= IOMAP_IOEND_SHARED;
+	if (folio_test_dropbehind(folio))
+		ioend_flags |= IOMAP_IOEND_DONTCACHE;
 	if (pos == wpc->iomap.offset && (wpc->iomap.flags & IOMAP_F_BOUNDARY))
 		ioend_flags |= IOMAP_IOEND_BOUNDARY;
 
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 26a04a783489..63151feb9c3f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -436,6 +436,25 @@ allocate_blocks:
 	return 0;
 }
 
+static bool
+xfs_ioend_needs_wq_completion(
+	struct iomap_ioend	*ioend)
+{
+	/* Changing inode size requires a transaction. */
+	if (xfs_ioend_is_append(ioend))
+		return true;
+
+	/* Extent manipulation requires a transaction. */
+	if (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED))
+		return true;
+
+	/* Page cache invalidation cannot be done in irq context. */
+	if (ioend->io_flags & IOMAP_IOEND_DONTCACHE)
+		return true;
+
+	return false;
+}
+
 static int
 xfs_submit_ioend(
 	struct iomap_writepage_ctx *wpc,
@@ -460,8 +479,7 @@ xfs_submit_ioend(
 	memalloc_nofs_restore(nofs_flag);
 
 	/* send ioends that might require a transaction to the completion wq */
-	if (xfs_ioend_is_append(ioend) ||
-	    (ioend->io_flags & (IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_SHARED)))
+	if (xfs_ioend_needs_wq_completion(ioend))
 		ioend->io_bio.bi_end_io = xfs_end_bio;
 
 	if (status)
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 68416b135151..522644d62f30 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -377,13 +377,16 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
 #define IOMAP_IOEND_BOUNDARY		(1U << 2)
 /* is direct I/O */
 #define IOMAP_IOEND_DIRECT		(1U << 3)
+/* is DONTCACHE I/O */
+#define IOMAP_IOEND_DONTCACHE		(1U << 4)
 
 /*
  * Flags that if set on either ioend prevent the merge of two ioends.
  * (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way)
 */
 #define IOMAP_IOEND_NOMERGE_FLAGS \
-	(IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT)
+	(IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT | \
+	 IOMAP_IOEND_DONTCACHE)
 
 /*
  * Structure for writeback I/O completions.
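For context, the reason DONTCACHE must be in the NOMERGE set: an ioend
that needs page cache invalidation at completion must not be combined
with unrelated I/O that doesn't. A rough sketch of the merge gate
(illustration only; ioend_can_merge() here is not iomap's actual merge
code, and the contiguity check is simplified):

	static bool ioend_can_merge(const struct iomap_ioend *a,
				    const struct iomap_ioend *b)
	{
		/* a NOMERGE flag on either side prevents merging */
		if ((a->io_flags | b->io_flags) & IOMAP_IOEND_NOMERGE_FLAGS)
			return false;
		/* otherwise merge only physically contiguous ranges */
		return a->io_offset + a->io_size == b->io_offset;
	}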
From dd59137bfe70cf3646021b4721e430213b9c71bd Mon Sep 17 00:00:00 2001
From: Alistair Popple
Date: Fri, 23 May 2025 14:37:49 +1000
Subject: [PATCH 07/10] fs/dax: Fix "don't skip locked entries when scanning entries"

Commit 6be3e21d25ca ("fs/dax: don't skip locked entries when scanning
entries") introduced a new function, wait_entry_unlocked_exclusive(),
which waits for the current entry to become unlocked without advancing
the XArray iterator state.

Waiting for the entry to become unlocked requires dropping the XArray
lock. This requires calling xas_pause() prior to dropping the lock
which leaves the xas in a suitable state for the next iteration.
However this has the side-effect of advancing the xas state to the next
index. Normally this isn't an issue because xas_for_each() contains
code to detect this state and thus avoid advancing the index a second
time on the next loop iteration.

However both the callers of wait_entry_unlocked_exclusive() and the
function itself subsequently use the xas state to reload the entry. As
xas_pause() updated the state to the next index this will cause the
current entry which is being waited on to be skipped.

This caused the following warning to fire intermittently when running
xfstest generic/068 on an XFS filesystem with FS DAX enabled:

[ 35.067397] ------------[ cut here ]------------
[ 35.068229] WARNING: CPU: 21 PID: 1640 at mm/truncate.c:89 truncate_folio_batch_exceptionals+0xd8/0x1e0
[ 35.069717] Modules linked in: nd_pmem dax_pmem nd_btt nd_e820 libnvdimm
[ 35.071006] CPU: 21 UID: 0 PID: 1640 Comm: fstest Not tainted 6.15.0-rc7+ #77 PREEMPT(voluntary)
[ 35.072613] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.3-0-ga6ed6b701f0a-prebuilt.qemu.org 04/01/2014
[ 35.074845] RIP: 0010:truncate_folio_batch_exceptionals+0xd8/0x1e0
[ 35.075962] Code: a1 00 00 00 f6 47 0d 20 0f 84 97 00 00 00 4c 63 e8 41 39 c4 7f 0b eb 61 49 83 c5 01 45 39 ec 7e 58 42 f68
[ 35.079522] RSP: 0018:ffffb04e426c7850 EFLAGS: 00010202
[ 35.080359] RAX: 0000000000000000 RBX: ffff9d21e3481908 RCX: ffffb04e426c77f4
[ 35.081477] RDX: ffffb04e426c79e8 RSI: ffffb04e426c79e0 RDI: ffff9d21e34816e8
[ 35.082590] RBP: ffffb04e426c79e0 R08: 0000000000000001 R09: 0000000000000003
[ 35.083733] R10: 0000000000000000 R11: 822b53c0f7a49868 R12: 000000000000001f
[ 35.084850] R13: 0000000000000000 R14: ffffb04e426c78e8 R15: fffffffffffffffe
[ 35.085953] FS: 00007f9134c87740(0000) GS:ffff9d22abba0000(0000) knlGS:0000000000000000
[ 35.087346] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 35.088244] CR2: 00007f9134c86000 CR3: 000000040afff000 CR4: 00000000000006f0
[ 35.089354] Call Trace:
[ 35.089749]  <TASK>
[ 35.090168]  truncate_inode_pages_range+0xfc/0x4d0
[ 35.091078]  truncate_pagecache+0x47/0x60
[ 35.091735]  xfs_setattr_size+0xc7/0x3e0
[ 35.092648]  xfs_vn_setattr+0x1ea/0x270
[ 35.093437]  notify_change+0x1f4/0x510
[ 35.094219]  ? do_truncate+0x97/0xe0
[ 35.094879]  do_truncate+0x97/0xe0
[ 35.095640]  path_openat+0xabd/0xca0
[ 35.096278]  do_filp_open+0xd7/0x190
[ 35.096860]  do_sys_openat2+0x8a/0xe0
[ 35.097459]  __x64_sys_openat+0x6d/0xa0
[ 35.098076]  do_syscall_64+0xbb/0x1d0
[ 35.098647]  entry_SYSCALL_64_after_hwframe+0x77/0x7f
[ 35.099444] RIP: 0033:0x7f9134d81fc1
[ 35.100033] Code: 75 57 89 f0 25 00 00 41 00 3d 00 00 41 00 74 49 80 3d 2a 26 0e 00 00 74 6d 89 da 48 89 ee bf 9c ff ff ff5
[ 35.102993] RSP: 002b:00007ffcd41e0d10 EFLAGS: 00000202 ORIG_RAX: 0000000000000101
[ 35.104263] RAX: ffffffffffffffda RBX: 0000000000000242 RCX: 00007f9134d81fc1
[ 35.105452] RDX: 0000000000000242 RSI: 00007ffcd41e1200 RDI: 00000000ffffff9c
[ 35.106663] RBP: 00007ffcd41e1200 R08: 0000000000000000 R09: 0000000000000064
[ 35.107923] R10: 00000000000001a4 R11: 0000000000000202 R12: 0000000000000066
[ 35.109112] R13: 0000000000100000 R14: 0000000000100000 R15: 0000000000000400
[ 35.110357]  </TASK>
[ 35.110769] irq event stamp: 8415587
[ 35.111486] hardirqs last enabled at (8415599): [] __up_console_sem+0x52/0x60
[ 35.113067] hardirqs last disabled at (8415610): [] __up_console_sem+0x37/0x60
[ 35.114575] softirqs last enabled at (8415300): [] handle_softirqs+0x315/0x3f0
[ 35.115933] softirqs last disabled at (8415291): [] __irq_exit_rcu+0xa1/0xc0
[ 35.117316] ---[ end trace 0000000000000000 ]---

Fix this by using xas_reset() instead, which is equivalent in
implementation to xas_pause() but does not advance the XArray state.
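To illustrate the difference, a minimal sketch of the corrected wait
pattern (simplified from fs/dax.c; wait_for_entry() and the bare
waitqueue are stand-ins for the real helpers):

	#include <linux/sched.h>
	#include <linux/wait.h>
	#include <linux/xarray.h>

	static void *wait_for_entry(struct xa_state *xas, wait_queue_head_t *wq)
	{
		DEFINE_WAIT(wait);

		prepare_to_wait_exclusive(wq, &wait, TASK_UNINTERRUPTIBLE);
		xas_reset(xas);		/* keeps xa_index on the current entry */
		xas_unlock_irq(xas);	/* xas_pause() would move to the next index */
		schedule();
		finish_wait(wq, &wait);
		xas_lock_irq(xas);
		return xas_load(xas);	/* reloads the entry we actually waited on */
	}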
Fixes: 6be3e21d25ca ("fs/dax: don't skip locked entries when scanning entries")
Signed-off-by: Alistair Popple
Link: https://lore.kernel.org/20250523043749.1460780-1-apopple@nvidia.com
Reviewed-by: Dan Williams
Reviewed-by: Jan Kara
Cc: Dan Williams
Cc: Alison Schofield
Cc: "Matthew Wilcox (Oracle)"
Cc: Balbir Singh
Cc: "Darrick J. Wong"
Cc: Dave Chinner
Cc: David Hildenbrand
Cc: Jan Kara
Cc: John Hubbard
Cc: Ted Ts'o
Cc: Alexander Viro
Cc: Christian Brauner
Signed-off-by: Christian Brauner
---
 fs/dax.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dax.c b/fs/dax.c
index 676303419e9e..f8d8b1afd232 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -257,7 +257,7 @@ static void *wait_entry_unlocked_exclusive(struct xa_state *xas, void *entry)
 		wq = dax_entry_waitqueue(xas, entry, &ewait.key);
 		prepare_to_wait_exclusive(wq, &ewait.wait,
 					  TASK_UNINTERRUPTIBLE);
-		xas_pause(xas);
+		xas_reset(xas);
 		xas_unlock_irq(xas);
 		schedule();
 		finish_wait(wq, &ewait.wait);

From 15ecd83dc06277385ad71dc7ea26911d9a79acaf Mon Sep 17 00:00:00 2001
From: Pekka Ristola
Date: Tue, 27 May 2025 20:48:55 +0000
Subject: [PATCH 08/10] rust: file: mark `LocalFile` as `repr(transparent)`

Unsafe code in `LocalFile`'s methods assumes that the type has the same
layout as the inner `bindings::file`. This is not guaranteed by the
default struct representation in Rust, but requires specifying the
`transparent` representation.

The `File` struct (which also wraps `bindings::file`) is already marked
as `repr(transparent)`, so this change makes their layouts equivalent.
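As a C analogue of the layout guarantee being relied on (illustrative
only; `local_file` is a hypothetical stand-in for the Rust wrapper): in
C, a struct whose sole member is the wrapped object shares its address,
so the cast below is well-defined. Rust only promises the equivalent
for wrappers marked repr(transparent).

	struct local_file {
		struct file inner;	/* sole member: same layout as struct file */
	};

	static inline struct local_file *local_file_from_raw(struct file *ptr)
	{
		return (struct local_file *)ptr; /* valid only with identical layout */
	}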
Fixes: 851849824bb5 ("rust: file: add Rust abstraction for `struct file`")
Closes: https://github.com/Rust-for-Linux/linux/issues/1165
Signed-off-by: Pekka Ristola
Link: https://lore.kernel.org/20250527204636.12573-1-pekkarr@protonmail.com
Reviewed-by: Benno Lossin
Reviewed-by: Alice Ryhl
Signed-off-by: Christian Brauner
---
 rust/kernel/fs/file.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs
index 13a0e44cd1aa..138693bdeb3f 100644
--- a/rust/kernel/fs/file.rs
+++ b/rust/kernel/fs/file.rs
@@ -219,6 +219,7 @@ unsafe impl AlwaysRefCounted for File {
 /// must be on the same thread as this file.
 ///
 /// [`assume_no_fdget_pos`]: LocalFile::assume_no_fdget_pos
+#[repr(transparent)]
 pub struct LocalFile {
     inner: Opaque<bindings::file>,
 }

From 946026ba4293a14970b4b0b72f5f0cbb698ad77e Mon Sep 17 00:00:00 2001
From: Pekka Ristola
Date: Tue, 27 May 2025 20:48:59 +0000
Subject: [PATCH 09/10] rust: file: improve safety comments

Some of the safety comments in `LocalFile`'s methods incorrectly refer
to the `File` type instead of `LocalFile`, so fix them to use the
correct type.

Also add missing Markdown code spans around lifetimes in the safety
comments, i.e. change 'a to `'a`.

Link: https://github.com/Rust-for-Linux/linux/issues/1165
Signed-off-by: Pekka Ristola
Link: https://lore.kernel.org/20250527204636.12573-2-pekkarr@protonmail.com
Reviewed-by: Benno Lossin
Reviewed-by: Alice Ryhl
Signed-off-by: Christian Brauner
---
 rust/kernel/fs/file.rs | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/rust/kernel/fs/file.rs b/rust/kernel/fs/file.rs
index 138693bdeb3f..72d84fb0e266 100644
--- a/rust/kernel/fs/file.rs
+++ b/rust/kernel/fs/file.rs
@@ -225,7 +225,7 @@ pub struct LocalFile {
 }
 
 // SAFETY: The type invariants guarantee that `LocalFile` is always ref-counted. This implementation
-// makes `ARef<File>` own a normal refcount.
+// makes `ARef<LocalFile>` own a normal refcount.
 unsafe impl AlwaysRefCounted for LocalFile {
     #[inline]
     fn inc_ref(&self) {
@@ -236,7 +236,8 @@ unsafe impl AlwaysRefCounted for LocalFile {
     #[inline]
     unsafe fn dec_ref(obj: ptr::NonNull<LocalFile>) {
         // SAFETY: To call this method, the caller passes us ownership of a normal refcount, so we
-        // may drop it. The cast is okay since `File` has the same representation as `struct file`.
+        // may drop it. The cast is okay since `LocalFile` has the same representation as
+        // `struct file`.
         unsafe { bindings::fput(obj.cast().as_ptr()) }
     }
 }
@@ -274,7 +275,7 @@ impl LocalFile {
     #[inline]
     pub unsafe fn from_raw_file<'a>(ptr: *const bindings::file) -> &'a LocalFile {
         // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the
-        //         duration of 'a. The cast is okay because `File` is `repr(transparent)`.
+        //         duration of `'a`. The cast is okay because `LocalFile` is `repr(transparent)`.
         //
         // INVARIANT: The caller guarantees that there are no problematic `fdget_pos` calls.
         unsafe { &*ptr.cast() }
@@ -348,7 +349,7 @@ impl File {
     #[inline]
     pub unsafe fn from_raw_file<'a>(ptr: *const bindings::file) -> &'a File {
         // SAFETY: The caller guarantees that the pointer is not dangling and stays valid for the
-        //         duration of 'a. The cast is okay because `File` is `repr(transparent)`.
+        //         duration of `'a`. The cast is okay because `File` is `repr(transparent)`.
         //
         // INVARIANT: The caller guarantees that there are no problematic `fdget_pos` calls.
         unsafe { &*ptr.cast() }
From 5402c4d4d2000a9baa30c1157c97152ec6383733 Mon Sep 17 00:00:00 2001
From: Amir Goldstein
Date: Sun, 25 May 2025 12:47:31 +0200
Subject: [PATCH 10/10] exportfs: require ->fh_to_parent() to encode connectable file handles

When a user requests a connectable file handle explicitly with the
AT_HANDLE_CONNECTABLE flag, fail the request if the filesystem (e.g.
nfs) does not know how to decode a connected non-dir dentry.

Fixes: c374196b2b9f ("fs: name_to_handle_at() support for "explicit connectable" file handles")
Signed-off-by: Amir Goldstein
Link: https://lore.kernel.org/20250525104731.1461704-1-amir73il@gmail.com
Reviewed-by: Jan Kara
Signed-off-by: Christian Brauner
---
 include/linux/exportfs.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index fc93f0abf513..25c4a5afbd44 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -314,6 +314,9 @@ static inline bool exportfs_can_decode_fh(const struct export_operations *nop)
 static inline bool exportfs_can_encode_fh(const struct export_operations *nop,
 					  int fh_flags)
 {
+	if (!nop)
+		return false;
+
 	/*
 	 * If a non-decodeable file handle was requested, we only need to make
 	 * sure that filesystem did not opt-out of encoding fid.
@@ -321,6 +324,13 @@ static inline bool exportfs_can_encode_fh(const struct export_operations *nop,
 	if (fh_flags & EXPORT_FH_FID)
 		return exportfs_can_encode_fid(nop);
 
+	/*
+	 * If a connectable file handle was requested, we need to make sure that
+	 * filesystem can also decode connected file handles.
+	 */
+	if ((fh_flags & EXPORT_FH_CONNECTABLE) && !nop->fh_to_parent)
+		return false;
+
 	/*
 	 * If a decodeable file handle was requested, we need to make sure that
 	 * filesystem can also decode file handles.
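For the user-visible effect, a minimal userspace sketch: with this
check in place, the request fails up front (EOPNOTSUPP) instead of
handing back a handle that cannot be decoded later. The fallback
definition of AT_HANDLE_CONNECTABLE is an assumption for older uapi
headers; verify the value against your kernel's include/uapi.

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>

	#ifndef AT_HANDLE_CONNECTABLE
	#define AT_HANDLE_CONNECTABLE 0x002	/* assumed value; check uapi headers */
	#endif

	int main(int argc, char **argv)
	{
		struct file_handle *fh;
		int mount_id;

		if (argc != 2) {
			fprintf(stderr, "usage: %s <path>\n", argv[0]);
			return 1;
		}

		fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
		if (!fh)
			return 1;
		fh->handle_bytes = MAX_HANDLE_SZ;

		if (name_to_handle_at(AT_FDCWD, argv[1], fh, &mount_id,
				      AT_HANDLE_CONNECTABLE) < 0) {
			perror("name_to_handle_at");	/* EOPNOTSUPP if unsupported */
			return 1;
		}
		printf("handle: %u bytes, type %d\n",
		       fh->handle_bytes, fh->handle_type);
		free(fh);
		return 0;
	}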