vfs-6.16-rc5.fixes

-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCaGeHBAAKCRCRxhvAZXjc
 omJNAQCnHIDuiscCUFeevb5sMNqws6td2kexX8reLxbdzzTrFgEAwAKxy5BVhNlg
 NusCZ2taYmenAK+HjI3JEw6c/3IKqwE=
 =NxGx
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.16-rc5.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner:

 - Fix a regression caused by the anonymous inode rework. Making them
   regular files causes various places in the kernel to tip over
   starting with io_uring.

   Revert to the former status quo and port our assertion to be based on
   checking the inode so we don't lose the valuable VFS_*_ON_*()
   assertions that have already helped discover weird behavior or
   outright bugs.

 - Fix the upper bound calculation in fuse_fill_write_pages()

 - Fix priority inversion issues in the eventpoll code

 - Make secretmem use anon_inode_make_secure_inode() to avoid bypassing
   the LSM layer

 - Fix a netfs hang due to missing case in final DIO read result
   collection

 - Fix a double put of the netfs_io_request struct

 - Provide some helpers to abstract out NETFS_RREQ_IN_PROGRESS flag
   wrangling

 - Fix infinite looping in netfs_wait_for_pause/request()

 - Fix a netfs ref leak on an extra subrequest inserted into a request's
   list of subreqs

 - Fix various cifs RPC callbacks to set NETFS_SREQ_NEED_RETRY if a
   subrequest fails retriably

 - Fix a cifs warning in the workqueue code when reconnecting a channel

 - Fix the updating of i_size in netfs to avoid a race between testing
   if we should have extended the file with a DIO write and changing
   i_size

 - Merge the places in netfs that update i_size on write

 - Fix coredump socket selftests

* tag 'vfs-6.16-rc5.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
  anon_inode: rework assertions
  netfs: Update tracepoints in a number of ways
  netfs: Renumber the NETFS_RREQ_* flags to make traces easier to read
  netfs: Merge i_size update functions
  netfs: Fix i_size updating
  smb: client: set missing retry flag in cifs_writev_callback()
  smb: client: set missing retry flag in cifs_readv_callback()
  smb: client: set missing retry flag in smb2_writev_callback()
  netfs: Fix ref leak on inserted extra subreq in write retry
  netfs: Fix looping in wait functions
  netfs: Provide helpers to perform NETFS_RREQ_IN_PROGRESS flag wangling
  netfs: Fix double put of request
  netfs: Fix hang due to missing case in final DIO read result collection
  eventpoll: Fix priority inversion problem
  fuse: fix fuse_fill_write_pages() upper bound calculation
  fs: export anon_inode_make_secure_inode() and fix secretmem LSM bypass
  selftests/coredump: Fix "socket_detect_userspace_client" test failure
Linus Torvalds 2025-07-04 09:06:49 -07:00
commit 2eb7f03acf
21 changed files with 354 additions and 441 deletions

View File

@ -98,14 +98,25 @@ static struct file_system_type anon_inode_fs_type = {
.kill_sb = kill_anon_super,
};
static struct inode *anon_inode_make_secure_inode(
const char *name,
const struct inode *context_inode)
/**
* anon_inode_make_secure_inode - allocate an anonymous inode with security context
* @sb: [in] Superblock to allocate from
* @name: [in] Name of the class of the new file (e.g., "secretmem")
* @context_inode:
* [in] Optional parent inode for security inheritance
*
* The function ensures proper security initialization through the LSM hook
* security_inode_init_security_anon().
*
* Return: Pointer to new inode on success, ERR_PTR on failure.
*/
struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name,
const struct inode *context_inode)
{
struct inode *inode;
int error;
inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
inode = alloc_anon_inode(sb);
if (IS_ERR(inode))
return inode;
inode->i_flags &= ~S_PRIVATE;
@ -118,6 +129,7 @@ static struct inode *anon_inode_make_secure_inode(
}
return inode;
}
EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm");
static struct file *__anon_inode_getfile(const char *name,
const struct file_operations *fops,
@ -132,7 +144,8 @@ static struct file *__anon_inode_getfile(const char *name,
return ERR_PTR(-ENOENT);
if (make_inode) {
inode = anon_inode_make_secure_inode(name, context_inode);
inode = anon_inode_make_secure_inode(anon_inode_mnt->mnt_sb,
name, context_inode);
if (IS_ERR(inode)) {
file = ERR_CAST(inode);
goto err;

View File

@ -137,13 +137,7 @@ struct epitem {
};
/* List header used to link this structure to the eventpoll ready list */
struct list_head rdllink;
/*
* Works together "struct eventpoll"->ovflist in keeping the
* single linked chain of items.
*/
struct epitem *next;
struct llist_node rdllink;
/* The file descriptor information this item refers to */
struct epoll_filefd ffd;
@ -191,22 +185,15 @@ struct eventpoll {
/* Wait queue used by file->poll() */
wait_queue_head_t poll_wait;
/* List of ready file descriptors */
struct list_head rdllist;
/* Lock which protects rdllist and ovflist */
rwlock_t lock;
/*
* List of ready file descriptors. Adding to this list is lockless. Items can be removed
* only with eventpoll::mtx
*/
struct llist_head rdllist;
/* RB tree root used to store monitored fd structs */
struct rb_root_cached rbr;
/*
* This is a single linked list that chains all the "struct epitem" that
* happened while transferring ready events to userspace w/out
* holding ->lock.
*/
struct epitem *ovflist;
/* wakeup_source used when ep_send_events or __ep_eventpoll_poll is running */
struct wakeup_source *ws;
@ -361,10 +348,14 @@ static inline int ep_cmp_ffd(struct epoll_filefd *p1,
(p1->file < p2->file ? -1 : p1->fd - p2->fd));
}
/* Tells us if the item is currently linked */
static inline int ep_is_linked(struct epitem *epi)
/*
* Add the item to its container eventpoll's rdllist; do nothing if the item is already on rdllist.
*/
static void epitem_ready(struct epitem *epi)
{
return !list_empty(&epi->rdllink);
if (&epi->rdllink == cmpxchg(&epi->rdllink.next, &epi->rdllink, NULL))
llist_add(&epi->rdllink, &epi->ep->rdllist);
}
static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_entry_t *p)
@ -383,13 +374,26 @@ static inline struct epitem *ep_item_from_wait(wait_queue_entry_t *p)
*
* @ep: Pointer to the eventpoll context.
*
* Return: a value different than %zero if ready events are available,
* or %zero otherwise.
* Return: true if ready events might be available, false otherwise.
*/
static inline int ep_events_available(struct eventpoll *ep)
static inline bool ep_events_available(struct eventpoll *ep)
{
return !list_empty_careful(&ep->rdllist) ||
READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR;
bool available;
int locked;
locked = mutex_trylock(&ep->mtx);
if (!locked) {
/*
* The lock held and someone might have removed all items while inspecting it. The
* llist_empty() check in this case is futile. Assume that something is enqueued and
* let ep_try_send_events() figure it out.
*/
return true;
}
available = !llist_empty(&ep->rdllist);
mutex_unlock(&ep->mtx);
return available;
}
#ifdef CONFIG_NET_RX_BUSY_POLL
@ -724,77 +728,6 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
rcu_read_unlock();
}
/*
* ep->mutex needs to be held because we could be hit by
* eventpoll_release_file() and epoll_ctl().
*/
static void ep_start_scan(struct eventpoll *ep, struct list_head *txlist)
{
/*
* Steal the ready list, and re-init the original one to the
* empty list. Also, set ep->ovflist to NULL so that events
* happening while looping w/out locks, are not lost. We cannot
* have the poll callback to queue directly on ep->rdllist,
* because we want the "sproc" callback to be able to do it
* in a lockless way.
*/
lockdep_assert_irqs_enabled();
write_lock_irq(&ep->lock);
list_splice_init(&ep->rdllist, txlist);
WRITE_ONCE(ep->ovflist, NULL);
write_unlock_irq(&ep->lock);
}
static void ep_done_scan(struct eventpoll *ep,
struct list_head *txlist)
{
struct epitem *epi, *nepi;
write_lock_irq(&ep->lock);
/*
* During the time we spent inside the "sproc" callback, some
* other events might have been queued by the poll callback.
* We re-insert them inside the main ready-list here.
*/
for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL;
nepi = epi->next, epi->next = EP_UNACTIVE_PTR) {
/*
* We need to check if the item is already in the list.
* During the "sproc" callback execution time, items are
* queued into ->ovflist but the "txlist" might already
* contain them, and the list_splice() below takes care of them.
*/
if (!ep_is_linked(epi)) {
/*
* ->ovflist is LIFO, so we have to reverse it in order
* to keep in FIFO.
*/
list_add(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
}
}
/*
* We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after
* releasing the lock, events will be queued in the normal way inside
* ep->rdllist.
*/
WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR);
/*
* Quickly re-inject items left on "txlist".
*/
list_splice(txlist, &ep->rdllist);
__pm_relax(ep->ws);
if (!list_empty(&ep->rdllist)) {
if (waitqueue_active(&ep->wq))
wake_up(&ep->wq);
}
write_unlock_irq(&ep->lock);
}
static void ep_get(struct eventpoll *ep)
{
refcount_inc(&ep->refcount);
@ -832,10 +765,12 @@ static void ep_free(struct eventpoll *ep)
static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
{
struct file *file = epi->ffd.file;
struct llist_node *put_back_last;
struct epitems_head *to_free;
struct hlist_head *head;
LLIST_HEAD(put_back);
lockdep_assert_irqs_enabled();
lockdep_assert_held(&ep->mtx);
/*
* Removes poll wait queue hooks.
@ -867,10 +802,20 @@ static bool __ep_remove(struct eventpoll *ep, struct epitem *epi, bool force)
rb_erase_cached(&epi->rbn, &ep->rbr);
write_lock_irq(&ep->lock);
if (ep_is_linked(epi))
list_del_init(&epi->rdllink);
write_unlock_irq(&ep->lock);
if (llist_on_list(&epi->rdllink)) {
put_back_last = NULL;
while (true) {
struct llist_node *n = llist_del_first(&ep->rdllist);
if (&epi->rdllink == n || WARN_ON(!n))
break;
if (!put_back_last)
put_back_last = n;
__llist_add(n, &put_back);
}
if (put_back_last)
llist_add_batch(put_back.first, put_back_last, &ep->rdllist);
}
wakeup_source_unregister(ep_wakeup_source(epi));
/*
@ -974,8 +919,9 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt, int depth
static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int depth)
{
struct eventpoll *ep = file->private_data;
LIST_HEAD(txlist);
struct epitem *epi, *tmp;
struct wakeup_source *ws;
struct llist_node *n;
struct epitem *epi;
poll_table pt;
__poll_t res = 0;
@ -989,22 +935,39 @@ static __poll_t __ep_eventpoll_poll(struct file *file, poll_table *wait, int dep
* the ready list.
*/
mutex_lock_nested(&ep->mtx, depth);
ep_start_scan(ep, &txlist);
list_for_each_entry_safe(epi, tmp, &txlist, rdllink) {
while (true) {
n = llist_del_first_init(&ep->rdllist);
if (!n)
break;
epi = llist_entry(n, struct epitem, rdllink);
if (ep_item_poll(epi, &pt, depth + 1)) {
res = EPOLLIN | EPOLLRDNORM;
epitem_ready(epi);
break;
} else {
/*
* Item has been dropped into the ready list by the poll
* callback, but it's not actually ready, as far as
* caller requested events goes. We can remove it here.
* We need to activate ep before deactivating epi, to prevent autosuspend
* just in case epi becomes active after ep_item_poll() above.
*
* This is similar to ep_send_events().
*/
ws = ep_wakeup_source(epi);
if (ws) {
if (ws->active)
__pm_stay_awake(ep->ws);
__pm_relax(ws);
}
__pm_relax(ep_wakeup_source(epi));
list_del_init(&epi->rdllink);
/* Just in case epi becomes active right before __pm_relax() */
if (unlikely(ep_item_poll(epi, &pt, depth + 1)))
ep_pm_stay_awake(epi);
__pm_relax(ep->ws);
}
}
ep_done_scan(ep, &txlist);
mutex_unlock(&ep->mtx);
return res;
}
@ -1153,12 +1116,10 @@ static int ep_alloc(struct eventpoll **pep)
return -ENOMEM;
mutex_init(&ep->mtx);
rwlock_init(&ep->lock);
init_waitqueue_head(&ep->wq);
init_waitqueue_head(&ep->poll_wait);
INIT_LIST_HEAD(&ep->rdllist);
init_llist_head(&ep->rdllist);
ep->rbr = RB_ROOT_CACHED;
ep->ovflist = EP_UNACTIVE_PTR;
ep->user = get_current_user();
refcount_set(&ep->refcount, 1);
@ -1240,94 +1201,11 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd,
}
#endif /* CONFIG_KCMP */
/*
* Adds a new entry to the tail of the list in a lockless way, i.e.
* multiple CPUs are allowed to call this function concurrently.
*
* Beware: it is necessary to prevent any other modifications of the
* existing list until all changes are completed, in other words
* concurrent list_add_tail_lockless() calls should be protected
* with a read lock, where write lock acts as a barrier which
* makes sure all list_add_tail_lockless() calls are fully
* completed.
*
* Also an element can be locklessly added to the list only in one
* direction i.e. either to the tail or to the head, otherwise
* concurrent access will corrupt the list.
*
* Return: %false if element has been already added to the list, %true
* otherwise.
*/
static inline bool list_add_tail_lockless(struct list_head *new,
struct list_head *head)
{
struct list_head *prev;
/*
* This is simple 'new->next = head' operation, but cmpxchg()
* is used in order to detect that same element has been just
* added to the list from another CPU: the winner observes
* new->next == new.
*/
if (!try_cmpxchg(&new->next, &new, head))
return false;
/*
* Initially ->next of a new element must be updated with the head
* (we are inserting to the tail) and only then pointers are atomically
* exchanged. XCHG guarantees memory ordering, thus ->next should be
* updated before pointers are actually swapped and pointers are
* swapped before prev->next is updated.
*/
prev = xchg(&head->prev, new);
/*
* It is safe to modify prev->next and new->prev, because a new element
* is added only to the tail and new->next is updated before XCHG.
*/
prev->next = new;
new->prev = prev;
return true;
}
/*
* Chains a new epi entry to the tail of the ep->ovflist in a lockless way,
* i.e. multiple CPUs are allowed to call this function concurrently.
*
* Return: %false if epi element has been already chained, %true otherwise.
*/
static inline bool chain_epi_lockless(struct epitem *epi)
{
struct eventpoll *ep = epi->ep;
/* Fast preliminary check */
if (epi->next != EP_UNACTIVE_PTR)
return false;
/* Check that the same epi has not been just chained from another CPU */
if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR)
return false;
/* Atomically exchange tail */
epi->next = xchg(&ep->ovflist, epi);
return true;
}
/*
* This is the callback that is passed to the wait queue wakeup
* mechanism. It is called by the stored file descriptors when they
* have events to report.
*
* This callback takes a read lock in order not to contend with concurrent
* events from another file descriptor, thus all modifications to ->rdllist
* or ->ovflist are lockless. Read lock is paired with the write lock from
* ep_start/done_scan(), which stops all list modifications and guarantees
* that lists state is seen correctly.
*
* Another thing worth to mention is that ep_poll_callback() can be called
* concurrently for the same @epi from different CPUs if poll table was inited
* with several wait queues entries. Plural wakeup from different CPUs of a
@ -1337,15 +1215,11 @@ static inline bool chain_epi_lockless(struct epitem *epi)
*/
static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
int pwake = 0;
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
__poll_t pollflags = key_to_poll(key);
unsigned long flags;
int ewake = 0;
read_lock_irqsave(&ep->lock, flags);
ep_set_busy_poll_napi_id(epi);
/*
@ -1355,7 +1229,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* until the next EPOLL_CTL_MOD will be issued.
*/
if (!(epi->event.events & ~EP_PRIVATE_BITS))
goto out_unlock;
goto out;
/*
* Check the events coming with the callback. At this stage, not
@ -1364,22 +1238,10 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
* test for "key" != NULL before the event match test.
*/
if (pollflags && !(pollflags & epi->event.events))
goto out_unlock;
goto out;
/*
* If we are transferring events to userspace, we can hold no locks
* (because we're accessing user memory, and because of linux f_op->poll()
* semantics). All the events that happen during that period of time are
* chained in ep->ovflist and requeued later on.
*/
if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) {
if (chain_epi_lockless(epi))
ep_pm_stay_awake_rcu(epi);
} else if (!ep_is_linked(epi)) {
/* In the usual case, add event to ready list. */
if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist))
ep_pm_stay_awake_rcu(epi);
}
ep_pm_stay_awake_rcu(epi);
epitem_ready(epi);
/*
* Wake up ( if active ) both the eventpoll wait list and the ->poll()
@ -1408,15 +1270,9 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
wake_up(&ep->wq);
}
if (waitqueue_active(&ep->poll_wait))
pwake++;
out_unlock:
read_unlock_irqrestore(&ep->lock, flags);
/* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE);
out:
if (!(epi->event.events & EPOLLEXCLUSIVE))
ewake = 1;
@ -1661,8 +1517,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
if (is_file_epoll(tfile))
tep = tfile->private_data;
lockdep_assert_irqs_enabled();
if (unlikely(percpu_counter_compare(&ep->user->epoll_watches,
max_user_watches) >= 0))
return -ENOSPC;
@ -1674,11 +1528,10 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
}
/* Item initialization follow here ... */
INIT_LIST_HEAD(&epi->rdllink);
init_llist_node(&epi->rdllink);
epi->ep = ep;
ep_set_ffd(&epi->ffd, tfile, fd);
epi->event = *event;
epi->next = EP_UNACTIVE_PTR;
if (tep)
mutex_lock_nested(&tep->mtx, 1);
@ -1745,16 +1598,13 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
return -ENOMEM;
}
/* We have to drop the new item inside our item list to keep track of it */
write_lock_irq(&ep->lock);
/* record NAPI ID of new item if present */
ep_set_busy_poll_napi_id(epi);
/* If the file is already "ready" we drop it inside the ready list */
if (revents && !ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
if (revents) {
ep_pm_stay_awake(epi);
epitem_ready(epi);
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
@ -1763,8 +1613,6 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
pwake++;
}
write_unlock_irq(&ep->lock);
/* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(ep, NULL, 0);
@ -1779,11 +1627,8 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
static int ep_modify(struct eventpoll *ep, struct epitem *epi,
const struct epoll_event *event)
{
int pwake = 0;
poll_table pt;
lockdep_assert_irqs_enabled();
init_poll_funcptr(&pt, NULL);
/*
@ -1827,24 +1672,16 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
* list, push it inside.
*/
if (ep_item_poll(epi, &pt, 1)) {
write_lock_irq(&ep->lock);
if (!ep_is_linked(epi)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
ep_pm_stay_awake(epi);
epitem_ready(epi);
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
wake_up(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
pwake++;
}
write_unlock_irq(&ep->lock);
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
wake_up(&ep->wq);
if (waitqueue_active(&ep->poll_wait))
ep_poll_safewake(ep, NULL, 0);
}
/* We have to call this outside the lock */
if (pwake)
ep_poll_safewake(ep, NULL, 0);
return 0;
}
@ -1852,7 +1689,7 @@ static int ep_send_events(struct eventpoll *ep,
struct epoll_event __user *events, int maxevents)
{
struct epitem *epi, *tmp;
LIST_HEAD(txlist);
LLIST_HEAD(txlist);
poll_table pt;
int res = 0;
@ -1867,19 +1704,18 @@ static int ep_send_events(struct eventpoll *ep,
init_poll_funcptr(&pt, NULL);
mutex_lock(&ep->mtx);
ep_start_scan(ep, &txlist);
/*
* We can loop without lock because we are passed a task private list.
* Items cannot vanish during the loop we are holding ep->mtx.
*/
list_for_each_entry_safe(epi, tmp, &txlist, rdllink) {
while (res < maxevents) {
struct wakeup_source *ws;
struct llist_node *n;
__poll_t revents;
if (res >= maxevents)
n = llist_del_first(&ep->rdllist);
if (!n)
break;
epi = llist_entry(n, struct epitem, rdllink);
/*
* Activate ep->ws before deactivating epi->ws to prevent
* triggering auto-suspend here (in case we reactive epi->ws
@ -1896,21 +1732,30 @@ static int ep_send_events(struct eventpoll *ep,
__pm_relax(ws);
}
list_del_init(&epi->rdllink);
/*
* If the event mask intersect the caller-requested one,
* deliver the event to userspace. Again, we are holding ep->mtx,
* so no operations coming from userspace can change the item.
*/
revents = ep_item_poll(epi, &pt, 1);
if (!revents)
if (!revents) {
init_llist_node(n);
/*
* Just in case epi becomes ready after ep_item_poll() above, but before
* init_llist_node(). Make sure to add it to the ready list, otherwise an
* event may be lost.
*/
if (unlikely(ep_item_poll(epi, &pt, 1))) {
ep_pm_stay_awake(epi);
epitem_ready(epi);
}
continue;
}
events = epoll_put_uevent(revents, epi->event.data, events);
if (!events) {
list_add(&epi->rdllink, &txlist);
ep_pm_stay_awake(epi);
llist_add(&epi->rdllink, &ep->rdllist);
if (!res)
res = -EFAULT;
break;
@ -1918,25 +1763,31 @@ static int ep_send_events(struct eventpoll *ep,
res++;
if (epi->event.events & EPOLLONESHOT)
epi->event.events &= EP_PRIVATE_BITS;
else if (!(epi->event.events & EPOLLET)) {
__llist_add(n, &txlist);
}
llist_for_each_entry_safe(epi, tmp, txlist.first, rdllink) {
init_llist_node(&epi->rdllink);
if (!(epi->event.events & EPOLLET)) {
/*
* If this file has been added with Level
* Trigger mode, we need to insert back inside
* the ready list, so that the next call to
* epoll_wait() will check again the events
* availability. At this point, no one can insert
* into ep->rdllist besides us. The epoll_ctl()
* callers are locked out by
* ep_send_events() holding "mtx" and the
* poll callback will queue them in ep->ovflist.
* If this file has been added with Level Trigger mode, we need to insert
* back inside the ready list, so that the next call to epoll_wait() will
* check again the events availability.
*/
list_add_tail(&epi->rdllink, &ep->rdllist);
ep_pm_stay_awake(epi);
epitem_ready(epi);
}
}
ep_done_scan(ep, &txlist);
__pm_relax(ep->ws);
mutex_unlock(&ep->mtx);
if (!llist_empty(&ep->rdllist)) {
if (waitqueue_active(&ep->wq))
wake_up(&ep->wq);
}
return res;
}
@ -2029,8 +1880,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
wait_queue_entry_t wait;
ktime_t expires, *to = NULL;
lockdep_assert_irqs_enabled();
if (timeout && (timeout->tv_sec | timeout->tv_nsec)) {
slack = select_estimate_accuracy(timeout);
to = &expires;
@ -2090,54 +1939,15 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
init_wait(&wait);
wait.func = ep_autoremove_wake_function;
write_lock_irq(&ep->lock);
/*
* Barrierless variant, waitqueue_active() is called under
* the same lock on wakeup ep_poll_callback() side, so it
* is safe to avoid an explicit barrier.
*/
__set_current_state(TASK_INTERRUPTIBLE);
prepare_to_wait_exclusive(&ep->wq, &wait, TASK_INTERRUPTIBLE);
/*
* Do the final check under the lock. ep_start/done_scan()
* plays with two lists (->rdllist and ->ovflist) and there
* is always a race when both lists are empty for short
* period of time although events are pending, so lock is
* important.
*/
eavail = ep_events_available(ep);
if (!eavail)
__add_wait_queue_exclusive(&ep->wq, &wait);
write_unlock_irq(&ep->lock);
if (!eavail)
if (!ep_events_available(ep))
timed_out = !ep_schedule_timeout(to) ||
!schedule_hrtimeout_range(to, slack,
HRTIMER_MODE_ABS);
__set_current_state(TASK_RUNNING);
/*
* We were woken up, thus go and try to harvest some events.
* If timed out and still on the wait queue, recheck eavail
* carefully under lock, below.
*/
eavail = 1;
if (!list_empty_careful(&wait.entry)) {
write_lock_irq(&ep->lock);
/*
* If the thread timed out and is not on the wait queue,
* it means that the thread was woken up after its
* timeout expired before it could reacquire the lock.
* Thus, when wait.entry is empty, it needs to harvest
* events.
*/
if (timed_out)
eavail = list_empty(&wait.entry);
__remove_wait_queue(&ep->wq, &wait);
write_unlock_irq(&ep->lock);
}
finish_wait(&ep->wq, &wait);
eavail = ep_events_available(ep);
}
}

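The key idea in the eventpoll rework above is that an epitem's rdllink llist_node doubles as its own "not queued" marker: init_llist_node() points the node at itself, and epitem_ready() cmpxchg()es that self-pointer away so that concurrent wakeups enqueue an item at most once onto the lock-free ready list. Below is a minimal userspace analogue of that claim-then-add step in C11 atomics; the names are made up and this is not the kernel's llist implementation or part of this patch.

/* Minimal userspace analogue of the epitem_ready() claim pattern.
 * A node whose ->next points to itself is "not queued"; only the caller
 * that swaps that self-pointer away may enqueue the node.
 */
#include <stdatomic.h>
#include <stdio.h>

struct node {
	_Atomic(struct node *) next;	/* self-pointer == not on any list */
};

static void node_init(struct node *n)
{
	atomic_store(&n->next, n);	/* mark "not queued", like init_llist_node() */
}

static int node_claim(struct node *n)
{
	struct node *expected = n;

	/* Succeeds for exactly one concurrent caller, like the cmpxchg()
	 * in epitem_ready(); the winner goes on to add the node to the list. */
	return atomic_compare_exchange_strong(&n->next, &expected, NULL);
}

int main(void)
{
	struct node n;

	node_init(&n);
	printf("first claim:  %d\n", node_claim(&n));	/* 1 -> enqueue it */
	printf("second claim: %d\n", node_claim(&n));	/* 0 -> already queued */
	return 0;
}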
View File

@ -114,6 +114,9 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
bool path_noexec(const struct path *path)
{
/* If it's an anonymous inode make sure that we catch any shenanigans. */
VFS_WARN_ON_ONCE(IS_ANON_FILE(d_inode(path->dentry)) &&
!(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC));
return (path->mnt->mnt_flags & MNT_NOEXEC) ||
(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
}
@ -781,13 +784,15 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
if (IS_ERR(file))
return file;
if (path_noexec(&file->f_path))
return ERR_PTR(-EACCES);
/*
* In the past the regular type check was here. It moved to may_open() in
* 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is
* an invariant that all non-regular files error out before we get here.
*/
if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
path_noexec(&file->f_path))
if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)))
return ERR_PTR(-EACCES);
err = exe_file_deny_write_access(file);

View File

@ -1147,7 +1147,7 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
struct address_space *mapping,
struct iov_iter *ii, loff_t pos,
unsigned int max_pages)
unsigned int max_folios)
{
struct fuse_args_pages *ap = &ia->ap;
struct fuse_conn *fc = get_fuse_conn(mapping->host);
@ -1157,12 +1157,11 @@ static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia,
int err = 0;
num = min(iov_iter_count(ii), fc->max_write);
num = min(num, max_pages << PAGE_SHIFT);
ap->args.in_pages = true;
ap->descs[0].offset = offset;
while (num) {
while (num && ap->num_folios < max_folios) {
size_t tmp;
struct folio *folio;
pgoff_t index = pos >> PAGE_SHIFT;

View File

@ -1649,12 +1649,10 @@ struct inode *alloc_anon_inode(struct super_block *s)
*/
inode->i_state = I_DIRTY;
/*
* Historically anonymous inodes didn't have a type at all and
* userspace has come to rely on this. Internally they're just
* regular files but S_IFREG is masked off when reporting
* information to userspace.
* Historically anonymous inodes don't have a type at all and
* userspace has come to rely on this.
*/
inode->i_mode = S_IFREG | S_IRUSR | S_IWUSR;
inode->i_mode = S_IRUSR | S_IWUSR;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_flags |= S_PRIVATE | S_ANON_INODE;

View File

@ -3480,7 +3480,7 @@ static int may_open(struct mnt_idmap *idmap, const struct path *path,
return -EACCES;
break;
default:
VFS_BUG_ON_INODE(1, inode);
VFS_BUG_ON_INODE(!IS_ANON_FILE(inode), inode);
}
error = inode_permission(idmap, inode, MAY_OPEN | acc_mode);

View File

@ -53,30 +53,40 @@ static struct folio *netfs_grab_folio_for_write(struct address_space *mapping,
* data written into the pagecache until we can find out from the server what
* the values actually are.
*/
static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
loff_t i_size, loff_t pos, size_t copied)
void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
loff_t pos, size_t copied)
{
loff_t i_size, end = pos + copied;
blkcnt_t add;
size_t gap;
if (end <= i_size_read(inode))
return;
if (ctx->ops->update_i_size) {
ctx->ops->update_i_size(inode, pos);
ctx->ops->update_i_size(inode, end);
return;
}
i_size_write(inode, pos);
spin_lock(&inode->i_lock);
i_size = i_size_read(inode);
if (end > i_size) {
i_size_write(inode, end);
#if IS_ENABLED(CONFIG_FSCACHE)
fscache_update_cookie(ctx->cache, NULL, &pos);
fscache_update_cookie(ctx->cache, NULL, &end);
#endif
gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));
if (copied > gap) {
add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE);
gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));
if (copied > gap) {
add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE);
inode->i_blocks = min_t(blkcnt_t,
DIV_ROUND_UP(pos, SECTOR_SIZE),
inode->i_blocks + add);
inode->i_blocks = min_t(blkcnt_t,
DIV_ROUND_UP(end, SECTOR_SIZE),
inode->i_blocks + add);
}
}
spin_unlock(&inode->i_lock);
}
/**
@ -111,7 +121,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
struct folio *folio = NULL, *writethrough = NULL;
unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
ssize_t written = 0, ret, ret2;
loff_t i_size, pos = iocb->ki_pos;
loff_t pos = iocb->ki_pos;
size_t max_chunk = mapping_max_folio_size(mapping);
bool maybe_trouble = false;
@ -344,10 +354,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
flush_dcache_folio(folio);
/* Update the inode size if we moved the EOF marker */
netfs_update_i_size(ctx, inode, pos, copied);
pos += copied;
i_size = i_size_read(inode);
if (pos > i_size)
netfs_update_i_size(ctx, inode, i_size, pos, copied);
written += copied;
if (likely(!wreq)) {

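The i_blocks arithmetic in netfs_update_i_size() above only charges the bytes that spill past the partially used final 512-byte sector and caps the result at the sector count implied by the new EOF. A worked example with made-up numbers, assuming a write that starts exactly at the current EOF (pos == i_size); this sketch is illustrative and not part of the patch.

/* Worked example of the netfs_update_i_size() i_blocks arithmetic:
 * current i_size = 1000 bytes, 600 bytes written at EOF.
 */
#include <stdio.h>

#define SECTOR_SIZE 512ULL
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long long i_size = 1000, copied = 600;
	unsigned long long end = i_size + copied;				/* 1600 */
	unsigned long long i_blocks = DIV_ROUND_UP(i_size, SECTOR_SIZE);	/* 2 sectors so far */
	unsigned long long gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));	/* 24 bytes free in the last sector */

	if (copied > gap) {
		unsigned long long add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE); /* 2 new sectors */
		unsigned long long cap = DIV_ROUND_UP(end, SECTOR_SIZE);	   /* 4 sectors at the new EOF */

		i_blocks = (i_blocks + add < cap) ? i_blocks + add : cap;	   /* min(), like min_t() above */
	}
	printf("gap=%llu end=%llu i_blocks=%llu\n", gap, end, i_blocks);	/* gap=24 end=1600 i_blocks=4 */
	return 0;
}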
View File

@ -9,20 +9,6 @@
#include <linux/uio.h>
#include "internal.h"
static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
{
struct inode *inode = wreq->inode;
unsigned long long end = wreq->start + wreq->transferred;
if (!wreq->error &&
i_size_read(inode) < end) {
if (wreq->netfs_ops->update_i_size)
wreq->netfs_ops->update_i_size(inode, end);
else
i_size_write(inode, end);
}
}
/*
* Perform an unbuffered write where we may have to do an RMW operation on an
* encrypted file. This can also be used for direct I/O writes.
@ -98,7 +84,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
if (async)
wreq->iocb = iocb;
wreq->len = iov_iter_count(&wreq->buffer.iter);
wreq->cleanup = netfs_cleanup_dio_write;
ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), wreq->len);
if (ret < 0) {
_debug("begin = %zd", ret);
@ -106,7 +91,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
}
if (!async) {
trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip);
ret = netfs_wait_for_write(wreq);
if (ret > 0)
iocb->ki_pos += ret;

View File

@ -27,6 +27,12 @@ void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error);
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
size_t offset, size_t len);
/*
* buffered_write.c
*/
void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
loff_t pos, size_t copied);
/*
* main.c
*/
@ -267,13 +273,31 @@ static inline void netfs_wake_rreq_flag(struct netfs_io_request *rreq,
enum netfs_rreq_trace trace)
{
if (test_bit(rreq_flag, &rreq->flags)) {
trace_netfs_rreq(rreq, trace);
clear_bit_unlock(rreq_flag, &rreq->flags);
smp_mb__after_atomic(); /* Set flag before task state */
trace_netfs_rreq(rreq, trace);
wake_up(&rreq->waitq);
}
}
/*
* Test the NETFS_RREQ_IN_PROGRESS flag, inserting an appropriate barrier.
*/
static inline bool netfs_check_rreq_in_progress(const struct netfs_io_request *rreq)
{
/* Order read of flags before read of anything else, such as error. */
return test_bit_acquire(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
}
/*
* Test the NETFS_SREQ_IN_PROGRESS flag, inserting an appropriate barrier.
*/
static inline bool netfs_check_subreq_in_progress(const struct netfs_io_subrequest *subreq)
{
/* Order read of flags before read of anything else, such as error. */
return test_bit_acquire(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
}
/*
* fscache-cache.c
*/

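netfs_check_rreq_in_progress() and netfs_check_subreq_in_progress() exist so that every reader of the IN_PROGRESS bit gets an acquire barrier, ordering the flag read before later reads of fields such as the error code that the completing side stored before clearing the flag with release semantics. A minimal C11 analogue of that release/acquire pairing follows; the struct and function names are illustrative only, not the netfs API and not part of this patch.

/* Release/acquire pairing analogous to clearing IN_PROGRESS with
 * clear_bit_unlock() and testing it with test_bit_acquire().
 */
#include <stdatomic.h>
#include <stdio.h>

struct request {
	int error;			/* written before completion is published */
	atomic_bool in_progress;
};

static void complete_request(struct request *rq, int error)
{
	rq->error = error;		/* plain store ... */
	/* ... made visible before the flag clears (release ordering). */
	atomic_store_explicit(&rq->in_progress, 0, memory_order_release);
}

static int check_in_progress(struct request *rq)
{
	/* Acquire load: a reader that sees the flag cleared also sees rq->error. */
	return atomic_load_explicit(&rq->in_progress, memory_order_acquire);
}

int main(void)
{
	struct request rq = { .error = 0, .in_progress = 1 };

	complete_request(&rq, -5);
	if (!check_in_progress(&rq))
		printf("request complete, error=%d\n", rq.error);
	return 0;
}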
View File

@ -58,15 +58,15 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v)
if (v == &netfs_io_requests) {
seq_puts(m,
"REQUEST OR REF FL ERR OPS COVERAGE\n"
"======== == === == ==== === =========\n"
"REQUEST OR REF FLAG ERR OPS COVERAGE\n"
"======== == === ==== ==== === =========\n"
);
return 0;
}
rreq = list_entry(v, struct netfs_io_request, proc_link);
seq_printf(m,
"%08x %s %3d %2lx %4ld %3d @%04llx %llx/%llx",
"%08x %s %3d %4lx %4ld %3d @%04llx %llx/%llx",
rreq->debug_id,
netfs_origins[rreq->origin],
refcount_read(&rreq->ref),

View File

@ -356,22 +356,22 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
DEFINE_WAIT(myself);
list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
if (!netfs_check_subreq_in_progress(subreq))
continue;
trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
trace_netfs_rreq(rreq, netfs_rreq_trace_wait_quiesce);
for (;;) {
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
if (!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags))
if (!netfs_check_subreq_in_progress(subreq))
break;
trace_netfs_sreq(subreq, netfs_sreq_trace_wait_for);
schedule();
trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
}
}
trace_netfs_rreq(rreq, netfs_rreq_trace_waited_quiesce);
finish_wait(&rreq->waitq, &myself);
}
@ -381,7 +381,12 @@ void netfs_wait_for_in_progress_stream(struct netfs_io_request *rreq,
static int netfs_collect_in_app(struct netfs_io_request *rreq,
bool (*collector)(struct netfs_io_request *rreq))
{
bool need_collect = false, inactive = true;
bool need_collect = false, inactive = true, done = true;
if (!netfs_check_rreq_in_progress(rreq)) {
trace_netfs_rreq(rreq, netfs_rreq_trace_recollect);
return 1; /* Done */
}
for (int i = 0; i < NR_IO_STREAMS; i++) {
struct netfs_io_subrequest *subreq;
@ -395,14 +400,16 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq,
struct netfs_io_subrequest,
rreq_link);
if (subreq &&
(!test_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags) ||
(!netfs_check_subreq_in_progress(subreq) ||
test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags))) {
need_collect = true;
break;
}
if (subreq || !test_bit(NETFS_RREQ_ALL_QUEUED, &rreq->flags))
done = false;
}
if (!need_collect && !inactive)
if (!need_collect && !inactive && !done)
return 0; /* Sleep */
__set_current_state(TASK_RUNNING);
@ -423,14 +430,13 @@ static int netfs_collect_in_app(struct netfs_io_request *rreq,
/*
* Wait for a request to complete, successfully or otherwise.
*/
static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
bool (*collector)(struct netfs_io_request *rreq))
static ssize_t netfs_wait_for_in_progress(struct netfs_io_request *rreq,
bool (*collector)(struct netfs_io_request *rreq))
{
DEFINE_WAIT(myself);
ssize_t ret;
for (;;) {
trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
@ -440,18 +446,22 @@ static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
case 1:
goto all_collected;
case 2:
if (!netfs_check_rreq_in_progress(rreq))
break;
cond_resched();
continue;
}
}
if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
if (!netfs_check_rreq_in_progress(rreq))
break;
trace_netfs_rreq(rreq, netfs_rreq_trace_wait_ip);
schedule();
trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
}
all_collected:
trace_netfs_rreq(rreq, netfs_rreq_trace_waited_ip);
finish_wait(&rreq->waitq, &myself);
ret = rreq->error;
@ -478,12 +488,12 @@ all_collected:
ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
{
return netfs_wait_for_request(rreq, netfs_read_collection);
return netfs_wait_for_in_progress(rreq, netfs_read_collection);
}
ssize_t netfs_wait_for_write(struct netfs_io_request *rreq)
{
return netfs_wait_for_request(rreq, netfs_write_collection);
return netfs_wait_for_in_progress(rreq, netfs_write_collection);
}
/*
@ -494,10 +504,8 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq,
{
DEFINE_WAIT(myself);
trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
for (;;) {
trace_netfs_rreq(rreq, netfs_rreq_trace_wait_queue);
trace_netfs_rreq(rreq, netfs_rreq_trace_wait_pause);
prepare_to_wait(&rreq->waitq, &myself, TASK_UNINTERRUPTIBLE);
if (!test_bit(NETFS_RREQ_OFFLOAD_COLLECTION, &rreq->flags)) {
@ -507,19 +515,23 @@ static void netfs_wait_for_pause(struct netfs_io_request *rreq,
case 1:
goto all_collected;
case 2:
if (!netfs_check_rreq_in_progress(rreq) ||
!test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
break;
cond_resched();
continue;
}
}
if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags) ||
if (!netfs_check_rreq_in_progress(rreq) ||
!test_bit(NETFS_RREQ_PAUSE, &rreq->flags))
break;
schedule();
trace_netfs_rreq(rreq, netfs_rreq_trace_woke_queue);
}
all_collected:
trace_netfs_rreq(rreq, netfs_rreq_trace_waited_pause);
finish_wait(&rreq->waitq, &myself);
}

View File

@ -218,7 +218,7 @@ reassess:
stream->collected_to = front->start;
}
if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags))
if (netfs_check_subreq_in_progress(front))
notes |= HIT_PENDING;
smp_rmb(); /* Read counters after IN_PROGRESS flag. */
transferred = READ_ONCE(front->transferred);
@ -293,7 +293,9 @@ reassess:
spin_lock(&rreq->lock);
remove = front;
trace_netfs_sreq(front, netfs_sreq_trace_discard);
trace_netfs_sreq(front,
notes & ABANDON_SREQ ?
netfs_sreq_trace_abandoned : netfs_sreq_trace_consumed);
list_del_init(&front->rreq_link);
front = list_first_entry_or_null(&stream->subrequests,
struct netfs_io_subrequest, rreq_link);
@ -353,9 +355,11 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
if (rreq->iocb) {
rreq->iocb->ki_pos += rreq->transferred;
if (rreq->iocb->ki_complete)
if (rreq->iocb->ki_complete) {
trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
rreq->iocb->ki_complete(
rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
}
}
if (rreq->netfs_ops->done)
rreq->netfs_ops->done(rreq);
@ -379,9 +383,11 @@ static void netfs_rreq_assess_single(struct netfs_io_request *rreq)
if (rreq->iocb) {
rreq->iocb->ki_pos += rreq->transferred;
if (rreq->iocb->ki_complete)
if (rreq->iocb->ki_complete) {
trace_netfs_rreq(rreq, netfs_rreq_trace_ki_complete);
rreq->iocb->ki_complete(
rreq->iocb, rreq->error ? rreq->error : rreq->transferred);
}
}
if (rreq->netfs_ops->done)
rreq->netfs_ops->done(rreq);
@ -445,7 +451,7 @@ void netfs_read_collection_worker(struct work_struct *work)
struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
netfs_see_request(rreq, netfs_rreq_trace_see_work);
if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
if (netfs_check_rreq_in_progress(rreq)) {
if (netfs_read_collection(rreq))
/* Drop the ref from the IN_PROGRESS flag. */
netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);

View File

@ -240,7 +240,7 @@ reassess_streams:
}
/* Stall if the front is still undergoing I/O. */
if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) {
if (netfs_check_subreq_in_progress(front)) {
notes |= HIT_PENDING;
break;
}
@ -393,8 +393,10 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
ictx->ops->invalidate_cache(wreq);
}
if (wreq->cleanup)
wreq->cleanup(wreq);
if ((wreq->origin == NETFS_UNBUFFERED_WRITE ||
wreq->origin == NETFS_DIO_WRITE) &&
!wreq->error)
netfs_update_i_size(ictx, &ictx->inode, wreq->start, wreq->transferred);
if (wreq->origin == NETFS_DIO_WRITE &&
wreq->mapping->nrpages) {
@ -419,9 +421,11 @@ bool netfs_write_collection(struct netfs_io_request *wreq)
if (wreq->iocb) {
size_t written = min(wreq->transferred, wreq->len);
wreq->iocb->ki_pos += written;
if (wreq->iocb->ki_complete)
if (wreq->iocb->ki_complete) {
trace_netfs_rreq(wreq, netfs_rreq_trace_ki_complete);
wreq->iocb->ki_complete(
wreq->iocb, wreq->error ? wreq->error : written);
}
wreq->iocb = VFS_PTR_POISON;
}
@ -434,7 +438,7 @@ void netfs_write_collection_worker(struct work_struct *work)
struct netfs_io_request *rreq = container_of(work, struct netfs_io_request, work);
netfs_see_request(rreq, netfs_rreq_trace_see_work);
if (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags)) {
if (netfs_check_rreq_in_progress(rreq)) {
if (netfs_write_collection(rreq))
/* Drop the ref from the IN_PROGRESS flag. */
netfs_put_request(rreq, netfs_rreq_trace_put_work_ip);

View File

@ -146,14 +146,13 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
subreq = netfs_alloc_subrequest(wreq);
subreq->source = to->source;
subreq->start = start;
subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
subreq->stream_nr = to->stream_nr;
subreq->retry_count = 1;
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
netfs_sreq_trace_new);
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
trace_netfs_sreq(subreq, netfs_sreq_trace_split);
list_add(&subreq->rreq_link, &to->rreq_link);
to = list_next_entry(to, rreq_link);

View File

@ -1334,7 +1334,12 @@ cifs_readv_callback(struct mid_q_entry *mid)
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
case MID_REQUEST_SUBMITTED:
trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted);
goto do_retry;
case MID_RETRY_NEEDED:
trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed);
do_retry:
__set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags);
rdata->result = -EAGAIN;
if (server->sign && rdata->got_bytes)
/* reset bytes number since we can not check a sign */
@ -1343,8 +1348,14 @@ cifs_readv_callback(struct mid_q_entry *mid)
task_io_account_read(rdata->got_bytes);
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
default:
case MID_RESPONSE_MALFORMED:
trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed);
rdata->result = -EIO;
break;
default:
trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown);
rdata->result = -EIO;
break;
}
if (rdata->result == -ENODATA) {
@ -1713,10 +1724,21 @@ cifs_writev_callback(struct mid_q_entry *mid)
}
break;
case MID_REQUEST_SUBMITTED:
case MID_RETRY_NEEDED:
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted);
__set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
result = -EAGAIN;
break;
case MID_RETRY_NEEDED:
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed);
__set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
result = -EAGAIN;
break;
case MID_RESPONSE_MALFORMED:
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed);
result = -EIO;
break;
default:
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown);
result = -EIO;
break;
}

View File

@ -4567,7 +4567,11 @@ smb2_readv_callback(struct mid_q_entry *mid)
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
case MID_REQUEST_SUBMITTED:
trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_req_submitted);
goto do_retry;
case MID_RETRY_NEEDED:
trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_retry_needed);
do_retry:
__set_bit(NETFS_SREQ_NEED_RETRY, &rdata->subreq.flags);
rdata->result = -EAGAIN;
if (server->sign && rdata->got_bytes)
@ -4578,11 +4582,15 @@ smb2_readv_callback(struct mid_q_entry *mid)
cifs_stats_bytes_read(tcon, rdata->got_bytes);
break;
case MID_RESPONSE_MALFORMED:
trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_malformed);
credits.value = le16_to_cpu(shdr->CreditRequest);
credits.instance = server->reconnect_instance;
fallthrough;
default:
rdata->result = -EIO;
break;
default:
trace_netfs_sreq(&rdata->subreq, netfs_sreq_trace_io_unknown);
rdata->result = -EIO;
break;
}
#ifdef CONFIG_CIFS_SMB_DIRECT
/*
@ -4835,11 +4843,14 @@ smb2_writev_callback(struct mid_q_entry *mid)
switch (mid->mid_state) {
case MID_RESPONSE_RECEIVED:
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress);
credits.value = le16_to_cpu(rsp->hdr.CreditRequest);
credits.instance = server->reconnect_instance;
result = smb2_check_receive(mid, server, 0);
if (result != 0)
if (result != 0) {
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_bad);
break;
}
written = le32_to_cpu(rsp->DataLength);
/*
@ -4861,14 +4872,23 @@ smb2_writev_callback(struct mid_q_entry *mid)
}
break;
case MID_REQUEST_SUBMITTED:
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_req_submitted);
__set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
result = -EAGAIN;
break;
case MID_RETRY_NEEDED:
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_retry_needed);
__set_bit(NETFS_SREQ_NEED_RETRY, &wdata->subreq.flags);
result = -EAGAIN;
break;
case MID_RESPONSE_MALFORMED:
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_malformed);
credits.value = le16_to_cpu(rsp->hdr.CreditRequest);
credits.instance = server->reconnect_instance;
fallthrough;
result = -EIO;
break;
default:
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_unknown);
result = -EIO;
break;
}
@ -4908,7 +4928,6 @@ smb2_writev_callback(struct mid_q_entry *mid)
server->credits, server->in_flight,
0, cifs_trace_rw_credits_write_response_clear);
wdata->credits.value = 0;
trace_netfs_sreq(&wdata->subreq, netfs_sreq_trace_io_progress);
cifs_write_subrequest_terminated(wdata, result ?: written);
release_mid(mid);
trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0,

View File

@ -3608,6 +3608,8 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
extern const struct address_space_operations ram_aops;
extern int always_delete_dentry(const struct dentry *);
extern struct inode *alloc_anon_inode(struct super_block *);
struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *name,
const struct inode *context_inode);
extern int simple_nosetlease(struct file *, int, struct file_lease **, void **);
extern const struct dentry_operations simple_dentry_operations;

View File

@ -265,21 +265,20 @@ struct netfs_io_request {
bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
refcount_t ref;
unsigned long flags;
#define NETFS_RREQ_OFFLOAD_COLLECTION 0 /* Offload collection to workqueue */
#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
#define NETFS_RREQ_FAILED 4 /* The request failed */
#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes (has ref) */
#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 6 /* Copy current folio to cache from read */
#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */
#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */
#define NETFS_RREQ_IN_PROGRESS 0 /* Unlocked when the request completes (has ref) */
#define NETFS_RREQ_ALL_QUEUED 1 /* All subreqs are now queued */
#define NETFS_RREQ_PAUSE 2 /* Pause subrequest generation */
#define NETFS_RREQ_FAILED 3 /* The request failed */
#define NETFS_RREQ_RETRYING 4 /* Set if we're in the retry path */
#define NETFS_RREQ_SHORT_TRANSFER 5 /* Set if we have a short transfer */
#define NETFS_RREQ_OFFLOAD_COLLECTION 8 /* Offload collection to workqueue */
#define NETFS_RREQ_NO_UNLOCK_FOLIO 9 /* Don't unlock no_unlock_folio on completion */
#define NETFS_RREQ_FOLIO_COPY_TO_CACHE 10 /* Copy current folio to cache from read */
#define NETFS_RREQ_UPLOAD_TO_SERVER 11 /* Need to write to the server */
#define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */
#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */
#define NETFS_RREQ_RETRYING 14 /* Set if we're in the retry path */
#define NETFS_RREQ_SHORT_TRANSFER 15 /* Set if we have a short transfer */
#define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
* write to cache on read */
const struct netfs_request_ops *netfs_ops;
void (*cleanup)(struct netfs_io_request *req);
};
/*

View File

@ -50,12 +50,14 @@
#define netfs_rreq_traces \
EM(netfs_rreq_trace_assess, "ASSESS ") \
EM(netfs_rreq_trace_copy, "COPY ") \
EM(netfs_rreq_trace_collect, "COLLECT") \
EM(netfs_rreq_trace_complete, "COMPLET") \
EM(netfs_rreq_trace_copy, "COPY ") \
EM(netfs_rreq_trace_dirty, "DIRTY ") \
EM(netfs_rreq_trace_done, "DONE ") \
EM(netfs_rreq_trace_free, "FREE ") \
EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \
EM(netfs_rreq_trace_recollect, "RECLLCT") \
EM(netfs_rreq_trace_redirty, "REDIRTY") \
EM(netfs_rreq_trace_resubmit, "RESUBMT") \
EM(netfs_rreq_trace_set_abandon, "S-ABNDN") \
@ -63,13 +65,15 @@
EM(netfs_rreq_trace_unlock, "UNLOCK ") \
EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \
EM(netfs_rreq_trace_unmark, "UNMARK ") \
EM(netfs_rreq_trace_unpause, "UNPAUSE") \
EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \
EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \
EM(netfs_rreq_trace_wait_queue, "WAIT-Q ") \
EM(netfs_rreq_trace_wait_pause, "--PAUSED--") \
EM(netfs_rreq_trace_wait_quiesce, "WAIT-QUIESCE") \
EM(netfs_rreq_trace_waited_ip, "DONE-IP") \
EM(netfs_rreq_trace_waited_pause, "--UNPAUSED--") \
EM(netfs_rreq_trace_waited_quiesce, "DONE-QUIESCE") \
EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \
EM(netfs_rreq_trace_wake_queue, "WAKE-Q ") \
EM(netfs_rreq_trace_woke_queue, "WOKE-Q ") \
EM(netfs_rreq_trace_unpause, "UNPAUSE") \
E_(netfs_rreq_trace_write_done, "WR-DONE")
#define netfs_sreq_sources \
@ -82,6 +86,7 @@
E_(NETFS_WRITE_TO_CACHE, "WRIT")
#define netfs_sreq_traces \
EM(netfs_sreq_trace_abandoned, "ABNDN") \
EM(netfs_sreq_trace_add_donations, "+DON ") \
EM(netfs_sreq_trace_added, "ADD ") \
EM(netfs_sreq_trace_cache_nowrite, "CA-NW") \
@ -89,6 +94,7 @@
EM(netfs_sreq_trace_cache_write, "CA-WR") \
EM(netfs_sreq_trace_cancel, "CANCL") \
EM(netfs_sreq_trace_clear, "CLEAR") \
EM(netfs_sreq_trace_consumed, "CONSM") \
EM(netfs_sreq_trace_discard, "DSCRD") \
EM(netfs_sreq_trace_donate_to_prev, "DON-P") \
EM(netfs_sreq_trace_donate_to_next, "DON-N") \
@ -96,7 +102,12 @@
EM(netfs_sreq_trace_fail, "FAIL ") \
EM(netfs_sreq_trace_free, "FREE ") \
EM(netfs_sreq_trace_hit_eof, "EOF ") \
EM(netfs_sreq_trace_io_progress, "IO ") \
EM(netfs_sreq_trace_io_bad, "I-BAD") \
EM(netfs_sreq_trace_io_malformed, "I-MLF") \
EM(netfs_sreq_trace_io_unknown, "I-UNK") \
EM(netfs_sreq_trace_io_progress, "I-OK ") \
EM(netfs_sreq_trace_io_req_submitted, "I-RSB") \
EM(netfs_sreq_trace_io_retry_needed, "I-RTR") \
EM(netfs_sreq_trace_limited, "LIMIT") \
EM(netfs_sreq_trace_need_clear, "N-CLR") \
EM(netfs_sreq_trace_partial_read, "PARTR") \
@ -142,8 +153,8 @@
#define netfs_sreq_ref_traces \
EM(netfs_sreq_trace_get_copy_to_cache, "GET COPY2C ") \
EM(netfs_sreq_trace_get_resubmit, "GET RESUBMIT") \
EM(netfs_sreq_trace_get_submit, "GET SUBMIT") \
EM(netfs_sreq_trace_get_resubmit, "GET RESUBMT") \
EM(netfs_sreq_trace_get_submit, "GET SUBMIT ") \
EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \
EM(netfs_sreq_trace_new, "NEW ") \
EM(netfs_sreq_trace_put_abandon, "PUT ABANDON") \
@ -366,7 +377,7 @@ TRACE_EVENT(netfs_sreq,
__entry->slot = sreq->io_iter.folioq_slot;
),
TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx s=%u e=%d",
TP_printk("R=%08x[%x] %s %s f=%03x s=%llx %zx/%zx s=%u e=%d",
__entry->rreq, __entry->index,
__print_symbolic(__entry->source, netfs_sreq_sources),
__print_symbolic(__entry->what, netfs_sreq_traces),

View File

@ -195,18 +195,11 @@ static struct file *secretmem_file_create(unsigned long flags)
struct file *file;
struct inode *inode;
const char *anon_name = "[secretmem]";
int err;
inode = alloc_anon_inode(secretmem_mnt->mnt_sb);
inode = anon_inode_make_secure_inode(secretmem_mnt->mnt_sb, anon_name, NULL);
if (IS_ERR(inode))
return ERR_CAST(inode);
err = security_inode_init_security_anon(inode, &QSTR(anon_name), NULL);
if (err) {
file = ERR_PTR(err);
goto err_free_inode;
}
file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
O_RDWR, &secretmem_fops);
if (IS_ERR(file))

View File

@ -461,10 +461,15 @@ TEST_F(coredump, socket_detect_userspace_client)
_exit(EXIT_FAILURE);
}
ret = read(fd_coredump, &c, 1);
close(fd_coredump);
close(fd_server);
close(fd_peer_pidfd);
close(fd_core_file);
if (ret < 1)
_exit(EXIT_FAILURE);
_exit(EXIT_SUCCESS);
}
self->pid_coredump_server = pid_coredump_server;