xfs: Fixes for 6.16-rc5

Signed-off-by: Carlos Maiolino <cem@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iJUEABMJAB0WIQSmtYVZ/MfVMGUq1GNcsMJ8RxYuYwUCaGKExAAKCRBcsMJ8RxYu
 Y4fTAXoCMZGmJKwTbcBk/9u2nD1ehULBDBQB+jDEjxQUile2fMvSMndxqAw0Dgt5
 RAg055kBfiwXnK92j2dgayVabNDY3HAxcmGe4B3OBC58/7rNINtgdujfj/gtHZLG
 M0Cko5OICA==
 =QrM0
 -----END PGP SIGNATURE-----

Merge tag 'xfs-fixes-6.16-rc5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Carlos Maiolino:

 - Fix umount hang with unflushable inodes (and add new tracepoint used
   for debugging this)

 - Fix ABBA deadlock in xfs_reclaim_inode() vs xfs_ifree_cluster()

 - Fix dquot buffer pin deadlock

* tag 'xfs-fixes-6.16-rc5' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: add FALLOC_FL_ALLOCATE_RANGE to supported flags mask
  xfs: fix unmount hang with unflushable inodes stuck in the AIL
  xfs: factor out stale buffer item completion
  xfs: rearrange code in xfs_buf_item.c
  xfs: add tracepoints for stale pinned inode state debug
  xfs: avoid dquot buffer pin deadlock
  xfs: catch stale AGF/AGI metadata
  xfs: xfs_ifree_cluster vs xfs_iflush_shutdown_abort deadlock
  xfs: actually use the xfs_growfs_check_rtgeom tracepoint
  xfs: Improve error handling in xfs_mru_cache_create()
  xfs: move xfs_submit_zoned_bio a bit
  xfs: use xfs_readonly_buftarg in xfs_remount_rw
  xfs: remove NULL pointer checks in xfs_mru_cache_insert
  xfs: check for shutdown before going to sleep in xfs_select_zone
Linus Torvalds 2025-07-03 09:00:04 -07:00
commit d32e907d15
19 changed files with 321 additions and 288 deletions

View File

@ -3444,16 +3444,41 @@ xfs_alloc_read_agf(
set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
}
#ifdef DEBUG
else if (!xfs_is_shutdown(mp)) {
ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
ASSERT(pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level));
ASSERT(pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level));
/*
* It's possible for the AGF to be out of sync if the block device is
* silently dropping writes. This can happen in fstests with dmflakey
* enabled, which allows the buffer to be cleaned and reclaimed by
* memory pressure and then re-read from disk here. We will get a
* stale version of the AGF from disk, and nothing good can happen from
* here. Hence if we detect this situation, immediately shut down the
* filesystem.
*
* This can also happen if we are already in the middle of a forced
* shutdown, so don't bother checking if we are already shut down.
*/
if (!xfs_is_shutdown(pag_mount(pag))) {
bool ok = true;
ok &= pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks);
ok &= pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks);
ok &= pag->pagf_flcount == be32_to_cpu(agf->agf_flcount);
ok &= pag->pagf_longest == be32_to_cpu(agf->agf_longest);
ok &= pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level);
ok &= pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level);
if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) {
xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
xfs_trans_brelse(tp, agfbp);
xfs_force_shutdown(pag_mount(pag),
SHUTDOWN_CORRUPT_ONDISK);
return -EFSCORRUPTED;
}
}
#endif
#endif /* DEBUG */
if (agfbpp)
*agfbpp = agfbp;
else

View File

@ -2801,12 +2801,35 @@ xfs_ialloc_read_agi(
set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
}
#ifdef DEBUG
/*
* It's possible for these to be out of sync if
* we are in the middle of a forced shutdown.
* It's possible for the AGI to be out of sync if the block device is
* silently dropping writes. This can happen in fstests with dmflakey
* enabled, which allows the buffer to be cleaned and reclaimed by
* memory pressure and then re-read from disk here. We will get a
* stale version of the AGI from disk, and nothing good can happen from
* here. Hence if we detect this situation, immediately shut down the
* filesystem.
*
* This can also happen if we are already in the middle of a forced
* shutdown, so don't bother checking if we are already shut down.
*/
ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
xfs_is_shutdown(pag_mount(pag)));
if (!xfs_is_shutdown(pag_mount(pag))) {
bool ok = true;
ok &= pag->pagi_freecount == be32_to_cpu(agi->agi_freecount);
ok &= pag->pagi_count == be32_to_cpu(agi->agi_count);
if (XFS_IS_CORRUPT(pag_mount(pag), !ok)) {
xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
xfs_trans_brelse(tp, agibp);
xfs_force_shutdown(pag_mount(pag),
SHUTDOWN_CORRUPT_ONDISK);
return -EFSCORRUPTED;
}
}
#endif /* DEBUG */
if (agibpp)
*agibpp = agibp;
else

View File

@ -2082,44 +2082,6 @@ xfs_buf_delwri_submit(
return error;
}
/*
* Push a single buffer on a delwri queue.
*
* The purpose of this function is to submit a single buffer of a delwri queue
* and return with the buffer still on the original queue.
*
* The buffer locking and queue management logic between _delwri_pushbuf() and
* _delwri_queue() guarantee that the buffer cannot be queued to another list
* before returning.
*/
int
xfs_buf_delwri_pushbuf(
struct xfs_buf *bp,
struct list_head *buffer_list)
{
int error;
ASSERT(bp->b_flags & _XBF_DELWRI_Q);
trace_xfs_buf_delwri_pushbuf(bp, _RET_IP_);
xfs_buf_lock(bp);
bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC);
bp->b_flags |= XBF_WRITE;
xfs_buf_submit(bp);
/*
* The buffer is now locked, under I/O but still on the original delwri
* queue. Wait for I/O completion, restore the DELWRI_Q flag and
* return with the buffer unlocked and still on the original queue.
*/
error = xfs_buf_iowait(bp);
bp->b_flags |= _XBF_DELWRI_Q;
xfs_buf_unlock(bp);
return error;
}
void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
{
/*

View File

@ -326,7 +326,6 @@ extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
void xfs_buf_delwri_queue_here(struct xfs_buf *bp, struct list_head *bl);
extern int xfs_buf_delwri_submit(struct list_head *);
extern int xfs_buf_delwri_submit_nowait(struct list_head *);
extern int xfs_buf_delwri_pushbuf(struct xfs_buf *, struct list_head *);
static inline xfs_daddr_t xfs_buf_daddr(struct xfs_buf *bp)
{

View File

@ -32,6 +32,61 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
return container_of(lip, struct xfs_buf_log_item, bli_item);
}
static void
xfs_buf_item_get_format(
struct xfs_buf_log_item *bip,
int count)
{
ASSERT(bip->bli_formats == NULL);
bip->bli_format_count = count;
if (count == 1) {
bip->bli_formats = &bip->__bli_format;
return;
}
bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format),
GFP_KERNEL | __GFP_NOFAIL);
}
static void
xfs_buf_item_free_format(
struct xfs_buf_log_item *bip)
{
if (bip->bli_formats != &bip->__bli_format) {
kfree(bip->bli_formats);
bip->bli_formats = NULL;
}
}
static void
xfs_buf_item_free(
struct xfs_buf_log_item *bip)
{
xfs_buf_item_free_format(bip);
kvfree(bip->bli_item.li_lv_shadow);
kmem_cache_free(xfs_buf_item_cache, bip);
}
/*
* xfs_buf_item_relse() is called when the buf log item is no longer needed.
*/
static void
xfs_buf_item_relse(
struct xfs_buf_log_item *bip)
{
struct xfs_buf *bp = bip->bli_buf;
trace_xfs_buf_item_relse(bp, _RET_IP_);
ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
ASSERT(atomic_read(&bip->bli_refcount) == 0);
bp->b_log_item = NULL;
xfs_buf_rele(bp);
xfs_buf_item_free(bip);
}
/* Is this log iovec plausibly large enough to contain the buffer log format? */
bool
xfs_buf_log_check_iovec(
@ -389,6 +444,42 @@ xfs_buf_item_pin(
atomic_inc(&bip->bli_buf->b_pin_count);
}
/*
* For a stale BLI, process all the necessary completions that must be
* performed when the final BLI reference goes away. The buffer will be
* referenced and locked here - we return to the caller with the buffer still
* referenced and locked for them to finalise processing of the buffer.
*/
static void
xfs_buf_item_finish_stale(
struct xfs_buf_log_item *bip)
{
struct xfs_buf *bp = bip->bli_buf;
struct xfs_log_item *lip = &bip->bli_item;
ASSERT(bip->bli_flags & XFS_BLI_STALE);
ASSERT(xfs_buf_islocked(bp));
ASSERT(bp->b_flags & XBF_STALE);
ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
ASSERT(list_empty(&lip->li_trans));
ASSERT(!bp->b_transp);
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
xfs_buf_item_done(bp);
xfs_buf_inode_iodone(bp);
ASSERT(list_empty(&bp->b_li_list));
return;
}
/*
* We may or may not be on the AIL here, xfs_trans_ail_delete() will do
* the right thing regardless of the situation in which we are called.
*/
xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
xfs_buf_item_relse(bip);
ASSERT(bp->b_log_item == NULL);
}
/*
* This is called to unpin the buffer associated with the buf log item which was
* previously pinned with a call to xfs_buf_item_pin(). We enter this function
@ -438,13 +529,6 @@ xfs_buf_item_unpin(
}
if (stale) {
ASSERT(bip->bli_flags & XFS_BLI_STALE);
ASSERT(xfs_buf_islocked(bp));
ASSERT(bp->b_flags & XBF_STALE);
ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
ASSERT(list_empty(&lip->li_trans));
ASSERT(!bp->b_transp);
trace_xfs_buf_item_unpin_stale(bip);
/*
@ -455,22 +539,7 @@ xfs_buf_item_unpin(
* processing is complete.
*/
xfs_buf_rele(bp);
/*
* If we get called here because of an IO error, we may or may
* not have the item on the AIL. xfs_trans_ail_delete() will
* take care of that situation. xfs_trans_ail_delete() drops
* the AIL lock.
*/
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
xfs_buf_item_done(bp);
xfs_buf_inode_iodone(bp);
ASSERT(list_empty(&bp->b_li_list));
} else {
xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
xfs_buf_item_relse(bp);
ASSERT(bp->b_log_item == NULL);
}
xfs_buf_item_finish_stale(bip);
xfs_buf_relse(bp);
return;
}
@ -543,43 +612,42 @@ xfs_buf_item_push(
* Drop the buffer log item refcount and take appropriate action. This helper
* determines whether the bli must be freed or not, since a decrement to zero
* does not necessarily mean the bli is unused.
*
* Return true if the bli is freed, false otherwise.
*/
bool
void
xfs_buf_item_put(
struct xfs_buf_log_item *bip)
{
struct xfs_log_item *lip = &bip->bli_item;
bool aborted;
bool dirty;
ASSERT(xfs_buf_islocked(bip->bli_buf));
/* drop the bli ref and return if it wasn't the last one */
if (!atomic_dec_and_test(&bip->bli_refcount))
return false;
return;
/* If the BLI is in the AIL, then it is still dirty and in use */
if (test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)) {
ASSERT(bip->bli_flags & XFS_BLI_DIRTY);
return;
}
/*
* We dropped the last ref and must free the item if clean or aborted.
* If the bli is dirty and non-aborted, the buffer was clean in the
* transaction but still awaiting writeback from previous changes. In
* that case, the bli is freed on buffer writeback completion.
* In shutdown conditions, we can be asked to free a dirty BLI that
* isn't in the AIL. This can occur due to a checkpoint aborting a BLI
* instead of inserting it into the AIL at checkpoint IO completion. If
* there's another bli reference (e.g. a btree cursor holds a clean
* reference) and it is released via xfs_trans_brelse(), we can get here
* with that aborted, dirty BLI. In this case, it is safe to free the
* dirty BLI immediately, as it is not in the AIL and there are no
* other references to it.
*
* We should never get here with a stale BLI via that path as
* xfs_trans_brelse() specifically holds onto stale buffers rather than
* releasing them.
*/
aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) ||
xlog_is_shutdown(lip->li_log);
dirty = bip->bli_flags & XFS_BLI_DIRTY;
if (dirty && !aborted)
return false;
/*
* The bli is aborted or clean. An aborted item may be in the AIL
* regardless of dirty state. For example, consider an aborted
* transaction that invalidated a dirty bli and cleared the dirty
* state.
*/
if (aborted)
xfs_trans_ail_delete(lip, 0);
xfs_buf_item_relse(bip->bli_buf);
return true;
ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY) ||
test_bit(XFS_LI_ABORTED, &bip->bli_item.li_flags));
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
xfs_buf_item_relse(bip);
}
/*
@ -600,6 +668,15 @@ xfs_buf_item_put(
* if necessary but do not unlock the buffer. This is for support of
* xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't
* free the item.
*
* If the XFS_BLI_STALE flag is set, the last reference to the BLI *must*
* perform a completion abort of any objects attached to the buffer for IO
* tracking purposes. This generally only happens in shutdown situations,
* normally xfs_buf_item_unpin() will drop the last BLI reference and perform
* completion processing. However, because transaction completion can race with
* checkpoint completion during a shutdown, this release context may end up
* being the last active reference to the BLI and so needs to perform this
* cleanup.
*/
STATIC void
xfs_buf_item_release(
@ -607,18 +684,19 @@ xfs_buf_item_release(
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
bool released;
bool hold = bip->bli_flags & XFS_BLI_HOLD;
bool stale = bip->bli_flags & XFS_BLI_STALE;
#if defined(DEBUG) || defined(XFS_WARN)
bool ordered = bip->bli_flags & XFS_BLI_ORDERED;
bool dirty = bip->bli_flags & XFS_BLI_DIRTY;
bool aborted = test_bit(XFS_LI_ABORTED,
&lip->li_flags);
bool dirty = bip->bli_flags & XFS_BLI_DIRTY;
#if defined(DEBUG) || defined(XFS_WARN)
bool ordered = bip->bli_flags & XFS_BLI_ORDERED;
#endif
trace_xfs_buf_item_release(bip);
ASSERT(xfs_buf_islocked(bp));
/*
* The bli dirty state should match whether the blf has logged segments
* except for ordered buffers, where only the bli should be dirty.
@ -634,16 +712,56 @@ xfs_buf_item_release(
bp->b_transp = NULL;
bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
/* If there are other references, then we have nothing to do. */
if (!atomic_dec_and_test(&bip->bli_refcount))
goto out_release;
/*
* Unref the item and unlock the buffer unless held or stale. Stale
* buffers remain locked until final unpin unless the bli is freed by
* the unref call. The latter implies shutdown because buffer
* invalidation dirties the bli and transaction.
* Stale buffer completion frees the BLI, unlocks and releases the
* buffer. Neither the BLI or buffer are safe to reference after this
* call, so there's nothing more we need to do here.
*
* If we get here with a stale buffer and references to the BLI remain,
* we must not unlock the buffer as the last BLI reference owns lock
* context, not us.
*/
released = xfs_buf_item_put(bip);
if (hold || (stale && !released))
if (stale) {
xfs_buf_item_finish_stale(bip);
xfs_buf_relse(bp);
ASSERT(!hold);
return;
}
/*
* Dirty or clean, aborted items are done and need to be removed from
* the AIL and released. This frees the BLI, but leaves the buffer
* locked and referenced.
*/
if (aborted || xlog_is_shutdown(lip->li_log)) {
ASSERT(list_empty(&bip->bli_buf->b_li_list));
xfs_buf_item_done(bp);
goto out_release;
}
/*
* Clean, unreferenced BLIs can be immediately freed, leaving the buffer
* locked and referenced.
*
* Dirty, unreferenced BLIs *must* be in the AIL awaiting writeback.
*/
if (!dirty)
xfs_buf_item_relse(bip);
else
ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags));
/* Not safe to reference the BLI from here */
out_release:
/*
* If we get here with a stale buffer, we must not unlock the
* buffer as the last BLI reference owns lock context, not us.
*/
if (stale || hold)
return;
ASSERT(!stale || aborted);
xfs_buf_relse(bp);
}
@ -729,33 +847,6 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_push = xfs_buf_item_push,
};
STATIC void
xfs_buf_item_get_format(
struct xfs_buf_log_item *bip,
int count)
{
ASSERT(bip->bli_formats == NULL);
bip->bli_format_count = count;
if (count == 1) {
bip->bli_formats = &bip->__bli_format;
return;
}
bip->bli_formats = kzalloc(count * sizeof(struct xfs_buf_log_format),
GFP_KERNEL | __GFP_NOFAIL);
}
STATIC void
xfs_buf_item_free_format(
struct xfs_buf_log_item *bip)
{
if (bip->bli_formats != &bip->__bli_format) {
kfree(bip->bli_formats);
bip->bli_formats = NULL;
}
}
/*
* Allocate a new buf log item to go with the given buffer.
* Set the buffer's b_log_item field to point to the new
@ -976,34 +1067,6 @@ xfs_buf_item_dirty_format(
return false;
}
STATIC void
xfs_buf_item_free(
struct xfs_buf_log_item *bip)
{
xfs_buf_item_free_format(bip);
kvfree(bip->bli_item.li_lv_shadow);
kmem_cache_free(xfs_buf_item_cache, bip);
}
/*
* xfs_buf_item_relse() is called when the buf log item is no longer needed.
*/
void
xfs_buf_item_relse(
struct xfs_buf *bp)
{
struct xfs_buf_log_item *bip = bp->b_log_item;
trace_xfs_buf_item_relse(bp, _RET_IP_);
ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
if (atomic_read(&bip->bli_refcount))
return;
bp->b_log_item = NULL;
xfs_buf_rele(bp);
xfs_buf_item_free(bip);
}
void
xfs_buf_item_done(
struct xfs_buf *bp)
@ -1023,5 +1086,5 @@ xfs_buf_item_done(
xfs_trans_ail_delete(&bp->b_log_item->bli_item,
(bp->b_flags & _XBF_LOGRECOVERY) ? 0 :
SHUTDOWN_CORRUPT_INCORE);
xfs_buf_item_relse(bp);
xfs_buf_item_relse(bp->b_log_item);
}

View File

@ -49,8 +49,7 @@ struct xfs_buf_log_item {
int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
void xfs_buf_item_done(struct xfs_buf *bp);
void xfs_buf_item_relse(struct xfs_buf *);
bool xfs_buf_item_put(struct xfs_buf_log_item *);
void xfs_buf_item_put(struct xfs_buf_log_item *bip);
void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
void xfs_buf_inode_iodone(struct xfs_buf *);

View File

@ -1398,11 +1398,9 @@ xfs_qm_dqflush(
ASSERT(XFS_DQ_IS_LOCKED(dqp));
ASSERT(!completion_done(&dqp->q_flush));
ASSERT(atomic_read(&dqp->q_pincount) == 0);
trace_xfs_dqflush(dqp);
xfs_qm_dqunpin_wait(dqp);
fa = xfs_qm_dqflush_check(dqp);
if (fa) {
xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS",

View File

@ -1335,9 +1335,10 @@ xfs_falloc_allocate_range(
}
#define XFS_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)
(FALLOC_FL_ALLOCATE_RANGE | FALLOC_FL_KEEP_SIZE | \
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | \
FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE | \
FALLOC_FL_UNSHARE_RANGE)
STATIC long
__xfs_file_fallocate(

View File

@ -979,7 +979,15 @@ xfs_reclaim_inode(
*/
if (xlog_is_shutdown(ip->i_mount->m_log)) {
xfs_iunpin_wait(ip);
/*
* Avoid an ABBA deadlock on the inode cluster buffer vs
* concurrent xfs_ifree_cluster() trying to mark the inode
* stale. We don't need the inode locked to run the flush abort
* code, but the flush abort needs to lock the cluster buffer.
*/
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_iflush_shutdown_abort(ip);
xfs_ilock(ip, XFS_ILOCK_EXCL);
goto reclaim;
}
if (xfs_ipincount(ip))

View File

@ -1635,7 +1635,7 @@ retry:
iip = ip->i_itemp;
if (__xfs_iflags_test(ip, XFS_IFLUSHING)) {
ASSERT(!list_empty(&iip->ili_item.li_bio_list));
ASSERT(iip->ili_last_fields);
ASSERT(iip->ili_last_fields || xlog_is_shutdown(mp->m_log));
goto out_iunlock;
}

View File

@ -758,11 +758,14 @@ xfs_inode_item_push(
* completed and items removed from the AIL before the next push
* attempt.
*/
trace_xfs_inode_push_stale(ip, _RET_IP_);
return XFS_ITEM_PINNED;
}
if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp))
if (xfs_ipincount(ip) > 0 || xfs_buf_ispinned(bp)) {
trace_xfs_inode_push_pinned(ip, _RET_IP_);
return XFS_ITEM_PINNED;
}
if (xfs_iflags_test(ip, XFS_IFLUSHING))
return XFS_ITEM_FLUSHING;

View File

@ -793,8 +793,10 @@ xlog_cil_ail_insert(
struct xfs_log_item *lip = lv->lv_item;
xfs_lsn_t item_lsn;
if (aborted)
if (aborted) {
trace_xlog_ail_insert_abort(lip);
set_bit(XFS_LI_ABORTED, &lip->li_flags);
}
if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
lip->li_ops->iop_release(lip);

View File

@ -320,7 +320,7 @@ xfs_mru_cache_create(
xfs_mru_cache_free_func_t free_func)
{
struct xfs_mru_cache *mru = NULL;
int err = 0, grp;
int grp;
unsigned int grp_time;
if (mrup)
@ -341,8 +341,8 @@ xfs_mru_cache_create(
mru->lists = kzalloc(mru->grp_count * sizeof(*mru->lists),
GFP_KERNEL | __GFP_NOFAIL);
if (!mru->lists) {
err = -ENOMEM;
goto exit;
kfree(mru);
return -ENOMEM;
}
for (grp = 0; grp < mru->grp_count; grp++)
@ -361,14 +361,7 @@ xfs_mru_cache_create(
mru->free_func = free_func;
mru->data = data;
*mrup = mru;
exit:
if (err && mru && mru->lists)
kfree(mru->lists);
if (err && mru)
kfree(mru);
return err;
return 0;
}
/*
@ -425,10 +418,6 @@ xfs_mru_cache_insert(
{
int error = -EINVAL;
ASSERT(mru && mru->lists);
if (!mru || !mru->lists)
goto out_free;
error = -ENOMEM;
if (radix_tree_preload(GFP_KERNEL))
goto out_free;

View File

@ -134,6 +134,7 @@ xfs_qm_dqpurge(
dqp->q_flags |= XFS_DQFLAG_FREEING;
xfs_qm_dqunpin_wait(dqp);
xfs_dqflock(dqp);
/*
@ -465,6 +466,7 @@ xfs_qm_dquot_isolate(
struct xfs_dquot *dqp = container_of(item,
struct xfs_dquot, q_lru);
struct xfs_qm_isolate *isol = arg;
enum lru_status ret = LRU_SKIP;
if (!xfs_dqlock_nowait(dqp))
goto out_miss_busy;
@ -477,6 +479,16 @@ xfs_qm_dquot_isolate(
if (dqp->q_flags & XFS_DQFLAG_FREEING)
goto out_miss_unlock;
/*
* If the dquot is pinned or dirty, rotate it to the end of the LRU to
* give some time for it to be cleaned before we try to isolate it
* again.
*/
ret = LRU_ROTATE;
if (XFS_DQ_IS_DIRTY(dqp) || atomic_read(&dqp->q_pincount) > 0) {
goto out_miss_unlock;
}
/*
* This dquot has acquired a reference in the meantime; remove it from
* the freelist and try again.
@ -492,41 +504,14 @@ xfs_qm_dquot_isolate(
}
/*
* If the dquot is dirty, flush it. If it's already being flushed, just
* skip it so there is time for the IO to complete before we try to
* reclaim it again on the next LRU pass.
* The dquot may still be under IO, in which case the flush lock will be
* held. If we can't get the flush lock now, just skip over the dquot as
* if it was dirty.
*/
if (!xfs_dqflock_nowait(dqp))
goto out_miss_unlock;
if (XFS_DQ_IS_DIRTY(dqp)) {
struct xfs_buf *bp = NULL;
int error;
trace_xfs_dqreclaim_dirty(dqp);
/* we have to drop the LRU lock to flush the dquot */
spin_unlock(&lru->lock);
error = xfs_dquot_use_attached_buf(dqp, &bp);
if (!bp || error == -EAGAIN) {
xfs_dqfunlock(dqp);
goto out_unlock_dirty;
}
/*
* dqflush completes dqflock on error, and the delwri ioend
* does it on success.
*/
error = xfs_qm_dqflush(dqp, bp);
if (error)
goto out_unlock_dirty;
xfs_buf_delwri_queue(bp, &isol->buffers);
xfs_buf_relse(bp);
goto out_unlock_dirty;
}
ASSERT(!XFS_DQ_IS_DIRTY(dqp));
xfs_dquot_detach_buf(dqp);
xfs_dqfunlock(dqp);
@ -548,13 +533,7 @@ out_miss_unlock:
out_miss_busy:
trace_xfs_dqreclaim_busy(dqp);
XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
return LRU_SKIP;
out_unlock_dirty:
trace_xfs_dqreclaim_busy(dqp);
XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
xfs_dqunlock(dqp);
return LRU_RETRY;
return ret;
}
static unsigned long
@ -1486,7 +1465,6 @@ xfs_qm_flush_one(
struct xfs_dquot *dqp,
void *data)
{
struct xfs_mount *mp = dqp->q_mount;
struct list_head *buffer_list = data;
struct xfs_buf *bp = NULL;
int error = 0;
@ -1497,34 +1475,8 @@ xfs_qm_flush_one(
if (!XFS_DQ_IS_DIRTY(dqp))
goto out_unlock;
/*
* The only way the dquot is already flush locked by the time quotacheck
* gets here is if reclaim flushed it before the dqadjust walk dirtied
* it for the final time. Quotacheck collects all dquot bufs in the
* local delwri queue before dquots are dirtied, so reclaim can't have
* possibly queued it for I/O. The only way out is to push the buffer to
* cycle the flush lock.
*/
if (!xfs_dqflock_nowait(dqp)) {
/* buf is pinned in-core by delwri list */
error = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno,
mp->m_quotainfo->qi_dqchunklen, 0, &bp);
if (error)
goto out_unlock;
if (!(bp->b_flags & _XBF_DELWRI_Q)) {
error = -EAGAIN;
xfs_buf_relse(bp);
goto out_unlock;
}
xfs_buf_unlock(bp);
xfs_buf_delwri_pushbuf(bp, buffer_list);
xfs_buf_rele(bp);
error = -EAGAIN;
goto out_unlock;
}
xfs_qm_dqunpin_wait(dqp);
xfs_dqflock(dqp);
error = xfs_dquot_use_attached_buf(dqp, &bp);
if (error)

View File

@ -1259,6 +1259,8 @@ xfs_growfs_check_rtgeom(
kfree(nmp);
trace_xfs_growfs_check_rtgeom(mp, min_logfsbs);
if (min_logfsbs > mp->m_sb.sb_logblocks)
return -EINVAL;

View File

@ -2020,14 +2020,13 @@ xfs_remount_rw(
int error;
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp &&
bdev_read_only(mp->m_logdev_targp->bt_bdev)) {
xfs_readonly_buftarg(mp->m_logdev_targp)) {
xfs_warn(mp,
"ro->rw transition prohibited by read-only logdev");
return -EACCES;
}
if (mp->m_rtdev_targp &&
bdev_read_only(mp->m_rtdev_targp->bt_bdev)) {
if (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp)) {
xfs_warn(mp,
"ro->rw transition prohibited by read-only rtdev");
return -EACCES;

View File

@ -778,7 +778,6 @@ DEFINE_BUF_EVENT(xfs_buf_iowait_done);
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
DEFINE_BUF_EVENT(xfs_buf_delwri_queued);
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
DEFINE_BUF_EVENT(xfs_buf_delwri_pushbuf);
DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_iodone_async);
@ -1147,6 +1146,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
__field(xfs_ino_t, ino)
__field(int, count)
__field(int, pincount)
__field(unsigned long, iflags)
__field(unsigned long, caller_ip)
),
TP_fast_assign(
@ -1154,13 +1154,15 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
__entry->ino = ip->i_ino;
__entry->count = atomic_read(&VFS_I(ip)->i_count);
__entry->pincount = atomic_read(&ip->i_pincount);
__entry->iflags = ip->i_flags;
__entry->caller_ip = caller_ip;
),
TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pS",
TP_printk("dev %d:%d ino 0x%llx count %d pincount %d iflags 0x%lx caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->count,
__entry->pincount,
__entry->iflags,
(char *)__entry->caller_ip)
)
@ -1250,6 +1252,8 @@ DEFINE_IREF_EVENT(xfs_irele);
DEFINE_IREF_EVENT(xfs_inode_pin);
DEFINE_IREF_EVENT(xfs_inode_unpin);
DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
DEFINE_IREF_EVENT(xfs_inode_push_pinned);
DEFINE_IREF_EVENT(xfs_inode_push_stale);
DECLARE_EVENT_CLASS(xfs_namespace_class,
TP_PROTO(struct xfs_inode *dp, const struct xfs_name *name),
@ -1654,6 +1658,8 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin);
DEFINE_LOG_ITEM_EVENT(xlog_ail_insert_abort);
DEFINE_LOG_ITEM_EVENT(xfs_trans_free_abort);
DECLARE_EVENT_CLASS(xfs_ail_class,
TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn),

View File

@ -742,8 +742,10 @@ xfs_trans_free_items(
list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
xfs_trans_del_item(lip);
if (abort)
if (abort) {
trace_xfs_trans_free_abort(lip);
set_bit(XFS_LI_ABORTED, &lip->li_flags);
}
if (lip->li_ops->iop_release)
lip->li_ops->iop_release(lip);
}

View File

@ -727,7 +727,7 @@ xfs_select_zone(
for (;;) {
prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE);
oz = xfs_select_zone_nowait(mp, write_hint, pack_tight);
if (oz)
if (oz || xfs_is_shutdown(mp))
break;
schedule();
}
@ -777,26 +777,6 @@ xfs_mark_rtg_boundary(
ioend->io_flags |= IOMAP_IOEND_BOUNDARY;
}
static void
xfs_submit_zoned_bio(
struct iomap_ioend *ioend,
struct xfs_open_zone *oz,
bool is_seq)
{
ioend->io_bio.bi_iter.bi_sector = ioend->io_sector;
ioend->io_private = oz;
atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */
if (is_seq) {
ioend->io_bio.bi_opf &= ~REQ_OP_WRITE;
ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND;
} else {
xfs_mark_rtg_boundary(ioend);
}
submit_bio(&ioend->io_bio);
}
/*
* Cache the last zone written to for an inode so that it is considered first
* for subsequent writes.
@ -891,6 +871,26 @@ xfs_zone_cache_create_association(
xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru);
}
static void
xfs_submit_zoned_bio(
struct iomap_ioend *ioend,
struct xfs_open_zone *oz,
bool is_seq)
{
ioend->io_bio.bi_iter.bi_sector = ioend->io_sector;
ioend->io_private = oz;
atomic_inc(&oz->oz_ref); /* for xfs_zoned_end_io */
if (is_seq) {
ioend->io_bio.bi_opf &= ~REQ_OP_WRITE;
ioend->io_bio.bi_opf |= REQ_OP_ZONE_APPEND;
} else {
xfs_mark_rtg_boundary(ioend);
}
submit_bio(&ioend->io_bio);
}
void
xfs_zone_alloc_and_submit(
struct iomap_ioend *ioend,