mirror of
				git://git.yoctoproject.org/linux-yocto.git
				synced 2025-10-23 07:23:12 +02:00 
			
		
		
		
	xfs: allow queued realtime intents to drain before scrubbing
When a writer thread executes a chain of log intent items for the realtime volume, the ILOCKs taken during each step are for each rt metadata file, not the entire rt volume itself. Although scrub takes all rt metadata ILOCKs, this isn't sufficient to guard against scrub checking the rt volume while that writer thread is in the middle of finishing a chain because there's no higher level locking primitive guarding the realtime volume. When there's a collision, cross-referencing between data structures (e.g. rtrmapbt and rtrefcountbt) yields false corruption events; if repair is running, this results in incorrect repairs, which is catastrophic. Fix this by adding to the mount structure the same drain that we use to protect scrub against concurrent AG updates, but this time for the realtime volume. Signed-off-by: "Darrick J. Wong" <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
		
							parent
							
								
									6d4933c221
								
							
						
					
					
						commit
						428e488465
					
				|  | @ -324,10 +324,15 @@ xchk_bmap_rt_iextent_xref( | |||
| 			irec->br_startoff, &error)) | ||||
| 		return; | ||||
| 
 | ||||
| 	xchk_rtgroup_lock(&info->sc->sr, XCHK_RTGLOCK_ALL); | ||||
| 	error = xchk_rtgroup_lock(info->sc, &info->sc->sr, XCHK_RTGLOCK_ALL); | ||||
| 	if (!xchk_fblock_process_error(info->sc, info->whichfork, | ||||
| 			irec->br_startoff, &error)) | ||||
| 		goto out_free; | ||||
| 
 | ||||
| 	xchk_xref_is_used_rt_space(info->sc, irec->br_startblock, | ||||
| 			irec->br_blockcount); | ||||
| 
 | ||||
| out_free: | ||||
| 	xchk_rtgroup_free(info->sc, &info->sc->sr); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -719,13 +719,79 @@ xchk_rtgroup_init( | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| void | ||||
| /* Lock all the rt group metadata inode ILOCKs and wait for intents. */ | ||||
| int | ||||
| xchk_rtgroup_lock( | ||||
| 	struct xfs_scrub	*sc, | ||||
| 	struct xchk_rt		*sr, | ||||
| 	unsigned int		rtglock_flags) | ||||
| { | ||||
| 	int			error = 0; | ||||
| 
 | ||||
| 	ASSERT(sr->rtg != NULL); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If we're /only/ locking the rtbitmap in shared mode, then we're | ||||
| 	 * obviously not trying to compare records in two metadata inodes. | ||||
| 	 * There's no need to drain intents here because the caller (most | ||||
| 	 * likely the rgsuper scanner) doesn't need that level of consistency. | ||||
| 	 */ | ||||
| 	if (rtglock_flags == XFS_RTGLOCK_BITMAP_SHARED) { | ||||
| 		xfs_rtgroup_lock(sr->rtg, rtglock_flags); | ||||
| 		sr->rtlock_flags = rtglock_flags; | ||||
| 		return 0; | ||||
| 	} | ||||
| 
 | ||||
| 	do { | ||||
| 		if (xchk_should_terminate(sc, &error)) | ||||
| 			return error; | ||||
| 
 | ||||
| 		xfs_rtgroup_lock(sr->rtg, rtglock_flags); | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * If we've grabbed a non-metadata file for scrubbing, we | ||||
| 		 * assume that holding its ILOCK will suffice to coordinate | ||||
| 		 * with any rt intent chains involving this inode. | ||||
| 		 */ | ||||
| 		if (sc->ip && !xfs_is_internal_inode(sc->ip)) | ||||
| 			break; | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Decide if the rt group is quiet enough for all metadata to | ||||
| 		 * be consistent with each other.  Regular file IO doesn't get | ||||
| 		 * to lock all the rt inodes at the same time, which means that | ||||
| 		 * there could be other threads in the middle of processing a | ||||
| 		 * chain of deferred ops. | ||||
| 		 * | ||||
| 		 * We just locked all the metadata inodes for this rt group; | ||||
| 		 * now take a look to see if there are any intents in progress. | ||||
| 		 * If there are, drop the rt group inode locks and wait for the | ||||
| 		 * intents to drain.  Since we hold the rt group inode locks | ||||
| 		 * for the duration of the scrub, this is the only time we have | ||||
| 		 * to sample the intents counter; any threads increasing it | ||||
| 		 * after this point can't possibly be in the middle of a chain | ||||
| 		 * of rt metadata updates. | ||||
| 		 * | ||||
| 		 * Obviously, this should be slanted against scrub and in favor | ||||
| 		 * of runtime threads. | ||||
| 		 */ | ||||
| 		if (!xfs_group_intent_busy(rtg_group(sr->rtg))) | ||||
| 			break; | ||||
| 
 | ||||
| 		xfs_rtgroup_unlock(sr->rtg, rtglock_flags); | ||||
| 
 | ||||
| 		if (!(sc->flags & XCHK_FSGATES_DRAIN)) | ||||
| 			return -ECHRNG; | ||||
| 		error = xfs_group_intent_drain(rtg_group(sr->rtg)); | ||||
| 		if (error) { | ||||
| 			if (error == -ERESTARTSYS) | ||||
| 				error = -EINTR; | ||||
| 			return error; | ||||
| 		} | ||||
| 	} while (1); | ||||
| 
 | ||||
| 	sr->rtlock_flags = rtglock_flags; | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  | @ -1379,7 +1445,7 @@ xchk_fsgates_enable( | |||
| 	trace_xchk_fsgates_enable(sc, scrub_fsgates); | ||||
| 
 | ||||
| 	if (scrub_fsgates & XCHK_FSGATES_DRAIN) | ||||
| 		xfs_drain_wait_enable(); | ||||
| 		xfs_defer_drain_wait_enable(); | ||||
| 
 | ||||
| 	if (scrub_fsgates & XCHK_FSGATES_QUOTA) | ||||
| 		xfs_dqtrx_hook_enable(); | ||||
|  |  | |||
|  | @ -141,12 +141,13 @@ xchk_rtgroup_init_existing( | |||
| 	return error == -ENOENT ? -EFSCORRUPTED : error; | ||||
| } | ||||
| 
 | ||||
| void xchk_rtgroup_lock(struct xchk_rt *sr, unsigned int rtglock_flags); | ||||
| int xchk_rtgroup_lock(struct xfs_scrub *sc, struct xchk_rt *sr, | ||||
| 		unsigned int rtglock_flags); | ||||
| void xchk_rtgroup_free(struct xfs_scrub *sc, struct xchk_rt *sr); | ||||
| #else | ||||
| # define xchk_rtgroup_init(sc, rgno, sr)		(-EFSCORRUPTED) | ||||
| # define xchk_rtgroup_init_existing(sc, rgno, sr)	(-EFSCORRUPTED) | ||||
| # define xchk_rtgroup_lock(sc, lockflags)		do { } while (0) | ||||
| # define xchk_rtgroup_lock(sc, sr, lockflags)		(-EFSCORRUPTED) | ||||
| # define xchk_rtgroup_free(sc, sr)			do { } while (0) | ||||
| #endif /* CONFIG_XFS_RT */ | ||||
| 
 | ||||
|  |  | |||
|  | @ -61,7 +61,9 @@ xchk_rgsuperblock( | |||
| 	if (!xchk_xref_process_error(sc, 0, 0, &error)) | ||||
| 		return error; | ||||
| 
 | ||||
| 	xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP_SHARED); | ||||
| 	error = xchk_rtgroup_lock(sc, &sc->sr, XFS_RTGLOCK_BITMAP_SHARED); | ||||
| 	if (error) | ||||
| 		return error; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Since we already validated the rt superblock at mount time, we don't | ||||
|  |  | |||
|  | @ -30,6 +30,9 @@ xchk_setup_rtbitmap( | |||
| 	struct xchk_rtbitmap	*rtb; | ||||
| 	int			error; | ||||
| 
 | ||||
| 	if (xchk_need_intent_drain(sc)) | ||||
| 		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN); | ||||
| 
 | ||||
| 	rtb = kzalloc(sizeof(struct xchk_rtbitmap), XCHK_GFP_FLAGS); | ||||
| 	if (!rtb) | ||||
| 		return -ENOMEM; | ||||
|  | @ -57,12 +60,15 @@ xchk_setup_rtbitmap( | |||
| 	if (error) | ||||
| 		return error; | ||||
| 
 | ||||
| 	error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL); | ||||
| 	if (error) | ||||
| 		return error; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Now that we've locked the rtbitmap, we can't race with growfsrt | ||||
| 	 * trying to expand the bitmap or change the size of the rt volume. | ||||
| 	 * Hence it is safe to compute and check the geometry values. | ||||
| 	 */ | ||||
| 	xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP); | ||||
| 	if (mp->m_sb.sb_rblocks) { | ||||
| 		rtb->rextents = xfs_blen_to_rtbxlen(mp, mp->m_sb.sb_rblocks); | ||||
| 		rtb->rextslog = xfs_compute_rextslog(rtb->rextents); | ||||
|  |  | |||
|  | @ -89,6 +89,10 @@ xchk_setup_rtsummary( | |||
| 	if (error) | ||||
| 		return error; | ||||
| 
 | ||||
| 	error = xchk_rtgroup_lock(sc, &sc->sr, XFS_RTGLOCK_BITMAP); | ||||
| 	if (error) | ||||
| 		return error; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Now that we've locked the rtbitmap and rtsummary, we can't race with | ||||
| 	 * growfsrt trying to expand the summary or change the size of the rt | ||||
|  | @ -99,7 +103,6 @@ xchk_setup_rtsummary( | |||
| 	 * exclusively here.  If we ever start caring about running concurrent | ||||
| 	 * fsmap with scrub this could be changed. | ||||
| 	 */ | ||||
| 	xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP); | ||||
| 	if (mp->m_sb.sb_rblocks) { | ||||
| 		rts->rextents = xfs_blen_to_rtbxlen(mp, mp->m_sb.sb_rblocks); | ||||
| 		rts->rbmblocks = xfs_rtbitmap_blockcount(mp); | ||||
|  |  | |||
|  | @ -164,7 +164,7 @@ xchk_fsgates_disable( | |||
| 	trace_xchk_fsgates_disable(sc, sc->flags & XCHK_FSGATES_ALL); | ||||
| 
 | ||||
| 	if (sc->flags & XCHK_FSGATES_DRAIN) | ||||
| 		xfs_drain_wait_disable(); | ||||
| 		xfs_defer_drain_wait_disable(); | ||||
| 
 | ||||
| 	if (sc->flags & XCHK_FSGATES_QUOTA) | ||||
| 		xfs_dqtrx_hook_disable(); | ||||
|  |  | |||
|  | @ -13,28 +13,28 @@ | |||
| #include "xfs_trace.h" | ||||
| 
 | ||||
| /*
 | ||||
|  * Use a static key here to reduce the overhead of xfs_drain_rele.  If the | ||||
|  * compiler supports jump labels, the static branch will be replaced by a nop | ||||
|  * sled when there are no xfs_drain_wait callers.  Online fsck is currently | ||||
|  * the only caller, so this is a reasonable tradeoff. | ||||
|  * Use a static key here to reduce the overhead of xfs_defer_drain_rele.  If | ||||
|  * the compiler supports jump labels, the static branch will be replaced by a | ||||
|  * nop sled when there are no xfs_defer_drain_wait callers.  Online fsck is | ||||
|  * currently the only caller, so this is a reasonable tradeoff. | ||||
|  * | ||||
|  * Note: Patching the kernel code requires taking the cpu hotplug lock.  Other | ||||
|  * parts of the kernel allocate memory with that lock held, which means that | ||||
|  * XFS callers cannot hold any locks that might be used by memory reclaim or | ||||
|  * writeback when calling the static_branch_{inc,dec} functions. | ||||
|  */ | ||||
| static DEFINE_STATIC_KEY_FALSE(xfs_drain_waiter_gate); | ||||
| static DEFINE_STATIC_KEY_FALSE(xfs_defer_drain_waiter_gate); | ||||
| 
 | ||||
| void | ||||
| xfs_drain_wait_disable(void) | ||||
| xfs_defer_drain_wait_disable(void) | ||||
| { | ||||
| 	static_branch_dec(&xfs_drain_waiter_gate); | ||||
| 	static_branch_dec(&xfs_defer_drain_waiter_gate); | ||||
| } | ||||
| 
 | ||||
| void | ||||
| xfs_drain_wait_enable(void) | ||||
| xfs_defer_drain_wait_enable(void) | ||||
| { | ||||
| 	static_branch_inc(&xfs_drain_waiter_gate); | ||||
| 	static_branch_inc(&xfs_defer_drain_waiter_gate); | ||||
| } | ||||
| 
 | ||||
| void | ||||
|  | @ -71,7 +71,7 @@ static inline bool has_waiters(struct wait_queue_head *wq_head) | |||
| static inline void xfs_defer_drain_rele(struct xfs_defer_drain *dr) | ||||
| { | ||||
| 	if (atomic_dec_and_test(&dr->dr_count) && | ||||
| 	    static_branch_unlikely(&xfs_drain_waiter_gate) && | ||||
| 	    static_branch_unlikely(&xfs_defer_drain_waiter_gate) && | ||||
| 	    has_waiters(&dr->dr_waiters)) | ||||
| 		wake_up(&dr->dr_waiters); | ||||
| } | ||||
|  |  | |||
|  | @ -26,8 +26,8 @@ struct xfs_defer_drain { | |||
| void xfs_defer_drain_init(struct xfs_defer_drain *dr); | ||||
| void xfs_defer_drain_free(struct xfs_defer_drain *dr); | ||||
| 
 | ||||
| void xfs_drain_wait_disable(void); | ||||
| void xfs_drain_wait_enable(void); | ||||
| void xfs_defer_drain_wait_disable(void); | ||||
| void xfs_defer_drain_wait_enable(void); | ||||
| 
 | ||||
| /*
 | ||||
|  * Deferred Work Intent Drains | ||||
|  | @ -61,6 +61,9 @@ void xfs_drain_wait_enable(void); | |||
|  * All functions that create work items must increment the intent counter as | ||||
|  * soon as the item is added to the transaction and cannot drop the counter | ||||
|  * until the item is finished or cancelled. | ||||
|  * | ||||
|  * The same principles apply to realtime groups because the rt metadata inode | ||||
|  * ILOCKs are not held across transaction rolls. | ||||
|  */ | ||||
| struct xfs_group *xfs_group_intent_get(struct xfs_mount *mp, | ||||
| 		xfs_fsblock_t fsbno, enum xfs_group_type type); | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Darrick J. Wong
						Darrick J. Wong