Merge 4c5b123ab2 ("blk-rq-qos: fix crash on rq_qos_wait vs. rq_qos_wake_function race") into android15-6.6-lts

Steps on the way to 6.6.57

Change-Id: I561755de546d2b23668440400c65d012c2700435
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
commit db82f3c8d1
Author: Greg Kroah-Hartman
Date:   2024-11-21 22:09:25 +00:00

41 changed files with 547 additions and 287 deletions


@@ -77,7 +77,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 	vcpu->stat.instruction_diagnose_258++;
 	if (vcpu->run->s.regs.gprs[rx] & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-	rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
+	rc = read_guest_real(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
 	if (rc)
 		return kvm_s390_inject_prog_cond(vcpu, rc);
 	if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)


@@ -1001,6 +1001,8 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
 	const gfn_t gfn = gpa_to_gfn(gpa);
 	int rc;
 
+	if (!gfn_to_memslot(kvm, gfn))
+		return PGM_ADDRESSING;
 	if (mode == GACC_STORE)
 		rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
 	else
@@ -1158,6 +1160,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
 		gra += fragment_len;
 		data += fragment_len;
 	}
+	if (rc > 0)
+		vcpu->arch.pgm.code = rc;
 	return rc;
 }
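
With the memslot check in place, a positive return from access_guest_real() now means pgm check info has already been stored and can be handed straight to the injection helper, as the diag 0x258 hunk above does. A minimal sketch of a caller relying on that contract — the handler name and buffer are hypothetical, not part of this patch:

    /* Hypothetical caller: rc > 0 means pgm info was stored above. */
    static int handle_example(struct kvm_vcpu *vcpu, unsigned long gra)
    {
            u8 parm[16];
            int rc;

            rc = read_guest_real(vcpu, gra, parm, sizeof(parm));
            if (rc)         /* -EFAULT or PGM_ADDRESSING */
                    return kvm_s390_inject_prog_cond(vcpu, rc);
            return 0;
    }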


@@ -405,11 +405,12 @@ int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
  * @len: number of bytes to copy
  *
  * Copy @len bytes from @data (kernel space) to @gra (guest real address).
- * It is up to the caller to ensure that the entire guest memory range is
- * valid memory before calling this function.
  * Guest low address and key protection are not checked.
  *
- * Returns zero on success or -EFAULT on error.
+ * Returns zero on success, -EFAULT when copying from @data failed, or
+ * PGM_ADDRESSING in case @gra is outside a memslot. In this case, pgm check info
+ * is also stored to allow injecting into the guest (if applicable) using
+ * kvm_s390_inject_prog_cond().
  *
  * If an error occurs data may have been copied partially to guest memory.
  */
@@ -428,11 +429,12 @@ int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
  * @len: number of bytes to copy
  *
  * Copy @len bytes from @gra (guest real address) to @data (kernel space).
- * It is up to the caller to ensure that the entire guest memory range is
- * valid memory before calling this function.
  * Guest key protection is not checked.
  *
- * Returns zero on success or -EFAULT on error.
+ * Returns zero on success, -EFAULT when copying to @data failed, or
+ * PGM_ADDRESSING in case @gra is outside a memslot. In this case, pgm check info
+ * is also stored to allow injecting into the guest (if applicable) using
+ * kvm_s390_inject_prog_cond().
  *
  * If an error occurs data may have been copied partially to kernel space.
  */


@@ -9,6 +9,8 @@
 #include <asm/unwind_hints.h>
 #include <asm/segment.h>
 #include <asm/cache.h>
+#include <asm/cpufeatures.h>
+#include <asm/nospec-branch.h>
 
 .pushsection .noinstr.text, "ax"
 
@@ -17,6 +19,9 @@ SYM_FUNC_START(entry_ibpb)
 	movl $PRED_CMD_IBPB, %eax
 	xorl %edx, %edx
 	wrmsr
+
+	/* Make sure IBPB clears return stack predictions too. */
+	FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_BUG_IBPB_NO_RET
 	RET
 SYM_FUNC_END(entry_ibpb)
 
 /* For KVM */


@@ -216,7 +216,7 @@
 #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE	( 7*32+23) /* "" Disable Speculative Store Bypass. */
 #define X86_FEATURE_LS_CFG_SSBD		( 7*32+24)  /* "" AMD SSBD implementation via LS_CFG MSR */
 #define X86_FEATURE_IBRS		( 7*32+25) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_IBPB		( 7*32+26) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_IBPB		( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier without a guaranteed RSB flush */
 #define X86_FEATURE_STIBP		( 7*32+27) /* Single Thread Indirect Branch Predictors */
 #define X86_FEATURE_ZEN			( 7*32+28) /* "" Generic flag for all Zen and newer */
 #define X86_FEATURE_L1TF_PTEINV		( 7*32+29) /* "" L1TF workaround PTE inversion */
@@ -347,6 +347,7 @@
 #define X86_FEATURE_CPPC		(13*32+27) /* Collaborative Processor Performance Control */
 #define X86_FEATURE_AMD_PSFD		(13*32+28) /* "" Predictive Store Forwarding Disable */
 #define X86_FEATURE_BTC_NO		(13*32+29) /* "" Not vulnerable to Branch Type Confusion */
+#define X86_FEATURE_AMD_IBPB_RET	(13*32+30) /* "" IBPB clears return address predictor */
 #define X86_FEATURE_BRS			(13*32+31) /* Branch Sampling available */
 
 /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
@@ -516,4 +517,5 @@
 #define X86_BUG_DIV0			X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
 #define X86_BUG_RFDS			X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
 #define X86_BUG_BHI			X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
+#define X86_BUG_IBPB_NO_RET		X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */
 
 #endif /* _ASM_X86_CPUFEATURES_H */


@@ -1113,8 +1113,25 @@ do_cmd_auto:
 	case RETBLEED_MITIGATION_IBPB:
 		setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+
+		/*
+		 * IBPB on entry already obviates the need for
+		 * software-based untraining so clear those in case some
+		 * other mitigation like SRSO has selected them.
+		 */
+		setup_clear_cpu_cap(X86_FEATURE_UNRET);
+		setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
+
 		setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
 		mitigate_smt = true;
 
+		/*
+		 * There is no need for RSB filling: entry_ibpb() ensures
+		 * all predictions, including the RSB, are invalidated,
+		 * regardless of IBPB implementation.
+		 */
+		setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+
 		break;
 
 	case RETBLEED_MITIGATION_STUFF:
@@ -2610,6 +2627,14 @@ static void __init srso_select_mitigation(void)
 		if (has_microcode) {
 			setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
 			srso_mitigation = SRSO_MITIGATION_IBPB;
+
+			/*
+			 * IBPB on entry already obviates the need for
+			 * software-based untraining so clear those in case some
+			 * other mitigation like Retbleed has selected them.
+			 */
+			setup_clear_cpu_cap(X86_FEATURE_UNRET);
+			setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
 		}
 	} else {
 		pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
@@ -2622,6 +2647,13 @@ static void __init srso_select_mitigation(void)
 		if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
 			setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
 			srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
+
+			/*
+			 * There is no need for RSB filling: entry_ibpb() ensures
+			 * all predictions, including the RSB, are invalidated,
+			 * regardless of IBPB implementation.
+			 */
+			setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
 		}
 	} else {
 		pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");


@@ -1483,6 +1483,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 	     boot_cpu_has(X86_FEATURE_HYPERVISOR)))
 		setup_force_cpu_bug(X86_BUG_BHI);
 
+	if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET))
+		setup_force_cpu_bug(X86_BUG_IBPB_NO_RET);
+
 	if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
 		return;
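
The new bug bit can also be queried from C like any other bug flag; a small sketch with a hypothetical helper (the real consumer is the FILL_RETURN_BUFFER alternative in entry_ibpb() above):

    #include <asm/cpufeature.h>

    /* Hypothetical helper: true when IBPB also flushes return predictions. */
    static bool ibpb_flushes_ret(void)
    {
            return !boot_cpu_has_bug(X86_BUG_IBPB_NO_RET);
    }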


@@ -219,8 +219,8 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr,
 	data->got_token = true;
 	smp_wmb();
-	list_del_init(&curr->entry);
 	wake_up_process(data->task);
+	list_del_init_careful(&curr->entry);
 	return 1;
 }
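
The reordering matters because curr and data live on the waiter's stack. A sketch of the race the old order allowed (simplified from the commit message, not verbatim code):

    /*
     *   rq_qos_wake_function()           rq_qos_wait()
     *   ----------------------           -------------
     *   data->got_token = true;
     *   smp_wmb();
     *   list_del_init(&curr->entry);     (running concurrently, e.g. not yet
     *                                    asleep) sees got_token and breaks
     *                                    out; finish_wait() finds the entry
     *                                    already unlinked, skips the
     *                                    waitqueue lock and returns, so the
     *                                    stack frame holding data/curr is gone
     *   wake_up_process(data->task);     use-after-return crash
     *
     * Waking first keeps the entry linked until the waker is done with it:
     * wake functions run under the waitqueue lock, which finish_wait() must
     * take as long as the entry is still queued.  list_del_init_careful()
     * then pairs with the lockless list_empty_careful() in finish_wait().
     */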


@@ -3925,8 +3925,10 @@ static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *op
  */
 static void domain_context_clear(struct device_domain_info *info)
 {
-	if (!dev_is_pci(info->dev))
+	if (!dev_is_pci(info->dev)) {
 		domain_context_clear_one(info, info->bus, info->devfn);
+		return;
+	}
 
 	pci_for_each_dma_alias(to_pci_dev(info->dev),
 			       &domain_context_clear_one_cb, info);


@@ -90,6 +90,30 @@
 #define FEC_PTP_MAX_NSEC_PERIOD		4000000000ULL
 #define FEC_PTP_MAX_NSEC_COUNTER	0x80000000ULL
 
+/**
+ * fec_ptp_read - read raw cycle counter (to be used by time counter)
+ * @cc: the cyclecounter structure
+ *
+ * this function reads the cyclecounter registers and is called by the
+ * cyclecounter structure used to construct a ns counter from the
+ * arbitrary fixed point registers
+ */
+static u64 fec_ptp_read(const struct cyclecounter *cc)
+{
+	struct fec_enet_private *fep =
+		container_of(cc, struct fec_enet_private, cc);
+	u32 tempval;
+
+	tempval = readl(fep->hwp + FEC_ATIME_CTRL);
+	tempval |= FEC_T_CTRL_CAPTURE;
+	writel(tempval, fep->hwp + FEC_ATIME_CTRL);
+
+	if (fep->quirks & FEC_QUIRK_BUG_CAPTURE)
+		udelay(1);
+
+	return readl(fep->hwp + FEC_ATIME);
+}
+
 /**
  * fec_ptp_enable_pps
  * @fep: the fec_enet_private structure handle
@@ -136,7 +160,7 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable)
 	 * NSEC_PER_SEC - ts.tv_nsec. Add the remaining nanoseconds
 	 * to current timer would be next second.
 	 */
-	tempval = fep->cc.read(&fep->cc);
+	tempval = fec_ptp_read(&fep->cc);
 	/* Convert the ptp local counter to 1588 timestamp */
 	ns = timecounter_cyc2time(&fep->tc, tempval);
 	ts = ns_to_timespec64(ns);
@@ -211,13 +235,7 @@ static int fec_ptp_pps_perout(struct fec_enet_private *fep)
 	timecounter_read(&fep->tc);
 
 	/* Get the current ptp hardware time counter */
-	temp_val = readl(fep->hwp + FEC_ATIME_CTRL);
-	temp_val |= FEC_T_CTRL_CAPTURE;
-	writel(temp_val, fep->hwp + FEC_ATIME_CTRL);
-	if (fep->quirks & FEC_QUIRK_BUG_CAPTURE)
-		udelay(1);
-
-	ptp_hc = readl(fep->hwp + FEC_ATIME);
+	ptp_hc = fec_ptp_read(&fep->cc);
 
 	/* Convert the ptp local counter to 1588 timestamp */
 	curr_time = timecounter_cyc2time(&fep->tc, ptp_hc);
@@ -271,30 +289,6 @@ static enum hrtimer_restart fec_ptp_pps_perout_handler(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-/**
- * fec_ptp_read - read raw cycle counter (to be used by time counter)
- * @cc: the cyclecounter structure
- *
- * this function reads the cyclecounter registers and is called by the
- * cyclecounter structure used to construct a ns counter from the
- * arbitrary fixed point registers
- */
-static u64 fec_ptp_read(const struct cyclecounter *cc)
-{
-	struct fec_enet_private *fep =
-		container_of(cc, struct fec_enet_private, cc);
-	u32 tempval;
-
-	tempval = readl(fep->hwp + FEC_ATIME_CTRL);
-	tempval |= FEC_T_CTRL_CAPTURE;
-	writel(tempval, fep->hwp + FEC_ATIME_CTRL);
-
-	if (fep->quirks & FEC_QUIRK_BUG_CAPTURE)
-		udelay(1);
-
-	return readl(fep->hwp + FEC_ATIME);
-}
-
 /**
  * fec_ptp_start_cyclecounter - create the cycle counter from hw
  * @ndev: network device
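
For context, a minimal standalone sketch of the cyclecounter/timecounter pattern this driver uses (field values are illustrative, not the FEC's):

    #include <linux/timecounter.h>

    static u64 demo_read(const struct cyclecounter *cc)
    {
            static u64 fake;

            return ++fake;  /* stands in for a hardware counter register read */
    }

    static struct cyclecounter demo_cc = {
            .read   = demo_read,
            .mask   = CYCLECOUNTER_MASK(31),
            .mult   = 1 << 8,       /* illustrative fixed-point scaling */
            .shift  = 8,
    };

    static struct timecounter demo_tc;

    static u64 demo_now_ns(void)
    {
            /* Convert a raw counter value to nanoseconds, as the
             * timecounter_cyc2time() calls in the hunks above do. */
            return timecounter_cyc2time(&demo_tc, demo_cc.read(&demo_cc));
    }

(demo_tc would first be anchored once via timecounter_init(&demo_tc, &demo_cc, ktime_get_real_ns()).)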


@@ -1195,7 +1195,8 @@ sclp_reboot_event(struct notifier_block *this, unsigned long event, void *ptr)
 }
 
 static struct notifier_block sclp_reboot_notifier = {
-	.notifier_call = sclp_reboot_event
+	.notifier_call = sclp_reboot_event,
+	.priority = INT_MIN,
 };
 
 static ssize_t con_pages_show(struct device_driver *dev, char *buf)


@@ -319,7 +319,7 @@ sclp_vt220_add_msg(struct sclp_vt220_request *request,
 	buffer = (void *) ((addr_t) sccb + sccb->header.length);
 
 	if (convertlf) {
-		/* Perform Linefeed conversion (0x0a -> 0x0a 0x0d)*/
+		/* Perform Linefeed conversion (0x0a -> 0x0d 0x0a)*/
 		for (from=0, to=0;
 		     (from < count) && (to < sclp_vt220_space_left(request));
 		     from++) {
@@ -328,8 +328,8 @@ sclp_vt220_add_msg(struct sclp_vt220_request *request,
 			/* Perform conversion */
 			if (c == 0x0a) {
 				if (to + 1 < sclp_vt220_space_left(request)) {
-					((unsigned char *) buffer)[to++] = c;
 					((unsigned char *) buffer)[to++] = 0x0d;
+					((unsigned char *) buffer)[to++] = c;
 				} else
 					break;
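
A standalone userspace sketch of the corrected conversion, showing both the CR-before-LF order and the two-bytes-must-fit check:

    #include <stddef.h>

    /* Copy src to dst, expanding 0x0a to 0x0d 0x0a; returns bytes written. */
    static size_t lf_to_crlf(const char *src, size_t count,
                             char *dst, size_t space)
    {
            size_t from, to = 0;

            for (from = 0; from < count && to < space; from++) {
                    if (src[from] == 0x0a) {
                            if (to + 1 >= space)
                                    break;  /* need room for CR and LF */
                            dst[to++] = 0x0d;
                            dst[to++] = 0x0a;
                    } else {
                            dst[to++] = src[from];
                    }
            }
            return to;
    }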


@@ -1565,12 +1565,23 @@ out_release:
 	return error;
 }
 
+/* Enforce that there is at most one namespace bit per attr. */
+inline bool xfs_attr_check_namespace(unsigned int attr_flags)
+{
+	return hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) < 2;
+}
+
 /* Returns true if the attribute entry name is valid. */
 bool
 xfs_attr_namecheck(
+	unsigned int	attr_flags,
 	const void	*name,
 	size_t		length)
 {
+	/* Only one namespace bit allowed. */
+	if (!xfs_attr_check_namespace(attr_flags))
+		return false;
+
 	/*
 	 * MAXNAMELEN includes the trailing null, but (name/length) leave it
 	 * out, so use >= for the length check.
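
hweight32() counts set bits, so the "< 2" test accepts zero or one namespace bit. For illustration, an equivalent standalone predicate:

    #include <stdbool.h>

    /* True when at most one bit of x is set (x is 0 or a power of two). */
    static inline bool at_most_one_bit(unsigned int x)
    {
            return (x & (x - 1)) == 0;
    }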


@@ -547,7 +547,9 @@ int xfs_attr_get(struct xfs_da_args *args);
 int xfs_attr_set(struct xfs_da_args *args);
 int xfs_attr_set_iter(struct xfs_attr_intent *attr);
 int xfs_attr_remove_iter(struct xfs_attr_intent *attr);
-bool xfs_attr_namecheck(const void *name, size_t length);
+bool xfs_attr_check_namespace(unsigned int attr_flags);
+bool xfs_attr_namecheck(unsigned int attr_flags, const void *name,
+		size_t length);
 int xfs_attr_calc_size(struct xfs_da_args *args, int *local);
 void xfs_init_attr_trans(struct xfs_da_args *args, struct xfs_trans_res *tres,
 		unsigned int *total);


@@ -984,6 +984,10 @@ xfs_attr_shortform_to_leaf(
 		nargs.hashval = xfs_da_hashname(sfe->nameval,
 						sfe->namelen);
 		nargs.attr_filter = sfe->flags & XFS_ATTR_NSP_ONDISK_MASK;
+		if (!xfs_attr_check_namespace(sfe->flags)) {
+			error = -EFSCORRUPTED;
+			goto out;
+		}
 		error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
 		ASSERT(error == -ENOATTR);
 		error = xfs_attr3_leaf_add(bp, &nargs);
@@ -1105,7 +1109,7 @@ xfs_attr_shortform_verify(
 		 * one namespace flag per xattr, so we can just count the
 		 * bits (i.e. hweight) here.
 		 */
-		if (hweight8(sfep->flags & XFS_ATTR_NSP_ONDISK_MASK) > 1)
+		if (!xfs_attr_check_namespace(sfep->flags))
 			return __this_address;
 
 		sfep = next_sfep;


@@ -619,7 +619,6 @@ xfs_attr_rmtval_set_blk(
 	if (error)
 		return error;
 
-	ASSERT(nmap == 1);
 	ASSERT((map->br_startblock != DELAYSTARTBLOCK) &&
 	       (map->br_startblock != HOLESTARTBLOCK));


@@ -1549,6 +1549,7 @@ xfs_bmap_add_extent_delay_real(
 			if (error)
 				goto done;
 		}
+		ASSERT(da_new <= da_old);
 		break;
 
 	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -1578,6 +1579,7 @@ xfs_bmap_add_extent_delay_real(
 			if (error)
 				goto done;
 		}
+		ASSERT(da_new <= da_old);
 		break;
 
 	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -1611,6 +1613,7 @@ xfs_bmap_add_extent_delay_real(
 			if (error)
 				goto done;
 		}
+		ASSERT(da_new <= da_old);
 		break;
 
 	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -1643,6 +1646,7 @@ xfs_bmap_add_extent_delay_real(
 				goto done;
 			}
 		}
+		ASSERT(da_new <= da_old);
 		break;
 
 	case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -1680,6 +1684,7 @@ xfs_bmap_add_extent_delay_real(
 			if (error)
 				goto done;
 		}
+		ASSERT(da_new <= da_old);
 		break;
 
 	case BMAP_LEFT_FILLING:
@@ -1767,6 +1772,7 @@ xfs_bmap_add_extent_delay_real(
 		xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
 		xfs_iext_next(ifp, &bma->icur);
 		xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
+		ASSERT(da_new <= da_old);
 		break;
 
 	case BMAP_RIGHT_FILLING:
@@ -1814,6 +1820,7 @@ xfs_bmap_add_extent_delay_real(
 		PREV.br_blockcount = temp;
 		xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
 		xfs_iext_next(ifp, &bma->icur);
+		ASSERT(da_new <= da_old);
 		break;
 
 	case 0:
@@ -1934,11 +1941,9 @@ xfs_bmap_add_extent_delay_real(
 	}
 
 	/* adjust for changes in reserved delayed indirect blocks */
-	if (da_new != da_old) {
-		ASSERT(state == 0 || da_new < da_old);
+	if (da_new != da_old)
 		error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
-				false);
-	}
+				true);
 
 	xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
 done:
@@ -3969,20 +3974,32 @@ xfs_bmapi_reserve_delalloc(
 	xfs_extlen_t		alen;
 	xfs_extlen_t		indlen;
 	int			error;
-	xfs_fileoff_t		aoff = off;
+	xfs_fileoff_t		aoff;
+	bool			use_cowextszhint =
+					whichfork == XFS_COW_FORK && !prealloc;
 
+retry:
 	/*
 	 * Cap the alloc length. Keep track of prealloc so we know whether to
 	 * tag the inode before we return.
 	 */
+	aoff = off;
 	alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
 	if (!eof)
 		alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
 	if (prealloc && alen >= len)
 		prealloc = alen - len;
 
-	/* Figure out the extent size, adjust alen */
-	if (whichfork == XFS_COW_FORK) {
+	/*
+	 * If we're targeting the COW fork but aren't creating a speculative
+	 * posteof preallocation, try to expand the reservation to align with
+	 * the COW extent size hint if there's sufficient free space.
+	 *
+	 * Unlike the data fork, the CoW cancellation functions will free all
+	 * the reservations at inactivation, so we don't require that every
+	 * delalloc reservation have a dirty pagecache.
+	 */
+	if (use_cowextszhint) {
 		struct xfs_bmbt_irec	prev;
 		xfs_extlen_t		extsz = xfs_get_cowextsz_hint(ip);
@@ -4001,7 +4018,7 @@ xfs_bmapi_reserve_delalloc(
 	 */
 	error = xfs_quota_reserve_blkres(ip, alen);
 	if (error)
-		return error;
+		goto out;
 
 	/*
 	 * Split changing sb for alen and indlen since they could be coming
@@ -4046,6 +4063,17 @@ out_unreserve_blocks:
 out_unreserve_quota:
 	if (XFS_IS_QUOTA_ON(mp))
 		xfs_quota_unreserve_blkres(ip, alen);
+out:
+	if (error == -ENOSPC || error == -EDQUOT) {
+		trace_xfs_delalloc_enospc(ip, off, len);
+		if (prealloc || use_cowextszhint) {
+			/* retry without any preallocation */
+			use_cowextszhint = false;
+			prealloc = 0;
+			goto retry;
+		}
+	}
 	return error;
 }
@@ -4128,8 +4156,10 @@ xfs_bmapi_allocate(
 	} else {
 		error = xfs_bmap_alloc_userdata(bma);
 	}
-	if (error || bma->blkno == NULLFSBLOCK)
+	if (error)
 		return error;
+	if (bma->blkno == NULLFSBLOCK)
+		return -ENOSPC;
 
 	if (bma->flags & XFS_BMAPI_ZERO) {
 		error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
@@ -4309,6 +4339,15 @@ xfs_bmapi_finish(
  * extent state if necessary.  Details behaviour is controlled by the flags
  * parameter.  Only allocates blocks from a single allocation group, to avoid
  * locking problems.
+ *
+ * Returns 0 on success and places the extent mappings in mval.  nmaps is used
+ * as an input/output parameter where the caller specifies the maximum number
+ * of mappings that may be returned and xfs_bmapi_write passes back the number
+ * of mappings (including existing mappings) it found.
+ *
+ * Returns a negative error code on failure, including -ENOSPC when it could not
+ * allocate any blocks and -ENOSR when it did allocate blocks to convert a
+ * delalloc range, but those blocks were before the passed in range.
  */
 int
 xfs_bmapi_write(
@@ -4436,10 +4475,16 @@ xfs_bmapi_write(
 			ASSERT(len > 0);
 			ASSERT(bma.length > 0);
 			error = xfs_bmapi_allocate(&bma);
-			if (error)
+			if (error) {
+				/*
+				 * If we already allocated space in a previous
+				 * iteration return what we got so far when
+				 * running out of space.
+				 */
+				if (error == -ENOSPC && bma.nallocs)
+					break;
 				goto error0;
-			if (bma.blkno == NULLFSBLOCK)
-				break;
+			}
 
 			/*
 			 * If this is a CoW allocation, record the data in
@@ -4477,7 +4522,6 @@ xfs_bmapi_write(
 		if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
 			eof = true;
 	}
-	*nmap = n;
 
 	error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
 			whichfork);
@@ -4488,7 +4532,22 @@ xfs_bmapi_write(
 	       ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
 	xfs_bmapi_finish(&bma, whichfork, 0);
 	xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
-		orig_nmap, *nmap);
+		orig_nmap, n);
+
+	/*
+	 * When converting delayed allocations, xfs_bmapi_allocate ignores
+	 * the passed in bno and always converts from the start of the found
+	 * delalloc extent.
+	 *
+	 * To avoid a successful return with *nmap set to 0, return the magic
+	 * -ENOSR error code for this particular case so that the caller can
+	 * handle it.
+	 */
+	if (!n) {
+		ASSERT(bma.nallocs >= *nmap);
+		return -ENOSR;
+	}
+	*nmap = n;
 	return 0;
 error0:
 	xfs_bmapi_finish(&bma, whichfork, error);
@@ -4501,8 +4560,8 @@ error0:
  * invocations to allocate the target offset if a large enough physical extent
  * is not available.
  */
-int
-xfs_bmapi_convert_delalloc(
+static int
+xfs_bmapi_convert_one_delalloc(
 	struct xfs_inode	*ip,
 	int			whichfork,
 	xfs_off_t		offset,
@@ -4559,7 +4618,8 @@ xfs_bmapi_convert_delalloc(
 	if (!isnullstartblock(bma.got.br_startblock)) {
 		xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
 				xfs_iomap_inode_sequence(ip, flags));
-		*seq = READ_ONCE(ifp->if_seq);
+		if (seq)
+			*seq = READ_ONCE(ifp->if_seq);
 		goto out_trans_cancel;
 	}
@@ -4595,9 +4655,6 @@ xfs_bmapi_convert_delalloc(
 	if (error)
 		goto out_finish;
 
-	error = -ENOSPC;
-	if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
-		goto out_finish;
 	error = -EFSCORRUPTED;
 	if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
 		goto out_finish;
@@ -4608,7 +4665,8 @@ xfs_bmapi_convert_delalloc(
 	ASSERT(!isnullstartblock(bma.got.br_startblock));
 	xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
 			xfs_iomap_inode_sequence(ip, flags));
-	*seq = READ_ONCE(ifp->if_seq);
+	if (seq)
+		*seq = READ_ONCE(ifp->if_seq);
 
 	if (whichfork == XFS_COW_FORK)
 		xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
@@ -4631,6 +4689,36 @@ out_trans_cancel:
 	return error;
 }
 
+/*
+ * Pass in a delalloc extent and convert it to real extents, return the real
+ * extent that maps offset_fsb in iomap.
+ */
+int
+xfs_bmapi_convert_delalloc(
+	struct xfs_inode	*ip,
+	int			whichfork,
+	loff_t			offset,
+	struct iomap		*iomap,
+	unsigned int		*seq)
+{
+	int			error;
+
+	/*
+	 * Attempt to allocate whatever delalloc extent currently backs offset
+	 * and put the result into iomap.  Allocate in a loop because it may
+	 * take several attempts to allocate real blocks for a contiguous
+	 * delalloc extent if free space is sufficiently fragmented.
+	 */
+	do {
+		error = xfs_bmapi_convert_one_delalloc(ip, whichfork, offset,
+				iomap, seq);
+		if (error)
+			return error;
+	} while (iomap->offset + iomap->length <= offset);
+
+	return 0;
+}
+
 int
 xfs_bmapi_remap(
 	struct xfs_trans	*tp,
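
Taken together, xfs_bmapi_write() callers now see three outcomes; a sketch of the resulting call-site pattern (this is exactly what the xfs_alloc_file_space() hunk further down adopts):

    error = xfs_bmapi_write(tp, ip, startoffset_fsb, allocatesize_fsb,
                            XFS_BMAPI_PREALLOC, 0, imapp, &nimaps);
    if (error) {
            if (error != -ENOSR)    /* hard failure, including -ENOSPC */
                    goto error;
            error = 0;              /* allocated before the range: loop again */
    } else {
            startoffset_fsb += imapp->br_blockcount;
            allocatesize_fsb -= imapp->br_blockcount;
    }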


@@ -2158,8 +2158,8 @@ xfs_da_grow_inode_int(
 	struct xfs_inode	*dp = args->dp;
 	int			w = args->whichfork;
 	xfs_rfsblock_t		nblks = dp->i_nblocks;
-	struct xfs_bmbt_irec	map, *mapp;
-	int			nmap, error, got, i, mapi;
+	struct xfs_bmbt_irec	map, *mapp = &map;
+	int			nmap, error, got, i, mapi = 1;
 
 	/*
 	 * Find a spot in the file space to put the new block.
@@ -2175,14 +2175,7 @@ xfs_da_grow_inode_int(
 	error = xfs_bmapi_write(tp, dp, *bno, count,
 			xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
 			args->total, &map, &nmap);
-	if (error)
-		return error;
-
-	ASSERT(nmap <= 1);
-	if (nmap == 1) {
-		mapp = &map;
-		mapi = 1;
-	} else if (nmap == 0 && count > 1) {
+	if (error == -ENOSPC && count > 1) {
 		xfs_fileoff_t	b;
 		int		c;
@@ -2199,16 +2192,13 @@ xfs_da_grow_inode_int(
 					args->total, &mapp[mapi], &nmap);
 			if (error)
 				goto out_free_map;
-			if (nmap < 1)
-				break;
 			mapi += nmap;
 			b = mapp[mapi - 1].br_startoff +
 			    mapp[mapi - 1].br_blockcount;
 		}
-	} else {
-		mapi = 0;
-		mapp = NULL;
 	}
+	if (error)
+		goto out_free_map;
 
 	/*
 	 * Count the blocks we got, make sure it matches the total.


@@ -703,8 +703,13 @@ struct xfs_attr3_leafblock {
 #define XFS_ATTR_ROOT		(1u << XFS_ATTR_ROOT_BIT)
 #define XFS_ATTR_SECURE		(1u << XFS_ATTR_SECURE_BIT)
 #define XFS_ATTR_INCOMPLETE	(1u << XFS_ATTR_INCOMPLETE_BIT)
 #define XFS_ATTR_NSP_ONDISK_MASK	(XFS_ATTR_ROOT | XFS_ATTR_SECURE)
+#define XFS_ATTR_ONDISK_MASK	(XFS_ATTR_NSP_ONDISK_MASK | \
+				 XFS_ATTR_LOCAL | \
+				 XFS_ATTR_INCOMPLETE)
+
 /*
  * Alignment for namelist and valuelist entries (since they are mixed
  * there can be only one alignment value)


@@ -366,17 +366,40 @@ xfs_dinode_verify_fork(
 	/*
 	 * For fork types that can contain local data, check that the fork
 	 * format matches the size of local data contained within the fork.
-	 *
-	 * For all types, check that when the size says the should be in extent
-	 * or btree format, the inode isn't claiming it is in local format.
 	 */
 	if (whichfork == XFS_DATA_FORK) {
-		if (S_ISDIR(mode) || S_ISLNK(mode)) {
-			if (be64_to_cpu(dip->di_size) <= fork_size &&
+		/*
+		 * A directory small enough to fit in the inode must be stored
+		 * in local format.  The directory sf <-> extents conversion
+		 * code updates the directory size accordingly.  Directories
+		 * being truncated have zero size and are not subject to this
+		 * check.
+		 */
+		if (S_ISDIR(mode)) {
+			if (dip->di_size &&
+			    be64_to_cpu(dip->di_size) <= fork_size &&
 			    fork_format != XFS_DINODE_FMT_LOCAL)
 				return __this_address;
 		}
 
+		/*
+		 * A symlink with a target small enough to fit in the inode can
+		 * be stored in extents format if xattrs were added (thus
+		 * converting the data fork from shortform to remote format)
+		 * and then removed.
+		 */
+		if (S_ISLNK(mode)) {
+			if (be64_to_cpu(dip->di_size) <= fork_size &&
+			    fork_format != XFS_DINODE_FMT_EXTENTS &&
+			    fork_format != XFS_DINODE_FMT_LOCAL)
+				return __this_address;
+		}
+
+		/*
+		 * For all types, check that when the size says the fork should
+		 * be in extent or btree format, the inode isn't claiming to be
+		 * in local format.
+		 */
 		if (be64_to_cpu(dip->di_size) > fork_size &&
 		    fork_format == XFS_DINODE_FMT_LOCAL)
 			return __this_address;
@@ -492,9 +515,19 @@ xfs_dinode_verify(
 	if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
 		return __this_address;
 
-	/* No zero-length symlinks/dirs. */
-	if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
-		return __this_address;
+	/*
+	 * No zero-length symlinks/dirs unless they're unlinked and hence being
+	 * inactivated.
+	 */
+	if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) {
+		if (dip->di_version > 1) {
+			if (dip->di_nlink)
+				return __this_address;
+		} else {
+			if (dip->di_onlink)
+				return __this_address;
+		}
+	}
 
 	fa = xfs_dinode_verify_nrext64(mp, dip);
 	if (fa)


@@ -1031,11 +1031,12 @@ xfs_log_sb(
 	 * and hence we don't need have to update it here.
 	 */
 	if (xfs_has_lazysbcount(mp)) {
-		mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
+		mp->m_sb.sb_icount = percpu_counter_sum_positive(&mp->m_icount);
 		mp->m_sb.sb_ifree = min_t(uint64_t,
-				percpu_counter_sum(&mp->m_ifree),
+				percpu_counter_sum_positive(&mp->m_ifree),
 				mp->m_sb.sb_icount);
-		mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+		mp->m_sb.sb_fdblocks =
+				percpu_counter_sum_positive(&mp->m_fdblocks);
 	}
 
 	xfs_sb_to_disk(bp->b_addr, &mp->m_sb);


@@ -182,32 +182,23 @@ xchk_xattr_listent(
 		return;
 	}
 
+	if (flags & ~XFS_ATTR_ONDISK_MASK) {
+		xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno);
+		goto fail_xref;
+	}
+
 	if (flags & XFS_ATTR_INCOMPLETE) {
 		/* Incomplete attr key, just mark the inode for preening. */
 		xchk_ino_set_preen(sx->sc, context->dp->i_ino);
 		return;
 	}
 
-	/* Only one namespace bit allowed. */
-	if (hweight32(flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) {
-		xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno);
-		goto fail_xref;
-	}
-
 	/* Does this name make sense? */
-	if (!xfs_attr_namecheck(name, namelen)) {
+	if (!xfs_attr_namecheck(flags, name, namelen)) {
 		xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno);
 		goto fail_xref;
 	}
 
-	/*
-	 * Local xattr values are stored in the attr leaf block, so we don't
-	 * need to retrieve the value from a remote block to detect corruption
-	 * problems.
-	 */
-	if (flags & XFS_ATTR_LOCAL)
-		goto fail_xref;
-
 	/*
 	 * Try to allocate enough memory to extrat the attr value.  If that
 	 * doesn't work, we overload the seen_enough variable to convey
@@ -223,6 +214,11 @@ xchk_xattr_listent(
 	args.value = ab->value;
 
+	/*
+	 * Get the attr value to ensure that lookup can find this attribute
+	 * through the dabtree indexing and that remote value retrieval also
+	 * works correctly.
+	 */
 	error = xfs_attr_get_ilocked(&args);
 	/* ENODATA means the hash lookup failed and the attr is bad */
 	if (error == -ENODATA)
@@ -463,7 +459,6 @@ xchk_xattr_rec(
 	xfs_dahash_t			hash;
 	int				nameidx;
 	int				hdrsize;
-	unsigned int			badflags;
 	int				error;
 
 	ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
@@ -493,10 +488,15 @@ xchk_xattr_rec(
 	/* Retrieve the entry and check it. */
 	hash = be32_to_cpu(ent->hashval);
-	badflags = ~(XFS_ATTR_LOCAL | XFS_ATTR_ROOT | XFS_ATTR_SECURE |
-			XFS_ATTR_INCOMPLETE);
-	if ((ent->flags & badflags) != 0)
+	if (ent->flags & ~XFS_ATTR_ONDISK_MASK) {
 		xchk_da_set_corrupt(ds, level);
+		return 0;
+	}
+	if (!xfs_attr_check_namespace(ent->flags)) {
+		xchk_da_set_corrupt(ds, level);
+		return 0;
+	}
+
 	if (ent->flags & XFS_ATTR_LOCAL) {
 		lentry = (struct xfs_attr_leaf_name_local *)
 				(((char *)bp->b_addr) + nameidx);
@@ -561,6 +561,15 @@ xchk_xattr_check_sf(
 			break;
 		}
 
+		/*
+		 * Shortform entries do not set LOCAL or INCOMPLETE, so the
+		 * only valid flag bits here are for namespaces.
+		 */
+		if (sfe->flags & ~XFS_ATTR_NSP_ONDISK_MASK) {
+			xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, 0);
+			break;
+		}
+
 		if (!xchk_xattr_set_map(sc, ab->usedmap,
 				(char *)sfe - (char *)sf,
 				sizeof(struct xfs_attr_sf_entry))) {


@@ -735,7 +735,7 @@ xchk_iget(
 {
 	ASSERT(sc->tp != NULL);
 
-	return xfs_iget(sc->mp, sc->tp, inum, XFS_IGET_UNTRUSTED, 0, ipp);
+	return xfs_iget(sc->mp, sc->tp, inum, XCHK_IGET_FLAGS, 0, ipp);
 }
 
 /*
@@ -786,8 +786,8 @@ again:
 	if (error)
 		return error;
 
-	error = xfs_iget(mp, tp, inum,
-			XFS_IGET_NORETRY | XFS_IGET_UNTRUSTED, 0, ipp);
+	error = xfs_iget(mp, tp, inum, XFS_IGET_NORETRY | XCHK_IGET_FLAGS, 0,
+			ipp);
 	if (error == -EAGAIN) {
 		/*
 		 * The inode may be in core but temporarily unavailable and may
@@ -994,12 +994,6 @@ xchk_irele(
 		spin_lock(&VFS_I(ip)->i_lock);
 		VFS_I(ip)->i_state &= ~I_DONTCACHE;
 		spin_unlock(&VFS_I(ip)->i_lock);
-	} else if (atomic_read(&VFS_I(ip)->i_count) == 1) {
-		/*
-		 * If this is the last reference to the inode and the caller
-		 * permits it, set DONTCACHE to avoid thrashing.
-		 */
-		d_mark_dontcache(VFS_I(ip));
 	}
 
 	xfs_irele(ip);


@@ -17,6 +17,13 @@ struct xfs_scrub;
 #define XCHK_GFP_FLAGS	((__force gfp_t)(GFP_KERNEL | __GFP_NOWARN | \
 					 __GFP_RETRY_MAYFAIL))
 
+/*
+ * For opening files by handle for fsck operations, we don't trust the inumber
+ * or the allocation state; therefore, perform an untrusted lookup.  We don't
+ * want these inodes to pollute the cache, so mark them for immediate removal.
+ */
+#define XCHK_IGET_FLAGS	(XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE)
+
 /* Type info and names for the scrub types. */
 enum xchk_type {
 	ST_NONE = 1,	/* disabled */


@@ -233,45 +233,6 @@ xfs_imap_valid(
 	return true;
 }
 
-/*
- * Pass in a dellalloc extent and convert it to real extents, return the real
- * extent that maps offset_fsb in wpc->iomap.
- *
- * The current page is held locked so nothing could have removed the block
- * backing offset_fsb, although it could have moved from the COW to the data
- * fork by another thread.
- */
-static int
-xfs_convert_blocks(
-	struct iomap_writepage_ctx *wpc,
-	struct xfs_inode	*ip,
-	int			whichfork,
-	loff_t			offset)
-{
-	int			error;
-	unsigned		*seq;
-
-	if (whichfork == XFS_COW_FORK)
-		seq = &XFS_WPC(wpc)->cow_seq;
-	else
-		seq = &XFS_WPC(wpc)->data_seq;
-
-	/*
-	 * Attempt to allocate whatever delalloc extent currently backs offset
-	 * and put the result into wpc->iomap.  Allocate in a loop because it
-	 * may take several attempts to allocate real blocks for a contiguous
-	 * delalloc extent if free space is sufficiently fragmented.
-	 */
-	do {
-		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
-				&wpc->iomap, seq);
-		if (error)
-			return error;
-	} while (wpc->iomap.offset + wpc->iomap.length <= offset);
-
-	return 0;
-}
-
 static int
 xfs_map_blocks(
 	struct iomap_writepage_ctx *wpc,
@@ -289,6 +250,7 @@ xfs_map_blocks(
 	struct xfs_iext_cursor	icur;
 	int			retries = 0;
 	int			error = 0;
+	unsigned int		*seq;
 
 	if (xfs_is_shutdown(mp))
 		return -EIO;
@@ -386,7 +348,19 @@ retry:
 	trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
 	return 0;
 allocate_blocks:
-	error = xfs_convert_blocks(wpc, ip, whichfork, offset);
+	/*
+	 * Convert a delalloc extent to a real one. The current page is held
+	 * locked so nothing could have removed the block backing offset_fsb,
+	 * although it could have moved from the COW to the data fork by another
+	 * thread.
+	 */
+	if (whichfork == XFS_COW_FORK)
+		seq = &XFS_WPC(wpc)->cow_seq;
+	else
+		seq = &XFS_WPC(wpc)->data_seq;
+
+	error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
+			&wpc->iomap, seq);
 	if (error) {
 		/*
 		 * If we failed to find the extent in the COW fork we might have


@@ -510,6 +510,9 @@ xfs_attri_validate(
 	unsigned int			op = attrp->alfi_op_flags &
 					     XFS_ATTRI_OP_FLAGS_TYPE_MASK;
 
+	if (!xfs_sb_version_haslogxattrs(&mp->m_sb))
+		return false;
+
 	if (attrp->__pad != 0)
 		return false;
@@ -519,6 +522,10 @@ xfs_attri_validate(
 	if (attrp->alfi_attr_filter & ~XFS_ATTRI_FILTER_MASK)
 		return false;
 
+	if (!xfs_attr_check_namespace(attrp->alfi_attr_filter &
+				XFS_ATTR_NSP_ONDISK_MASK))
+		return false;
+
 	/* alfi_op_flags should be either a set or remove */
 	switch (op) {
 	case XFS_ATTRI_OP_FLAGS_SET:
@@ -569,7 +576,8 @@ xfs_attri_item_recover(
 	 */
 	attrp = &attrip->attri_format;
 	if (!xfs_attri_validate(mp, attrp) ||
-	    !xfs_attr_namecheck(nv->name.i_addr, nv->name.i_len))
+	    !xfs_attr_namecheck(attrp->alfi_attr_filter, nv->name.i_addr,
+				nv->name.i_len))
 		return -EFSCORRUPTED;
 
 	error = xlog_recover_iget(mp, attrp->alfi_ino, &ip);
@@ -602,8 +610,6 @@ xfs_attri_item_recover(
 	args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT |
 			 XFS_DA_OP_LOGGED;
 
-	ASSERT(xfs_sb_version_haslogxattrs(&mp->m_sb));
-
 	switch (attr->xattri_op_flags) {
 	case XFS_ATTRI_OP_FLAGS_SET:
 	case XFS_ATTRI_OP_FLAGS_REPLACE:
@@ -718,48 +724,112 @@ xlog_recover_attri_commit_pass2(
 	const void			*attr_value = NULL;
 	const void			*attr_name;
 	size_t				len;
-
-	attri_formatp = item->ri_buf[0].i_addr;
-	attr_name = item->ri_buf[1].i_addr;
+	unsigned int			op, i = 0;
 
 	/* Validate xfs_attri_log_format before the large memory allocation */
 	len = sizeof(struct xfs_attri_log_format);
-	if (item->ri_buf[0].i_len != len) {
+	if (item->ri_buf[i].i_len != len) {
 		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
 				item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
 		return -EFSCORRUPTED;
 	}
 
+	attri_formatp = item->ri_buf[i].i_addr;
 	if (!xfs_attri_validate(mp, attri_formatp)) {
 		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
-				item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+				attri_formatp, len);
 		return -EFSCORRUPTED;
 	}
 
+	/* Check the number of log iovecs makes sense for the op code. */
+	op = attri_formatp->alfi_op_flags & XFS_ATTRI_OP_FLAGS_TYPE_MASK;
+	switch (op) {
+	case XFS_ATTRI_OP_FLAGS_SET:
+	case XFS_ATTRI_OP_FLAGS_REPLACE:
+		/* Log item, attr name, attr value */
+		if (item->ri_total != 3) {
+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+					attri_formatp, len);
+			return -EFSCORRUPTED;
+		}
+		break;
+	case XFS_ATTRI_OP_FLAGS_REMOVE:
+		/* Log item, attr name */
+		if (item->ri_total != 2) {
+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+					attri_formatp, len);
+			return -EFSCORRUPTED;
+		}
+		break;
+	default:
+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+				attri_formatp, len);
+		return -EFSCORRUPTED;
+	}
+	i++;
+
 	/* Validate the attr name */
-	if (item->ri_buf[1].i_len !=
+	if (item->ri_buf[i].i_len !=
 			xlog_calc_iovec_len(attri_formatp->alfi_name_len)) {
 		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
-				item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+				attri_formatp, len);
 		return -EFSCORRUPTED;
 	}
 
-	if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) {
+	attr_name = item->ri_buf[i].i_addr;
+	if (!xfs_attr_namecheck(attri_formatp->alfi_attr_filter, attr_name,
+				attri_formatp->alfi_name_len)) {
 		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
-				item->ri_buf[1].i_addr, item->ri_buf[1].i_len);
+				attri_formatp, len);
 		return -EFSCORRUPTED;
 	}
+	i++;
 
 	/* Validate the attr value, if present */
 	if (attri_formatp->alfi_value_len != 0) {
-		if (item->ri_buf[2].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) {
+		if (item->ri_buf[i].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) {
			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
 					item->ri_buf[0].i_addr,
 					item->ri_buf[0].i_len);
 			return -EFSCORRUPTED;
 		}
 
-		attr_value = item->ri_buf[2].i_addr;
+		attr_value = item->ri_buf[i].i_addr;
+		i++;
 	}
 
+	/*
+	 * Make sure we got the correct number of buffers for the operation
+	 * that we just loaded.
+	 */
+	if (i != item->ri_total) {
+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+				attri_formatp, len);
+		return -EFSCORRUPTED;
+	}
+
+	switch (op) {
+	case XFS_ATTRI_OP_FLAGS_REMOVE:
+		/* Regular remove operations operate only on names. */
+		if (attr_value != NULL || attri_formatp->alfi_value_len != 0) {
+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+					attri_formatp, len);
+			return -EFSCORRUPTED;
+		}
+		fallthrough;
+	case XFS_ATTRI_OP_FLAGS_SET:
+	case XFS_ATTRI_OP_FLAGS_REPLACE:
+		/*
+		 * Regular xattr set/remove/replace operations require a name
+		 * and do not take a newname.  Values are optional for set and
		 * replace.
+		 */
+		if (attr_name == NULL || attri_formatp->alfi_name_len == 0) {
+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+					attri_formatp, len);
+			return -EFSCORRUPTED;
+		}
+		break;
+	}
 
 	/*
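
In summary, the iovec layout these checks collectively enforce during recovery (a sketch derived from the checks above, not a verbatim comment from the patch):

    /*
     * item->ri_buf[] layout expected by xlog_recover_attri_commit_pass2():
     *
     *   SET/REPLACE:  [0] xfs_attri_log_format   [1] name   [2] value
     *   REMOVE:       [0] xfs_attri_log_format   [1] name
     *                 (no value iovec; alfi_value_len must be zero)
     */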


@@ -82,7 +82,8 @@ xfs_attr_shortform_list(
 	    (dp->i_af.if_bytes + sf->hdr.count * 16) < context->bufsize)) {
 		for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
 			if (XFS_IS_CORRUPT(context->dp->i_mount,
-					   !xfs_attr_namecheck(sfe->nameval,
+					   !xfs_attr_namecheck(sfe->flags,
+							       sfe->nameval,
 							       sfe->namelen)))
 				return -EFSCORRUPTED;
 			context->put_listent(context,
@@ -120,7 +121,8 @@ xfs_attr_shortform_list(
 	for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
 		if (unlikely(
 		    ((char *)sfe < (char *)sf) ||
-		    ((char *)sfe >= ((char *)sf + dp->i_af.if_bytes)))) {
+		    ((char *)sfe >= ((char *)sf + dp->i_af.if_bytes)) ||
+		    !xfs_attr_check_namespace(sfe->flags))) {
 			XFS_CORRUPTION_ERROR("xfs_attr_shortform_list",
 					     XFS_ERRLEVEL_LOW,
 					     context->dp->i_mount, sfe,
@@ -174,7 +176,7 @@ xfs_attr_shortform_list(
 			cursor->offset = 0;
 		}
 		if (XFS_IS_CORRUPT(context->dp->i_mount,
-				   !xfs_attr_namecheck(sbp->name,
+				   !xfs_attr_namecheck(sbp->flags, sbp->name,
 						       sbp->namelen))) {
 			error = -EFSCORRUPTED;
 			goto out;
@@ -465,7 +467,8 @@ xfs_attr3_leaf_list_int(
 		}
 
 		if (XFS_IS_CORRUPT(context->dp->i_mount,
-				   !xfs_attr_namecheck(name, namelen)))
+				   !xfs_attr_namecheck(entry->flags, name,
+						       namelen)))
 			return -EFSCORRUPTED;
 		context->put_listent(context, entry->flags,
 				name, namelen, valuelen);


@@ -636,13 +636,11 @@ out_unlock:
 
 /*
  * Test whether it is appropriate to check an inode for and free post EOF
- * blocks. The 'force' parameter determines whether we should also consider
- * regular files that are marked preallocated or append-only.
+ * blocks.
  */
 bool
 xfs_can_free_eofblocks(
-	struct xfs_inode	*ip,
-	bool			force)
+	struct xfs_inode	*ip)
 {
 	struct xfs_bmbt_irec	imap;
 	struct xfs_mount	*mp = ip->i_mount;
@@ -676,11 +674,11 @@ xfs_can_free_eofblocks(
 		return false;
 
 	/*
-	 * Do not free real preallocated or append-only files unless the file
-	 * has delalloc blocks and we are forced to remove them.
+	 * Only free real extents for inodes with persistent preallocations or
+	 * the append-only flag.
 	 */
 	if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
-		if (!force || ip->i_delayed_blks == 0)
+		if (ip->i_delayed_blks == 0)
 			return false;
 
 	/*
@@ -734,6 +732,22 @@ xfs_free_eofblocks(
 	/* Wait on dio to ensure i_size has settled. */
 	inode_dio_wait(VFS_I(ip));
 
+	/*
+	 * For preallocated files only free delayed allocations.
+	 *
+	 * Note that this means we also leave speculative preallocations in
+	 * place for preallocated files.
+	 */
+	if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
+		if (ip->i_delayed_blks) {
+			xfs_bmap_punch_delalloc_range(ip,
+				round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
+				LLONG_MAX);
+		}
+		xfs_inode_clear_eofblocks_tag(ip);
+		return 0;
+	}
+
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
 	if (error) {
 		ASSERT(xfs_is_shutdown(mp));
@@ -868,33 +882,32 @@ xfs_alloc_file_space(
 		if (error)
 			goto error;
 
+		/*
+		 * If the allocator cannot find a single free extent large
+		 * enough to cover the start block of the requested range,
+		 * xfs_bmapi_write will return -ENOSR.
+		 *
+		 * In that case we simply need to keep looping with the same
+		 * startoffset_fsb so that one of the following allocations
+		 * will eventually reach the requested range.
+		 */
 		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
 				allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
 				&nimaps);
-		if (error)
-			goto error;
+		if (error) {
+			if (error != -ENOSR)
+				goto error;
+			error = 0;
+		} else {
+			startoffset_fsb += imapp->br_blockcount;
+			allocatesize_fsb -= imapp->br_blockcount;
+		}
 
 		ip->i_diflags |= XFS_DIFLAG_PREALLOC;
 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
 		error = xfs_trans_commit(tp);
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		if (error)
-			break;
-
-		/*
-		 * If the allocator cannot find a single free extent large
-		 * enough to cover the start block of the requested range,
-		 * xfs_bmapi_write will return 0 but leave *nimaps set to 0.
-		 *
-		 * In that case we simply need to keep looping with the same
-		 * startoffset_fsb so that one of the following allocations
-		 * will eventually reach the requested range.
-		 */
-		if (nimaps) {
-			startoffset_fsb += imapp->br_blockcount;
-			allocatesize_fsb -= imapp->br_blockcount;
-		}
 	}
 
 	return error;
@@ -1049,7 +1062,7 @@ xfs_prepare_shift(
 	 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
 	 * into the accessible region of the file.
	 */
-	if (xfs_can_free_eofblocks(ip, true)) {
+	if (xfs_can_free_eofblocks(ip)) {
 		error = xfs_free_eofblocks(ip);
 		if (error)
 			return error;


@@ -63,7 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
 		xfs_off_t len);
 
 /* EOF block manipulation functions */
-bool	xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
+bool	xfs_can_free_eofblocks(struct xfs_inode *ip);
 int	xfs_free_eofblocks(struct xfs_inode *ip);
 
 int	xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,


@@ -333,7 +333,6 @@ xfs_dquot_disk_alloc(
 		goto err_cancel;
 
 	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
-	ASSERT(nmaps == 1);
 	ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
 	       (map.br_startblock != HOLESTARTBLOCK));


@@ -1149,7 +1149,7 @@ xfs_inode_free_eofblocks(
 	}
 	*lockflags |= XFS_IOLOCK_EXCL;
 
-	if (xfs_can_free_eofblocks(ip, false))
+	if (xfs_can_free_eofblocks(ip))
 		return xfs_free_eofblocks(ip);
 
 	/* inode could be preallocated or append-only */


@@ -1469,7 +1469,7 @@ xfs_release(
 	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
 		return 0;
 
-	if (xfs_can_free_eofblocks(ip, false)) {
+	if (xfs_can_free_eofblocks(ip)) {
 		/*
 		 * Check if the inode is being opened, written and closed
 		 * frequently and we have delayed allocation blocks outstanding
@@ -1685,15 +1685,13 @@ xfs_inode_needs_inactive(
 	/*
 	 * This file isn't being freed, so check if there are post-eof blocks
-	 * to free. @force is true because we are evicting an inode from the
-	 * cache. Post-eof blocks must be freed, lest we end up with broken
-	 * free space accounting.
+	 * to free.
 	 *
 	 * Note: don't bother with iolock here since lockdep complains about
 	 * acquiring it in reclaim context. We have the only reference to the
 	 * inode at this point anyways.
 	 */
-	return xfs_can_free_eofblocks(ip, true);
+	return xfs_can_free_eofblocks(ip);
 }
 
 /*
@@ -1741,15 +1739,11 @@ xfs_inactive(
 	if (VFS_I(ip)->i_nlink != 0) {
 		/*
-		 * force is true because we are evicting an inode from the
-		 * cache. Post-eof blocks must be freed, lest we end up with
-		 * broken free space accounting.
-		 *
 		 * Note: don't bother with iolock here since lockdep complains
 		 * about acquiring it in reclaim context. We have the only
 		 * reference to the inode at this point anyways.
 		 */
-		if (xfs_can_free_eofblocks(ip, true))
+		if (xfs_can_free_eofblocks(ip))
 			error = xfs_free_eofblocks(ip);
 
 		goto out;
@@ -2329,11 +2323,26 @@ xfs_ifree_cluster(
 		 * This buffer may not have been correctly initialised as we
 		 * didn't read it from disk. That's not important because we are
 		 * only using to mark the buffer as stale in the log, and to
-		 * attach stale cached inodes on it. That means it will never be
-		 * dispatched for IO. If it is, we want to know about it, and we
-		 * want it to fail. We can acheive this by adding a write
-		 * verifier to the buffer.
+		 * attach stale cached inodes on it.
+		 *
+		 * For the inode that triggered the cluster freeing, this
+		 * attachment may occur in xfs_inode_item_precommit() after we
+		 * have marked this buffer stale. If this buffer was not in
+		 * memory before xfs_ifree_cluster() started, it will not be
+		 * marked XBF_DONE and this will cause problems later in
+		 * xfs_inode_item_precommit() when we trip over a (stale, !done)
+		 * buffer to attached to the transaction.
+		 *
+		 * Hence we have to mark the buffer as XFS_DONE here. This is
+		 * safe because we are also marking the buffer as XBF_STALE and
+		 * XFS_BLI_STALE. That means it will never be dispatched for
+		 * IO and it won't be unlocked until the cluster freeing has
+		 * been committed to the journal and the buffer unpinned. If it
+		 * is written, we want to know about it, and we want it to
+		 * fail. We can acheive this by adding a write verifier to the
+		 * buffer.
 		 */
+		bp->b_flags |= XBF_DONE;
 		bp->b_ops = &xfs_inode_buf_ops;
 
 		/*
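The rewritten comment describes an ordering hazard: a cluster buffer instantiated here was never read from disk, gets marked stale, and xfs_inode_item_precommit() may later find it in the (stale, !done) state. Setting XBF_DONE up front preserves the invariant the precommit path relies on. A runnable analog of that invariant, with illustrative flag values rather than the kernel's:

#include <assert.h>
#include <stdio.h>

/* Illustrative flag bits, not the kernel's actual values. */
#define XBF_DONE	(1u << 0)
#define XBF_STALE	(1u << 1)

/* Hypothetical analog of the fix: an uncached buffer (never read,
 * so DONE unset) must be marked done before it is marked stale. */
static void ifree_mark_stale(unsigned int *bflags)
{
	*bflags |= XBF_DONE;	/* the added line in the hunk above */
	*bflags |= XBF_STALE;
}

/* What the precommit path effectively relies on: stale implies done. */
static void precommit_check(unsigned int bflags)
{
	assert(!(bflags & XBF_STALE) || (bflags & XBF_DONE));
}

int main(void)
{
	unsigned int bflags = 0;	/* freshly instantiated buffer */

	ifree_mark_stale(&bflags);
	precommit_check(bflags);
	puts("stale buffer is also done; precommit invariant holds");
	return 0;
}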
@@ -317,14 +317,6 @@ xfs_iomap_write_direct(
 	if (error)
 		goto out_unlock;
 
-	/*
-	 * Copy any maps to caller's array and return any error.
-	 */
-	if (nimaps == 0) {
-		error = -ENOSPC;
-		goto out_unlock;
-	}
-
 	if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
 		error = xfs_alert_fsblock_zero(ip, imap);
@@ -1013,6 +1005,24 @@ xfs_buffered_write_iomap_begin(
 		goto out_unlock;
 	}
 
+	/*
+	 * For zeroing, trim a delalloc extent that extends beyond the EOF
+	 * block. If it starts beyond the EOF block, convert it to an
+	 * unwritten extent.
+	 */
+	if ((flags & IOMAP_ZERO) && imap.br_startoff <= offset_fsb &&
+	    isnullstartblock(imap.br_startblock)) {
+		xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
+
+		if (offset_fsb >= eof_fsb)
+			goto convert_delay;
+		if (end_fsb > eof_fsb) {
+			end_fsb = eof_fsb;
+			xfs_trim_extent(&imap, offset_fsb,
+					end_fsb - offset_fsb);
+		}
+	}
+
 	/*
 	 * Search the COW fork extent list even if we did not find a data fork
 	 * extent. This serves two purposes: first this implements the
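The trimming logic above hinges on block-unit arithmetic: round i_size up to the EOF block, convert if the zeroing range starts at or past it, otherwise clamp the end. A small standalone sketch of that arithmetic, assuming a 4k block size and a round-up b_to_fsb() like XFS_B_TO_FSB:

#include <stdio.h>

#define BLKSZ 4096ULL

/* Round a byte count up to file-system blocks, like XFS_B_TO_FSB. */
static unsigned long long b_to_fsb(unsigned long long bytes)
{
	return (bytes + BLKSZ - 1) / BLKSZ;
}

int main(void)
{
	unsigned long long isize = 10000;		/* i_size in bytes */
	unsigned long long eof_fsb = b_to_fsb(isize);	/* == 3 */
	unsigned long long offset_fsb = 1, end_fsb = 8;

	if (offset_fsb >= eof_fsb)
		puts("range starts post-EOF: convert delalloc to unwritten");
	else if (end_fsb > eof_fsb)
		end_fsb = eof_fsb;			/* trim at EOF block */

	printf("zeroing covers fsb [%llu, %llu)\n", offset_fsb, end_fsb);
	return 0;
}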
@@ -1117,47 +1127,48 @@ xfs_buffered_write_iomap_begin(
 		}
 	}
 
-retry:
-	error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
-			end_fsb - offset_fsb, prealloc_blocks,
-			allocfork == XFS_DATA_FORK ? &imap : &cmap,
-			allocfork == XFS_DATA_FORK ? &icur : &ccur,
-			allocfork == XFS_DATA_FORK ? eof : cow_eof);
-	switch (error) {
-	case 0:
-		break;
-	case -ENOSPC:
-	case -EDQUOT:
-		/* retry without any preallocation */
-		trace_xfs_delalloc_enospc(ip, offset, count);
-		if (prealloc_blocks) {
-			prealloc_blocks = 0;
-			goto retry;
-		}
-		fallthrough;
-	default:
-		goto out_unlock;
-	}
-
 	if (allocfork == XFS_COW_FORK) {
+		error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
+				end_fsb - offset_fsb, prealloc_blocks, &cmap,
+				&ccur, cow_eof);
+		if (error)
+			goto out_unlock;
+
 		trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap);
 		goto found_cow;
 	}
 
+	error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
+			end_fsb - offset_fsb, prealloc_blocks, &imap, &icur,
+			eof);
+	if (error)
+		goto out_unlock;
+
 	/*
 	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
 	 * them out if the write happens to fail.
 	 */
 	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_iunlock(ip, lockmode);
 	trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
 	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq);
 
 found_imap:
 	seq = xfs_iomap_inode_sequence(ip, 0);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_iunlock(ip, lockmode);
 	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
 
+convert_delay:
+	xfs_iunlock(ip, lockmode);
+	truncate_pagecache(inode, offset);
+	error = xfs_bmapi_convert_delalloc(ip, XFS_DATA_FORK, offset,
+			iomap, NULL);
+	if (error)
+		return error;
+
+	trace_xfs_iomap_alloc(ip, offset, count, XFS_DATA_FORK, &imap);
+	return 0;
+
 found_cow:
 	seq = xfs_iomap_inode_sequence(ip, 0);
 	if (imap.br_startoff <= offset_fsb) {
@@ -1165,17 +1176,17 @@ found_cow:
 		if (error)
 			goto out_unlock;
 		seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		xfs_iunlock(ip, lockmode);
 		return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
 				IOMAP_F_SHARED, seq);
 	}
 
 	xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_iunlock(ip, lockmode);
 	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq);
 
 out_unlock:
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	xfs_iunlock(ip, lockmode);
 	return error;
 }
@@ -429,13 +429,6 @@ xfs_reflink_fill_cow_hole(
 	if (error)
 		return error;
 
-	/*
-	 * Allocation succeeded but the requested range was not even partially
-	 * satisfied? Bail out!
-	 */
-	if (nimaps == 0)
-		return -ENOSPC;
-
 convert:
 	return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
@@ -498,13 +491,6 @@ xfs_reflink_fill_delalloc(
 		error = xfs_trans_commit(tp);
 		if (error)
 			return error;
-
-		/*
-		 * Allocation succeeded but the requested range was not even
-		 * partially satisfied? Bail out!
-		 */
-		if (nimaps == 0)
-			return -ENOSPC;
 	} while (cmap->br_startoff + cmap->br_blockcount <= imap->br_startoff);
 
 	return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
@@ -730,12 +716,6 @@ xfs_reflink_end_cow_extent(
 	int		nmaps;
 	int		error;
 
-	/* No COW extents? That's easy! */
-	if (ifp->if_bytes == 0) {
-		*offset_fsb = end_fsb;
-		return 0;
-	}
-
 	resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
 			XFS_TRANS_RESERVE, &tp);
@@ -840,8 +840,6 @@ xfs_growfs_rt_alloc(
 		nmap = 1;
 		error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
 				XFS_BMAPI_METADATA, 0, &map, &nmap);
-		if (!error && nmap < 1)
-			error = -ENOSPC;
 		if (error)
 			goto out_trans_cancel;
 
 		/*
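This is the same simplification as the xfs_dquot.c, xfs_iomap.c and xfs_reflink.c hunks above: once the allocator reports an allocation shortfall through its return value, as the rest of this series arranges, each caller's nmap-based -ENOSPC fixup is dead code. A toy userspace analog of the calling-convention change, with hypothetical alloc_old()/alloc_new() helpers:

#include <errno.h>
#include <stdio.h>

/* Old style: "nothing mapped" signalled only via the out-parameter,
 * so every caller repeated the same fixup. New style: the shortfall
 * is folded into the return value. */
static int alloc_old(int *nmap) { *nmap = 0; return 0; }
static int alloc_new(int *nmap) { *nmap = 0; return -ENOSPC; }

int main(void)
{
	int nmap, error;

	error = alloc_old(&nmap);
	if (!error && nmap < 1)		/* boilerplate the hunks delete */
		error = -ENOSPC;
	printf("old style: error=%d\n", error);

	error = alloc_new(&nmap);	/* shortfall reported directly */
	printf("new style: error=%d\n", error);
	return 0;
}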
@@ -2240,6 +2240,8 @@ static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode)
 
 /*
  * mas_wr_node_walk() - Find the correct offset for the index in the @mas.
+ *                      If @mas->index cannot be found within the containing
+ *                      node, we traverse to the last entry in the node.
  * @wr_mas: The maple write state
  *
  * Uses mas_slot_locked() and does not need to worry about dead nodes.
@@ -3655,7 +3657,7 @@ static bool mas_wr_walk(struct ma_wr_state *wr_mas)
 	return true;
 }
 
-static bool mas_wr_walk_index(struct ma_wr_state *wr_mas)
+static void mas_wr_walk_index(struct ma_wr_state *wr_mas)
 {
 	struct ma_state *mas = wr_mas->mas;
@@ -3664,11 +3666,9 @@ static bool mas_wr_walk_index(struct ma_wr_state *wr_mas)
 		wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
 						  mas->offset);
 		if (ma_is_leaf(wr_mas->type))
-			return true;
+			return;
 		mas_wr_walk_traverse(wr_mas);
 	}
-	return true;
 }
 
 /*
  * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs.
@@ -3904,8 +3904,8 @@ static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas)
 	memset(&b_node, 0, sizeof(struct maple_big_node));
 	/* Copy l_mas and store the value in b_node. */
 	mas_store_b_node(&l_wr_mas, &b_node, l_wr_mas.node_end);
-	/* Copy r_mas into b_node. */
-	if (r_mas.offset <= r_wr_mas.node_end)
+	/* Copy r_mas into b_node if there is anything to copy. */
+	if (r_mas.max > r_mas.last)
 		mas_mab_cp(&r_mas, r_mas.offset, r_wr_mas.node_end,
 			   &b_node, b_node.b_end + 1);
 	else
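The new test reads directly off the write state: the right-hand node only contributes entries to b_node when its maximum index lies beyond the last index the spanning store wrote. A tiny standalone illustration of that predicate (the values are made up):

#include <stdbool.h>
#include <stdio.h>

/* Analog of the new spanning-store test: copy from the right node
 * only when its max index exceeds the store's last index. */
static bool copy_right(unsigned long r_max, unsigned long r_last)
{
	return r_max > r_last;
}

int main(void)
{
	/* store ends at 70, right node spans up to 100: copy the rest */
	printf("partial overwrite -> copy: %d\n", copy_right(100, 70));
	/* store ends at 100, consuming the node: nothing left to copy */
	printf("full overwrite    -> copy: %d\n", copy_right(100, 100));
	return 0;
}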
@@ -15,6 +15,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
 	SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK),
 	SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
 	SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
+	SNMP_MIB_ITEM("MPCapableEndpAttempt", MPTCP_MIB_MPCAPABLEENDPATTEMPT),
 	SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT),
 	SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
 	SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
@@ -8,6 +8,7 @@ enum linux_mptcp_mib_field {
 	MPTCP_MIB_MPCAPABLEPASSIVEACK,		/* Received third ACK with MP_CAPABLE */
 	MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,	/* Server-side fallback during 3-way handshake */
 	MPTCP_MIB_MPCAPABLEACTIVEFALLBACK,	/* Client-side fallback during 3-way handshake */
+	MPTCP_MIB_MPCAPABLEENDPATTEMPT,		/* Prohibited MPC to port-based endp */
 	MPTCP_MIB_TOKENFALLBACKINIT,		/* Could not init/allocate token */
 	MPTCP_MIB_RETRANSSEGS,			/* Segments retransmitted at the MPTCP-level */
 	MPTCP_MIB_JOINNOTOKEN,			/* Received MP_JOIN but the token was not found */
@@ -1125,6 +1125,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
 	 */
 	inet_sk_state_store(newsk, TCP_LISTEN);
 	lock_sock(ssk);
+	WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true);
 	err = __inet_listen_sk(ssk, backlog);
 	if (!err)
 		mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED);
@@ -504,6 +504,7 @@ struct mptcp_subflow_context {
 		__unused : 9;
 	enum mptcp_data_avail data_avail;
 	bool	scheduled;
+	bool	pm_listener;	/* a listener managed by the kernel PM? */
 	u32	remote_nonce;
 	u64	thmac;
 	u32	local_nonce;
@@ -132,6 +132,13 @@ static void subflow_add_reset_reason(struct sk_buff *skb, u8 reason)
 	}
 }
 
+static int subflow_reset_req_endp(struct request_sock *req, struct sk_buff *skb)
+{
+	SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEENDPATTEMPT);
+	subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
+	return -EPERM;
+}
+
 /* Init mptcp request socket.
  *
  * Returns an error code if a JOIN has failed and a TCP reset
@@ -163,6 +170,8 @@ static int subflow_check_req(struct request_sock *req,
 	if (opt_mp_capable) {
 		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
 
+		if (unlikely(listener->pm_listener))
+			return subflow_reset_req_endp(req, skb);
 		if (opt_mp_join)
 			return 0;
 	} else if (opt_mp_join) {
@@ -170,6 +179,8 @@ static int subflow_check_req(struct request_sock *req,
 		if (mp_opt.backup)
 			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX);
+	} else if (unlikely(listener->pm_listener)) {
+		return subflow_reset_req_endp(req, skb);
 	}
 
 	if (opt_mp_capable && listener->request_mptcp) {