mirror of
https://github.com/nxp-imx/linux-imx.git
synced 2025-07-19 07:39:54 +02:00
Merge 4c5b123ab2
("blk-rq-qos: fix crash on rq_qos_wait vs. rq_qos_wake_function race") into android15-6.6-lts
Steps on the way to 6.6.57 Change-Id: I561755de546d2b23668440400c65d012c2700435 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
This commit is contained in:
commit
db82f3c8d1
|
@ -77,7 +77,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
|
|||
vcpu->stat.instruction_diagnose_258++;
|
||||
if (vcpu->run->s.regs.gprs[rx] & 7)
|
||||
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
|
||||
rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
|
||||
rc = read_guest_real(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
|
||||
if (rc)
|
||||
return kvm_s390_inject_prog_cond(vcpu, rc);
|
||||
if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
|
||||
|
|
|
@ -1001,6 +1001,8 @@ static int access_guest_page(struct kvm *kvm, enum gacc_mode mode, gpa_t gpa,
|
|||
const gfn_t gfn = gpa_to_gfn(gpa);
|
||||
int rc;
|
||||
|
||||
if (!gfn_to_memslot(kvm, gfn))
|
||||
return PGM_ADDRESSING;
|
||||
if (mode == GACC_STORE)
|
||||
rc = kvm_write_guest_page(kvm, gfn, data, offset, len);
|
||||
else
|
||||
|
@ -1158,6 +1160,8 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
|
|||
gra += fragment_len;
|
||||
data += fragment_len;
|
||||
}
|
||||
if (rc > 0)
|
||||
vcpu->arch.pgm.code = rc;
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
|
|
@ -405,11 +405,12 @@ int read_guest_abs(struct kvm_vcpu *vcpu, unsigned long gpa, void *data,
|
|||
* @len: number of bytes to copy
|
||||
*
|
||||
* Copy @len bytes from @data (kernel space) to @gra (guest real address).
|
||||
* It is up to the caller to ensure that the entire guest memory range is
|
||||
* valid memory before calling this function.
|
||||
* Guest low address and key protection are not checked.
|
||||
*
|
||||
* Returns zero on success or -EFAULT on error.
|
||||
* Returns zero on success, -EFAULT when copying from @data failed, or
|
||||
* PGM_ADDRESSING in case @gra is outside a memslot. In this case, pgm check info
|
||||
* is also stored to allow injecting into the guest (if applicable) using
|
||||
* kvm_s390_inject_prog_cond().
|
||||
*
|
||||
* If an error occurs data may have been copied partially to guest memory.
|
||||
*/
|
||||
|
@ -428,11 +429,12 @@ int write_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
|
|||
* @len: number of bytes to copy
|
||||
*
|
||||
* Copy @len bytes from @gra (guest real address) to @data (kernel space).
|
||||
* It is up to the caller to ensure that the entire guest memory range is
|
||||
* valid memory before calling this function.
|
||||
* Guest key protection is not checked.
|
||||
*
|
||||
* Returns zero on success or -EFAULT on error.
|
||||
* Returns zero on success, -EFAULT when copying to @data failed, or
|
||||
* PGM_ADDRESSING in case @gra is outside a memslot. In this case, pgm check info
|
||||
* is also stored to allow injecting into the guest (if applicable) using
|
||||
* kvm_s390_inject_prog_cond().
|
||||
*
|
||||
* If an error occurs data may have been copied partially to kernel space.
|
||||
*/
|
||||
|
|
|
@ -9,6 +9,8 @@
|
|||
#include <asm/unwind_hints.h>
|
||||
#include <asm/segment.h>
|
||||
#include <asm/cache.h>
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
|
||||
.pushsection .noinstr.text, "ax"
|
||||
|
||||
|
@ -17,6 +19,9 @@ SYM_FUNC_START(entry_ibpb)
|
|||
movl $PRED_CMD_IBPB, %eax
|
||||
xorl %edx, %edx
|
||||
wrmsr
|
||||
|
||||
/* Make sure IBPB clears return stack predictions too. */
|
||||
FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_BUG_IBPB_NO_RET
|
||||
RET
|
||||
SYM_FUNC_END(entry_ibpb)
|
||||
/* For KVM */
|
||||
|
|
|
@ -216,7 +216,7 @@
|
|||
#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
|
||||
#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */
|
||||
#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
|
||||
#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
|
||||
#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier without a guaranteed RSB flush */
|
||||
#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
|
||||
#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */
|
||||
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
|
||||
|
@ -347,6 +347,7 @@
|
|||
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
|
||||
#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */
|
||||
#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
|
||||
#define X86_FEATURE_AMD_IBPB_RET (13*32+30) /* "" IBPB clears return address predictor */
|
||||
#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
|
||||
|
||||
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
|
||||
|
@ -516,4 +517,5 @@
|
|||
#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
|
||||
#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
|
||||
#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
|
||||
#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */
|
||||
#endif /* _ASM_X86_CPUFEATURES_H */
|
||||
|
|
|
@ -1113,8 +1113,25 @@ do_cmd_auto:
|
|||
|
||||
case RETBLEED_MITIGATION_IBPB:
|
||||
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
|
||||
|
||||
/*
|
||||
* IBPB on entry already obviates the need for
|
||||
* software-based untraining so clear those in case some
|
||||
* other mitigation like SRSO has selected them.
|
||||
*/
|
||||
setup_clear_cpu_cap(X86_FEATURE_UNRET);
|
||||
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
|
||||
|
||||
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
|
||||
mitigate_smt = true;
|
||||
|
||||
/*
|
||||
* There is no need for RSB filling: entry_ibpb() ensures
|
||||
* all predictions, including the RSB, are invalidated,
|
||||
* regardless of IBPB implementation.
|
||||
*/
|
||||
setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
|
||||
|
||||
break;
|
||||
|
||||
case RETBLEED_MITIGATION_STUFF:
|
||||
|
@ -2610,6 +2627,14 @@ static void __init srso_select_mitigation(void)
|
|||
if (has_microcode) {
|
||||
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
|
||||
srso_mitigation = SRSO_MITIGATION_IBPB;
|
||||
|
||||
/*
|
||||
* IBPB on entry already obviates the need for
|
||||
* software-based untraining so clear those in case some
|
||||
* other mitigation like Retbleed has selected them.
|
||||
*/
|
||||
setup_clear_cpu_cap(X86_FEATURE_UNRET);
|
||||
setup_clear_cpu_cap(X86_FEATURE_RETHUNK);
|
||||
}
|
||||
} else {
|
||||
pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
|
||||
|
@ -2622,6 +2647,13 @@ static void __init srso_select_mitigation(void)
|
|||
if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
|
||||
setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
|
||||
srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
|
||||
|
||||
/*
|
||||
* There is no need for RSB filling: entry_ibpb() ensures
|
||||
* all predictions, including the RSB, are invalidated,
|
||||
* regardless of IBPB implementation.
|
||||
*/
|
||||
setup_clear_cpu_cap(X86_FEATURE_RSB_VMEXIT);
|
||||
}
|
||||
} else {
|
||||
pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
|
||||
|
|
|
@ -1483,6 +1483,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
|
|||
boot_cpu_has(X86_FEATURE_HYPERVISOR)))
|
||||
setup_force_cpu_bug(X86_BUG_BHI);
|
||||
|
||||
if (cpu_has(c, X86_FEATURE_AMD_IBPB) && !cpu_has(c, X86_FEATURE_AMD_IBPB_RET))
|
||||
setup_force_cpu_bug(X86_BUG_IBPB_NO_RET);
|
||||
|
||||
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
|
||||
return;
|
||||
|
||||
|
|
|
@ -219,8 +219,8 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr,
|
|||
|
||||
data->got_token = true;
|
||||
smp_wmb();
|
||||
list_del_init(&curr->entry);
|
||||
wake_up_process(data->task);
|
||||
list_del_init_careful(&curr->entry);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -3925,8 +3925,10 @@ static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *op
|
|||
*/
|
||||
static void domain_context_clear(struct device_domain_info *info)
|
||||
{
|
||||
if (!dev_is_pci(info->dev))
|
||||
if (!dev_is_pci(info->dev)) {
|
||||
domain_context_clear_one(info, info->bus, info->devfn);
|
||||
return;
|
||||
}
|
||||
|
||||
pci_for_each_dma_alias(to_pci_dev(info->dev),
|
||||
&domain_context_clear_one_cb, info);
|
||||
|
|
|
@ -90,6 +90,30 @@
|
|||
#define FEC_PTP_MAX_NSEC_PERIOD 4000000000ULL
|
||||
#define FEC_PTP_MAX_NSEC_COUNTER 0x80000000ULL
|
||||
|
||||
/**
|
||||
* fec_ptp_read - read raw cycle counter (to be used by time counter)
|
||||
* @cc: the cyclecounter structure
|
||||
*
|
||||
* this function reads the cyclecounter registers and is called by the
|
||||
* cyclecounter structure used to construct a ns counter from the
|
||||
* arbitrary fixed point registers
|
||||
*/
|
||||
static u64 fec_ptp_read(const struct cyclecounter *cc)
|
||||
{
|
||||
struct fec_enet_private *fep =
|
||||
container_of(cc, struct fec_enet_private, cc);
|
||||
u32 tempval;
|
||||
|
||||
tempval = readl(fep->hwp + FEC_ATIME_CTRL);
|
||||
tempval |= FEC_T_CTRL_CAPTURE;
|
||||
writel(tempval, fep->hwp + FEC_ATIME_CTRL);
|
||||
|
||||
if (fep->quirks & FEC_QUIRK_BUG_CAPTURE)
|
||||
udelay(1);
|
||||
|
||||
return readl(fep->hwp + FEC_ATIME);
|
||||
}
|
||||
|
||||
/**
|
||||
* fec_ptp_enable_pps
|
||||
* @fep: the fec_enet_private structure handle
|
||||
|
@ -136,7 +160,7 @@ static int fec_ptp_enable_pps(struct fec_enet_private *fep, uint enable)
|
|||
* NSEC_PER_SEC - ts.tv_nsec. Add the remaining nanoseconds
|
||||
* to current timer would be next second.
|
||||
*/
|
||||
tempval = fep->cc.read(&fep->cc);
|
||||
tempval = fec_ptp_read(&fep->cc);
|
||||
/* Convert the ptp local counter to 1588 timestamp */
|
||||
ns = timecounter_cyc2time(&fep->tc, tempval);
|
||||
ts = ns_to_timespec64(ns);
|
||||
|
@ -211,13 +235,7 @@ static int fec_ptp_pps_perout(struct fec_enet_private *fep)
|
|||
timecounter_read(&fep->tc);
|
||||
|
||||
/* Get the current ptp hardware time counter */
|
||||
temp_val = readl(fep->hwp + FEC_ATIME_CTRL);
|
||||
temp_val |= FEC_T_CTRL_CAPTURE;
|
||||
writel(temp_val, fep->hwp + FEC_ATIME_CTRL);
|
||||
if (fep->quirks & FEC_QUIRK_BUG_CAPTURE)
|
||||
udelay(1);
|
||||
|
||||
ptp_hc = readl(fep->hwp + FEC_ATIME);
|
||||
ptp_hc = fec_ptp_read(&fep->cc);
|
||||
|
||||
/* Convert the ptp local counter to 1588 timestamp */
|
||||
curr_time = timecounter_cyc2time(&fep->tc, ptp_hc);
|
||||
|
@ -271,30 +289,6 @@ static enum hrtimer_restart fec_ptp_pps_perout_handler(struct hrtimer *timer)
|
|||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
/**
|
||||
* fec_ptp_read - read raw cycle counter (to be used by time counter)
|
||||
* @cc: the cyclecounter structure
|
||||
*
|
||||
* this function reads the cyclecounter registers and is called by the
|
||||
* cyclecounter structure used to construct a ns counter from the
|
||||
* arbitrary fixed point registers
|
||||
*/
|
||||
static u64 fec_ptp_read(const struct cyclecounter *cc)
|
||||
{
|
||||
struct fec_enet_private *fep =
|
||||
container_of(cc, struct fec_enet_private, cc);
|
||||
u32 tempval;
|
||||
|
||||
tempval = readl(fep->hwp + FEC_ATIME_CTRL);
|
||||
tempval |= FEC_T_CTRL_CAPTURE;
|
||||
writel(tempval, fep->hwp + FEC_ATIME_CTRL);
|
||||
|
||||
if (fep->quirks & FEC_QUIRK_BUG_CAPTURE)
|
||||
udelay(1);
|
||||
|
||||
return readl(fep->hwp + FEC_ATIME);
|
||||
}
|
||||
|
||||
/**
|
||||
* fec_ptp_start_cyclecounter - create the cycle counter from hw
|
||||
* @ndev: network device
|
||||
|
|
|
@ -1195,7 +1195,8 @@ sclp_reboot_event(struct notifier_block *this, unsigned long event, void *ptr)
|
|||
}
|
||||
|
||||
static struct notifier_block sclp_reboot_notifier = {
|
||||
.notifier_call = sclp_reboot_event
|
||||
.notifier_call = sclp_reboot_event,
|
||||
.priority = INT_MIN,
|
||||
};
|
||||
|
||||
static ssize_t con_pages_show(struct device_driver *dev, char *buf)
|
||||
|
|
|
@ -319,7 +319,7 @@ sclp_vt220_add_msg(struct sclp_vt220_request *request,
|
|||
buffer = (void *) ((addr_t) sccb + sccb->header.length);
|
||||
|
||||
if (convertlf) {
|
||||
/* Perform Linefeed conversion (0x0a -> 0x0a 0x0d)*/
|
||||
/* Perform Linefeed conversion (0x0a -> 0x0d 0x0a)*/
|
||||
for (from=0, to=0;
|
||||
(from < count) && (to < sclp_vt220_space_left(request));
|
||||
from++) {
|
||||
|
@ -328,8 +328,8 @@ sclp_vt220_add_msg(struct sclp_vt220_request *request,
|
|||
/* Perform conversion */
|
||||
if (c == 0x0a) {
|
||||
if (to + 1 < sclp_vt220_space_left(request)) {
|
||||
((unsigned char *) buffer)[to++] = c;
|
||||
((unsigned char *) buffer)[to++] = 0x0d;
|
||||
((unsigned char *) buffer)[to++] = c;
|
||||
} else
|
||||
break;
|
||||
|
||||
|
|
|
@ -1565,12 +1565,23 @@ out_release:
|
|||
return error;
|
||||
}
|
||||
|
||||
/* Enforce that there is at most one namespace bit per attr. */
|
||||
inline bool xfs_attr_check_namespace(unsigned int attr_flags)
|
||||
{
|
||||
return hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) < 2;
|
||||
}
|
||||
|
||||
/* Returns true if the attribute entry name is valid. */
|
||||
bool
|
||||
xfs_attr_namecheck(
|
||||
unsigned int attr_flags,
|
||||
const void *name,
|
||||
size_t length)
|
||||
{
|
||||
/* Only one namespace bit allowed. */
|
||||
if (!xfs_attr_check_namespace(attr_flags))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* MAXNAMELEN includes the trailing null, but (name/length) leave it
|
||||
* out, so use >= for the length check.
|
||||
|
|
|
@ -547,7 +547,9 @@ int xfs_attr_get(struct xfs_da_args *args);
|
|||
int xfs_attr_set(struct xfs_da_args *args);
|
||||
int xfs_attr_set_iter(struct xfs_attr_intent *attr);
|
||||
int xfs_attr_remove_iter(struct xfs_attr_intent *attr);
|
||||
bool xfs_attr_namecheck(const void *name, size_t length);
|
||||
bool xfs_attr_check_namespace(unsigned int attr_flags);
|
||||
bool xfs_attr_namecheck(unsigned int attr_flags, const void *name,
|
||||
size_t length);
|
||||
int xfs_attr_calc_size(struct xfs_da_args *args, int *local);
|
||||
void xfs_init_attr_trans(struct xfs_da_args *args, struct xfs_trans_res *tres,
|
||||
unsigned int *total);
|
||||
|
|
|
@ -984,6 +984,10 @@ xfs_attr_shortform_to_leaf(
|
|||
nargs.hashval = xfs_da_hashname(sfe->nameval,
|
||||
sfe->namelen);
|
||||
nargs.attr_filter = sfe->flags & XFS_ATTR_NSP_ONDISK_MASK;
|
||||
if (!xfs_attr_check_namespace(sfe->flags)) {
|
||||
error = -EFSCORRUPTED;
|
||||
goto out;
|
||||
}
|
||||
error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */
|
||||
ASSERT(error == -ENOATTR);
|
||||
error = xfs_attr3_leaf_add(bp, &nargs);
|
||||
|
@ -1105,7 +1109,7 @@ xfs_attr_shortform_verify(
|
|||
* one namespace flag per xattr, so we can just count the
|
||||
* bits (i.e. hweight) here.
|
||||
*/
|
||||
if (hweight8(sfep->flags & XFS_ATTR_NSP_ONDISK_MASK) > 1)
|
||||
if (!xfs_attr_check_namespace(sfep->flags))
|
||||
return __this_address;
|
||||
|
||||
sfep = next_sfep;
|
||||
|
|
|
@ -619,7 +619,6 @@ xfs_attr_rmtval_set_blk(
|
|||
if (error)
|
||||
return error;
|
||||
|
||||
ASSERT(nmap == 1);
|
||||
ASSERT((map->br_startblock != DELAYSTARTBLOCK) &&
|
||||
(map->br_startblock != HOLESTARTBLOCK));
|
||||
|
||||
|
|
|
@ -1549,6 +1549,7 @@ xfs_bmap_add_extent_delay_real(
|
|||
if (error)
|
||||
goto done;
|
||||
}
|
||||
ASSERT(da_new <= da_old);
|
||||
break;
|
||||
|
||||
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
|
||||
|
@ -1578,6 +1579,7 @@ xfs_bmap_add_extent_delay_real(
|
|||
if (error)
|
||||
goto done;
|
||||
}
|
||||
ASSERT(da_new <= da_old);
|
||||
break;
|
||||
|
||||
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
|
||||
|
@ -1611,6 +1613,7 @@ xfs_bmap_add_extent_delay_real(
|
|||
if (error)
|
||||
goto done;
|
||||
}
|
||||
ASSERT(da_new <= da_old);
|
||||
break;
|
||||
|
||||
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
|
||||
|
@ -1643,6 +1646,7 @@ xfs_bmap_add_extent_delay_real(
|
|||
goto done;
|
||||
}
|
||||
}
|
||||
ASSERT(da_new <= da_old);
|
||||
break;
|
||||
|
||||
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
|
||||
|
@ -1680,6 +1684,7 @@ xfs_bmap_add_extent_delay_real(
|
|||
if (error)
|
||||
goto done;
|
||||
}
|
||||
ASSERT(da_new <= da_old);
|
||||
break;
|
||||
|
||||
case BMAP_LEFT_FILLING:
|
||||
|
@ -1767,6 +1772,7 @@ xfs_bmap_add_extent_delay_real(
|
|||
xfs_iext_update_extent(bma->ip, state, &bma->icur, &PREV);
|
||||
xfs_iext_next(ifp, &bma->icur);
|
||||
xfs_iext_update_extent(bma->ip, state, &bma->icur, &RIGHT);
|
||||
ASSERT(da_new <= da_old);
|
||||
break;
|
||||
|
||||
case BMAP_RIGHT_FILLING:
|
||||
|
@ -1814,6 +1820,7 @@ xfs_bmap_add_extent_delay_real(
|
|||
PREV.br_blockcount = temp;
|
||||
xfs_iext_insert(bma->ip, &bma->icur, &PREV, state);
|
||||
xfs_iext_next(ifp, &bma->icur);
|
||||
ASSERT(da_new <= da_old);
|
||||
break;
|
||||
|
||||
case 0:
|
||||
|
@ -1934,11 +1941,9 @@ xfs_bmap_add_extent_delay_real(
|
|||
}
|
||||
|
||||
/* adjust for changes in reserved delayed indirect blocks */
|
||||
if (da_new != da_old) {
|
||||
ASSERT(state == 0 || da_new < da_old);
|
||||
if (da_new != da_old)
|
||||
error = xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new),
|
||||
false);
|
||||
}
|
||||
true);
|
||||
|
||||
xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
|
||||
done:
|
||||
|
@ -3969,20 +3974,32 @@ xfs_bmapi_reserve_delalloc(
|
|||
xfs_extlen_t alen;
|
||||
xfs_extlen_t indlen;
|
||||
int error;
|
||||
xfs_fileoff_t aoff = off;
|
||||
xfs_fileoff_t aoff;
|
||||
bool use_cowextszhint =
|
||||
whichfork == XFS_COW_FORK && !prealloc;
|
||||
|
||||
retry:
|
||||
/*
|
||||
* Cap the alloc length. Keep track of prealloc so we know whether to
|
||||
* tag the inode before we return.
|
||||
*/
|
||||
aoff = off;
|
||||
alen = XFS_FILBLKS_MIN(len + prealloc, XFS_MAX_BMBT_EXTLEN);
|
||||
if (!eof)
|
||||
alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
|
||||
if (prealloc && alen >= len)
|
||||
prealloc = alen - len;
|
||||
|
||||
/* Figure out the extent size, adjust alen */
|
||||
if (whichfork == XFS_COW_FORK) {
|
||||
/*
|
||||
* If we're targeting the COW fork but aren't creating a speculative
|
||||
* posteof preallocation, try to expand the reservation to align with
|
||||
* the COW extent size hint if there's sufficient free space.
|
||||
*
|
||||
* Unlike the data fork, the CoW cancellation functions will free all
|
||||
* the reservations at inactivation, so we don't require that every
|
||||
* delalloc reservation have a dirty pagecache.
|
||||
*/
|
||||
if (use_cowextszhint) {
|
||||
struct xfs_bmbt_irec prev;
|
||||
xfs_extlen_t extsz = xfs_get_cowextsz_hint(ip);
|
||||
|
||||
|
@ -4001,7 +4018,7 @@ xfs_bmapi_reserve_delalloc(
|
|||
*/
|
||||
error = xfs_quota_reserve_blkres(ip, alen);
|
||||
if (error)
|
||||
return error;
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Split changing sb for alen and indlen since they could be coming
|
||||
|
@ -4046,6 +4063,17 @@ out_unreserve_blocks:
|
|||
out_unreserve_quota:
|
||||
if (XFS_IS_QUOTA_ON(mp))
|
||||
xfs_quota_unreserve_blkres(ip, alen);
|
||||
out:
|
||||
if (error == -ENOSPC || error == -EDQUOT) {
|
||||
trace_xfs_delalloc_enospc(ip, off, len);
|
||||
|
||||
if (prealloc || use_cowextszhint) {
|
||||
/* retry without any preallocation */
|
||||
use_cowextszhint = false;
|
||||
prealloc = 0;
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -4128,8 +4156,10 @@ xfs_bmapi_allocate(
|
|||
} else {
|
||||
error = xfs_bmap_alloc_userdata(bma);
|
||||
}
|
||||
if (error || bma->blkno == NULLFSBLOCK)
|
||||
if (error)
|
||||
return error;
|
||||
if (bma->blkno == NULLFSBLOCK)
|
||||
return -ENOSPC;
|
||||
|
||||
if (bma->flags & XFS_BMAPI_ZERO) {
|
||||
error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
|
||||
|
@ -4309,6 +4339,15 @@ xfs_bmapi_finish(
|
|||
* extent state if necessary. Detailed behaviour is controlled by the flags
|
||||
* parameter. Only allocates blocks from a single allocation group, to avoid
|
||||
* locking problems.
|
||||
*
|
||||
* Returns 0 on success and places the extent mappings in mval. nmaps is used
|
||||
* as an input/output parameter where the caller specifies the maximum number
|
||||
* of mappings that may be returned and xfs_bmapi_write passes back the number
|
||||
* of mappings (including existing mappings) it found.
|
||||
*
|
||||
* Returns a negative error code on failure, including -ENOSPC when it could not
|
||||
* allocate any blocks and -ENOSR when it did allocate blocks to convert a
|
||||
* delalloc range, but those blocks were before the passed in range.
|
||||
*/
|
||||
int
|
||||
xfs_bmapi_write(
|
||||
|
@ -4436,10 +4475,16 @@ xfs_bmapi_write(
|
|||
ASSERT(len > 0);
|
||||
ASSERT(bma.length > 0);
|
||||
error = xfs_bmapi_allocate(&bma);
|
||||
if (error)
|
||||
if (error) {
|
||||
/*
|
||||
* If we already allocated space in a previous
|
||||
* iteration return what we got so far when
|
||||
* running out of space.
|
||||
*/
|
||||
if (error == -ENOSPC && bma.nallocs)
|
||||
break;
|
||||
goto error0;
|
||||
if (bma.blkno == NULLFSBLOCK)
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this is a CoW allocation, record the data in
|
||||
|
@ -4477,7 +4522,6 @@ xfs_bmapi_write(
|
|||
if (!xfs_iext_next_extent(ifp, &bma.icur, &bma.got))
|
||||
eof = true;
|
||||
}
|
||||
*nmap = n;
|
||||
|
||||
error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
|
||||
whichfork);
|
||||
|
@ -4488,7 +4532,22 @@ xfs_bmapi_write(
|
|||
ifp->if_nextents > XFS_IFORK_MAXEXT(ip, whichfork));
|
||||
xfs_bmapi_finish(&bma, whichfork, 0);
|
||||
xfs_bmap_validate_ret(orig_bno, orig_len, orig_flags, orig_mval,
|
||||
orig_nmap, *nmap);
|
||||
orig_nmap, n);
|
||||
|
||||
/*
|
||||
* When converting delayed allocations, xfs_bmapi_allocate ignores
|
||||
* the passed in bno and always converts from the start of the found
|
||||
* delalloc extent.
|
||||
*
|
||||
* To avoid a successful return with *nmap set to 0, return the magic
|
||||
* -ENOSR error code for this particular case so that the caller can
|
||||
* handle it.
|
||||
*/
|
||||
if (!n) {
|
||||
ASSERT(bma.nallocs >= *nmap);
|
||||
return -ENOSR;
|
||||
}
|
||||
*nmap = n;
|
||||
return 0;
|
||||
error0:
|
||||
xfs_bmapi_finish(&bma, whichfork, error);
|
||||
|
@ -4501,8 +4560,8 @@ error0:
|
|||
* invocations to allocate the target offset if a large enough physical extent
|
||||
* is not available.
|
||||
*/
|
||||
int
|
||||
xfs_bmapi_convert_delalloc(
|
||||
static int
|
||||
xfs_bmapi_convert_one_delalloc(
|
||||
struct xfs_inode *ip,
|
||||
int whichfork,
|
||||
xfs_off_t offset,
|
||||
|
@ -4559,7 +4618,8 @@ xfs_bmapi_convert_delalloc(
|
|||
if (!isnullstartblock(bma.got.br_startblock)) {
|
||||
xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
|
||||
xfs_iomap_inode_sequence(ip, flags));
|
||||
*seq = READ_ONCE(ifp->if_seq);
|
||||
if (seq)
|
||||
*seq = READ_ONCE(ifp->if_seq);
|
||||
goto out_trans_cancel;
|
||||
}
|
||||
|
||||
|
@ -4595,9 +4655,6 @@ xfs_bmapi_convert_delalloc(
|
|||
if (error)
|
||||
goto out_finish;
|
||||
|
||||
error = -ENOSPC;
|
||||
if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
|
||||
goto out_finish;
|
||||
error = -EFSCORRUPTED;
|
||||
if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
|
||||
goto out_finish;
|
||||
|
@ -4608,7 +4665,8 @@ xfs_bmapi_convert_delalloc(
|
|||
ASSERT(!isnullstartblock(bma.got.br_startblock));
|
||||
xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
|
||||
xfs_iomap_inode_sequence(ip, flags));
|
||||
*seq = READ_ONCE(ifp->if_seq);
|
||||
if (seq)
|
||||
*seq = READ_ONCE(ifp->if_seq);
|
||||
|
||||
if (whichfork == XFS_COW_FORK)
|
||||
xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
|
||||
|
@ -4631,6 +4689,36 @@ out_trans_cancel:
|
|||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* Pass in a delalloc extent and convert it to real extents, return the real
|
||||
* extent that maps offset_fsb in iomap.
|
||||
*/
|
||||
int
|
||||
xfs_bmapi_convert_delalloc(
|
||||
struct xfs_inode *ip,
|
||||
int whichfork,
|
||||
loff_t offset,
|
||||
struct iomap *iomap,
|
||||
unsigned int *seq)
|
||||
{
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Attempt to allocate whatever delalloc extent currently backs offset
|
||||
* and put the result into iomap. Allocate in a loop because it may
|
||||
* take several attempts to allocate real blocks for a contiguous
|
||||
* delalloc extent if free space is sufficiently fragmented.
|
||||
*/
|
||||
do {
|
||||
error = xfs_bmapi_convert_one_delalloc(ip, whichfork, offset,
|
||||
iomap, seq);
|
||||
if (error)
|
||||
return error;
|
||||
} while (iomap->offset + iomap->length <= offset);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
xfs_bmapi_remap(
|
||||
struct xfs_trans *tp,
|
||||
|
|
|
@ -2158,8 +2158,8 @@ xfs_da_grow_inode_int(
|
|||
struct xfs_inode *dp = args->dp;
|
||||
int w = args->whichfork;
|
||||
xfs_rfsblock_t nblks = dp->i_nblocks;
|
||||
struct xfs_bmbt_irec map, *mapp;
|
||||
int nmap, error, got, i, mapi;
|
||||
struct xfs_bmbt_irec map, *mapp = &map;
|
||||
int nmap, error, got, i, mapi = 1;
|
||||
|
||||
/*
|
||||
* Find a spot in the file space to put the new block.
|
||||
|
@ -2175,14 +2175,7 @@ xfs_da_grow_inode_int(
|
|||
error = xfs_bmapi_write(tp, dp, *bno, count,
|
||||
xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
|
||||
args->total, &map, &nmap);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
ASSERT(nmap <= 1);
|
||||
if (nmap == 1) {
|
||||
mapp = &map;
|
||||
mapi = 1;
|
||||
} else if (nmap == 0 && count > 1) {
|
||||
if (error == -ENOSPC && count > 1) {
|
||||
xfs_fileoff_t b;
|
||||
int c;
|
||||
|
||||
|
@ -2199,16 +2192,13 @@ xfs_da_grow_inode_int(
|
|||
args->total, &mapp[mapi], &nmap);
|
||||
if (error)
|
||||
goto out_free_map;
|
||||
if (nmap < 1)
|
||||
break;
|
||||
mapi += nmap;
|
||||
b = mapp[mapi - 1].br_startoff +
|
||||
mapp[mapi - 1].br_blockcount;
|
||||
}
|
||||
} else {
|
||||
mapi = 0;
|
||||
mapp = NULL;
|
||||
}
|
||||
if (error)
|
||||
goto out_free_map;
|
||||
|
||||
/*
|
||||
* Count the blocks we got, make sure it matches the total.
|
||||
|
|
|
@ -703,8 +703,13 @@ struct xfs_attr3_leafblock {
|
|||
#define XFS_ATTR_ROOT (1u << XFS_ATTR_ROOT_BIT)
|
||||
#define XFS_ATTR_SECURE (1u << XFS_ATTR_SECURE_BIT)
|
||||
#define XFS_ATTR_INCOMPLETE (1u << XFS_ATTR_INCOMPLETE_BIT)
|
||||
|
||||
#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE)
|
||||
|
||||
#define XFS_ATTR_ONDISK_MASK (XFS_ATTR_NSP_ONDISK_MASK | \
|
||||
XFS_ATTR_LOCAL | \
|
||||
XFS_ATTR_INCOMPLETE)
|
||||
|
||||
/*
|
||||
* Alignment for namelist and valuelist entries (since they are mixed
|
||||
* there can be only one alignment value)
|
||||
|
|
|
@ -366,17 +366,40 @@ xfs_dinode_verify_fork(
|
|||
/*
|
||||
* For fork types that can contain local data, check that the fork
|
||||
* format matches the size of local data contained within the fork.
|
||||
*
|
||||
* For all types, check that when the size says the fork should be in extent
|
||||
* or btree format, the inode isn't claiming it is in local format.
|
||||
*/
|
||||
if (whichfork == XFS_DATA_FORK) {
|
||||
if (S_ISDIR(mode) || S_ISLNK(mode)) {
|
||||
if (be64_to_cpu(dip->di_size) <= fork_size &&
|
||||
/*
|
||||
* A directory small enough to fit in the inode must be stored
|
||||
* in local format. The directory sf <-> extents conversion
|
||||
* code updates the directory size accordingly. Directories
|
||||
* being truncated have zero size and are not subject to this
|
||||
* check.
|
||||
*/
|
||||
if (S_ISDIR(mode)) {
|
||||
if (dip->di_size &&
|
||||
be64_to_cpu(dip->di_size) <= fork_size &&
|
||||
fork_format != XFS_DINODE_FMT_LOCAL)
|
||||
return __this_address;
|
||||
}
|
||||
|
||||
/*
|
||||
* A symlink with a target small enough to fit in the inode can
|
||||
* be stored in extents format if xattrs were added (thus
|
||||
* converting the data fork from shortform to remote format)
|
||||
* and then removed.
|
||||
*/
|
||||
if (S_ISLNK(mode)) {
|
||||
if (be64_to_cpu(dip->di_size) <= fork_size &&
|
||||
fork_format != XFS_DINODE_FMT_EXTENTS &&
|
||||
fork_format != XFS_DINODE_FMT_LOCAL)
|
||||
return __this_address;
|
||||
}
|
||||
|
||||
/*
|
||||
* For all types, check that when the size says the fork should
|
||||
* be in extent or btree format, the inode isn't claiming to be
|
||||
* in local format.
|
||||
*/
|
||||
if (be64_to_cpu(dip->di_size) > fork_size &&
|
||||
fork_format == XFS_DINODE_FMT_LOCAL)
|
||||
return __this_address;
|
||||
|
@ -492,9 +515,19 @@ xfs_dinode_verify(
|
|||
if (mode && xfs_mode_to_ftype(mode) == XFS_DIR3_FT_UNKNOWN)
|
||||
return __this_address;
|
||||
|
||||
/* No zero-length symlinks/dirs. */
|
||||
if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0)
|
||||
return __this_address;
|
||||
/*
|
||||
* No zero-length symlinks/dirs unless they're unlinked and hence being
|
||||
* inactivated.
|
||||
*/
|
||||
if ((S_ISLNK(mode) || S_ISDIR(mode)) && di_size == 0) {
|
||||
if (dip->di_version > 1) {
|
||||
if (dip->di_nlink)
|
||||
return __this_address;
|
||||
} else {
|
||||
if (dip->di_onlink)
|
||||
return __this_address;
|
||||
}
|
||||
}
|
||||
|
||||
fa = xfs_dinode_verify_nrext64(mp, dip);
|
||||
if (fa)
|
||||
|
|
|
@ -1031,11 +1031,12 @@ xfs_log_sb(
|
|||
* and hence we don't need have to update it here.
|
||||
*/
|
||||
if (xfs_has_lazysbcount(mp)) {
|
||||
mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
|
||||
mp->m_sb.sb_icount = percpu_counter_sum_positive(&mp->m_icount);
|
||||
mp->m_sb.sb_ifree = min_t(uint64_t,
|
||||
percpu_counter_sum(&mp->m_ifree),
|
||||
percpu_counter_sum_positive(&mp->m_ifree),
|
||||
mp->m_sb.sb_icount);
|
||||
mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
|
||||
mp->m_sb.sb_fdblocks =
|
||||
percpu_counter_sum_positive(&mp->m_fdblocks);
|
||||
}
|
||||
|
||||
xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
|
||||
|
|
|
@ -182,32 +182,23 @@ xchk_xattr_listent(
|
|||
return;
|
||||
}
|
||||
|
||||
if (flags & ~XFS_ATTR_ONDISK_MASK) {
|
||||
xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno);
|
||||
goto fail_xref;
|
||||
}
|
||||
|
||||
if (flags & XFS_ATTR_INCOMPLETE) {
|
||||
/* Incomplete attr key, just mark the inode for preening. */
|
||||
xchk_ino_set_preen(sx->sc, context->dp->i_ino);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Only one namespace bit allowed. */
|
||||
if (hweight32(flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) {
|
||||
xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno);
|
||||
goto fail_xref;
|
||||
}
|
||||
|
||||
/* Does this name make sense? */
|
||||
if (!xfs_attr_namecheck(name, namelen)) {
|
||||
if (!xfs_attr_namecheck(flags, name, namelen)) {
|
||||
xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno);
|
||||
goto fail_xref;
|
||||
}
|
||||
|
||||
/*
|
||||
* Local xattr values are stored in the attr leaf block, so we don't
|
||||
* need to retrieve the value from a remote block to detect corruption
|
||||
* problems.
|
||||
*/
|
||||
if (flags & XFS_ATTR_LOCAL)
|
||||
goto fail_xref;
|
||||
|
||||
/*
|
||||
* Try to allocate enough memory to extract the attr value. If that
|
||||
* doesn't work, we overload the seen_enough variable to convey
|
||||
|
@ -223,6 +214,11 @@ xchk_xattr_listent(
|
|||
|
||||
args.value = ab->value;
|
||||
|
||||
/*
|
||||
* Get the attr value to ensure that lookup can find this attribute
|
||||
* through the dabtree indexing and that remote value retrieval also
|
||||
* works correctly.
|
||||
*/
|
||||
error = xfs_attr_get_ilocked(&args);
|
||||
/* ENODATA means the hash lookup failed and the attr is bad */
|
||||
if (error == -ENODATA)
|
||||
|
@ -463,7 +459,6 @@ xchk_xattr_rec(
|
|||
xfs_dahash_t hash;
|
||||
int nameidx;
|
||||
int hdrsize;
|
||||
unsigned int badflags;
|
||||
int error;
|
||||
|
||||
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
|
||||
|
@ -493,10 +488,15 @@ xchk_xattr_rec(
|
|||
|
||||
/* Retrieve the entry and check it. */
|
||||
hash = be32_to_cpu(ent->hashval);
|
||||
badflags = ~(XFS_ATTR_LOCAL | XFS_ATTR_ROOT | XFS_ATTR_SECURE |
|
||||
XFS_ATTR_INCOMPLETE);
|
||||
if ((ent->flags & badflags) != 0)
|
||||
if (ent->flags & ~XFS_ATTR_ONDISK_MASK) {
|
||||
xchk_da_set_corrupt(ds, level);
|
||||
return 0;
|
||||
}
|
||||
if (!xfs_attr_check_namespace(ent->flags)) {
|
||||
xchk_da_set_corrupt(ds, level);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (ent->flags & XFS_ATTR_LOCAL) {
|
||||
lentry = (struct xfs_attr_leaf_name_local *)
|
||||
(((char *)bp->b_addr) + nameidx);
|
||||
|
@ -561,6 +561,15 @@ xchk_xattr_check_sf(
|
|||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Shortform entries do not set LOCAL or INCOMPLETE, so the
|
||||
* only valid flag bits here are for namespaces.
|
||||
*/
|
||||
if (sfe->flags & ~XFS_ATTR_NSP_ONDISK_MASK) {
|
||||
xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!xchk_xattr_set_map(sc, ab->usedmap,
|
||||
(char *)sfe - (char *)sf,
|
||||
sizeof(struct xfs_attr_sf_entry))) {
|
||||
|
|
|
@ -735,7 +735,7 @@ xchk_iget(
|
|||
{
|
||||
ASSERT(sc->tp != NULL);
|
||||
|
||||
return xfs_iget(sc->mp, sc->tp, inum, XFS_IGET_UNTRUSTED, 0, ipp);
|
||||
return xfs_iget(sc->mp, sc->tp, inum, XCHK_IGET_FLAGS, 0, ipp);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -786,8 +786,8 @@ again:
|
|||
if (error)
|
||||
return error;
|
||||
|
||||
error = xfs_iget(mp, tp, inum,
|
||||
XFS_IGET_NORETRY | XFS_IGET_UNTRUSTED, 0, ipp);
|
||||
error = xfs_iget(mp, tp, inum, XFS_IGET_NORETRY | XCHK_IGET_FLAGS, 0,
|
||||
ipp);
|
||||
if (error == -EAGAIN) {
|
||||
/*
|
||||
* The inode may be in core but temporarily unavailable and may
|
||||
|
@ -994,12 +994,6 @@ xchk_irele(
|
|||
spin_lock(&VFS_I(ip)->i_lock);
|
||||
VFS_I(ip)->i_state &= ~I_DONTCACHE;
|
||||
spin_unlock(&VFS_I(ip)->i_lock);
|
||||
} else if (atomic_read(&VFS_I(ip)->i_count) == 1) {
|
||||
/*
|
||||
* If this is the last reference to the inode and the caller
|
||||
* permits it, set DONTCACHE to avoid thrashing.
|
||||
*/
|
||||
d_mark_dontcache(VFS_I(ip));
|
||||
}
|
||||
|
||||
xfs_irele(ip);
|
||||
|
|
|
@ -17,6 +17,13 @@ struct xfs_scrub;
|
|||
#define XCHK_GFP_FLAGS ((__force gfp_t)(GFP_KERNEL | __GFP_NOWARN | \
|
||||
__GFP_RETRY_MAYFAIL))
|
||||
|
||||
/*
|
||||
* For opening files by handle for fsck operations, we don't trust the inumber
|
||||
* or the allocation state; therefore, perform an untrusted lookup. We don't
|
||||
* want these inodes to pollute the cache, so mark them for immediate removal.
|
||||
*/
|
||||
#define XCHK_IGET_FLAGS (XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE)
|
||||
|
||||
/* Type info and names for the scrub types. */
|
||||
enum xchk_type {
|
||||
ST_NONE = 1, /* disabled */
|
||||
|
|
|
@ -233,45 +233,6 @@ xfs_imap_valid(
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Pass in a dellalloc extent and convert it to real extents, return the real
|
||||
* extent that maps offset_fsb in wpc->iomap.
|
||||
*
|
||||
* The current page is held locked so nothing could have removed the block
|
||||
* backing offset_fsb, although it could have moved from the COW to the data
|
||||
* fork by another thread.
|
||||
*/
|
||||
static int
|
||||
xfs_convert_blocks(
|
||||
struct iomap_writepage_ctx *wpc,
|
||||
struct xfs_inode *ip,
|
||||
int whichfork,
|
||||
loff_t offset)
|
||||
{
|
||||
int error;
|
||||
unsigned *seq;
|
||||
|
||||
if (whichfork == XFS_COW_FORK)
|
||||
seq = &XFS_WPC(wpc)->cow_seq;
|
||||
else
|
||||
seq = &XFS_WPC(wpc)->data_seq;
|
||||
|
||||
/*
|
||||
* Attempt to allocate whatever delalloc extent currently backs offset
|
||||
* and put the result into wpc->iomap. Allocate in a loop because it
|
||||
* may take several attempts to allocate real blocks for a contiguous
|
||||
* delalloc extent if free space is sufficiently fragmented.
|
||||
*/
|
||||
do {
|
||||
error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
|
||||
&wpc->iomap, seq);
|
||||
if (error)
|
||||
return error;
|
||||
} while (wpc->iomap.offset + wpc->iomap.length <= offset);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
xfs_map_blocks(
|
||||
struct iomap_writepage_ctx *wpc,
|
||||
|
@ -289,6 +250,7 @@ xfs_map_blocks(
|
|||
struct xfs_iext_cursor icur;
|
||||
int retries = 0;
|
||||
int error = 0;
|
||||
unsigned int *seq;
|
||||
|
||||
if (xfs_is_shutdown(mp))
|
||||
return -EIO;
|
||||
|
@ -386,7 +348,19 @@ retry:
|
|||
trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
|
||||
return 0;
|
||||
allocate_blocks:
|
||||
error = xfs_convert_blocks(wpc, ip, whichfork, offset);
|
||||
/*
|
||||
* Convert a delalloc extent to a real one. The current page is held
|
||||
* locked so nothing could have removed the block backing offset_fsb,
|
||||
* although it could have moved from the COW to the data fork by another
|
||||
* thread.
|
||||
*/
|
||||
if (whichfork == XFS_COW_FORK)
|
||||
seq = &XFS_WPC(wpc)->cow_seq;
|
||||
else
|
||||
seq = &XFS_WPC(wpc)->data_seq;
|
||||
|
||||
error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
|
||||
&wpc->iomap, seq);
|
||||
if (error) {
|
||||
/*
|
||||
* If we failed to find the extent in the COW fork we might have
|
||||
|
|
|
@ -510,6 +510,9 @@ xfs_attri_validate(
|
|||
unsigned int op = attrp->alfi_op_flags &
|
||||
XFS_ATTRI_OP_FLAGS_TYPE_MASK;
|
||||
|
||||
if (!xfs_sb_version_haslogxattrs(&mp->m_sb))
|
||||
return false;
|
||||
|
||||
if (attrp->__pad != 0)
|
||||
return false;
|
||||
|
||||
|
@ -519,6 +522,10 @@ xfs_attri_validate(
|
|||
if (attrp->alfi_attr_filter & ~XFS_ATTRI_FILTER_MASK)
|
||||
return false;
|
||||
|
||||
if (!xfs_attr_check_namespace(attrp->alfi_attr_filter &
|
||||
XFS_ATTR_NSP_ONDISK_MASK))
|
||||
return false;
|
||||
|
||||
/* alfi_op_flags should be either a set or remove */
|
||||
switch (op) {
|
||||
case XFS_ATTRI_OP_FLAGS_SET:
|
||||
|
@ -569,7 +576,8 @@ xfs_attri_item_recover(
|
|||
*/
|
||||
attrp = &attrip->attri_format;
|
||||
if (!xfs_attri_validate(mp, attrp) ||
|
||||
!xfs_attr_namecheck(nv->name.i_addr, nv->name.i_len))
|
||||
!xfs_attr_namecheck(attrp->alfi_attr_filter, nv->name.i_addr,
|
||||
nv->name.i_len))
|
||||
return -EFSCORRUPTED;
|
||||
|
||||
error = xlog_recover_iget(mp, attrp->alfi_ino, &ip);
|
||||
|
@ -602,8 +610,6 @@ xfs_attri_item_recover(
|
|||
args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT |
|
||||
XFS_DA_OP_LOGGED;
|
||||
|
||||
ASSERT(xfs_sb_version_haslogxattrs(&mp->m_sb));
|
||||
|
||||
switch (attr->xattri_op_flags) {
|
||||
case XFS_ATTRI_OP_FLAGS_SET:
|
||||
case XFS_ATTRI_OP_FLAGS_REPLACE:
|
||||
|
@ -718,48 +724,112 @@ xlog_recover_attri_commit_pass2(
|
|||
const void *attr_value = NULL;
|
||||
const void *attr_name;
|
||||
size_t len;
|
||||
|
||||
attri_formatp = item->ri_buf[0].i_addr;
|
||||
attr_name = item->ri_buf[1].i_addr;
|
||||
unsigned int op, i = 0;
|
||||
|
||||
/* Validate xfs_attri_log_format before the large memory allocation */
|
||||
len = sizeof(struct xfs_attri_log_format);
|
||||
if (item->ri_buf[0].i_len != len) {
|
||||
if (item->ri_buf[i].i_len != len) {
|
||||
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
|
||||
item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
|
||||
attri_formatp = item->ri_buf[i].i_addr;
|
||||
if (!xfs_attri_validate(mp, attri_formatp)) {
|
||||
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
|
||||
item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
|
||||
attri_formatp, len);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
|
||||
/* Check the number of log iovecs makes sense for the op code. */
|
||||
op = attri_formatp->alfi_op_flags & XFS_ATTRI_OP_FLAGS_TYPE_MASK;
|
||||
switch (op) {
|
||||
case XFS_ATTRI_OP_FLAGS_SET:
|
||||
case XFS_ATTRI_OP_FLAGS_REPLACE:
|
||||
/* Log item, attr name, attr value */
|
||||
if (item->ri_total != 3) {
|
||||
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
|
||||
attri_formatp, len);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
break;
|
||||
case XFS_ATTRI_OP_FLAGS_REMOVE:
|
||||
/* Log item, attr name */
|
||||
if (item->ri_total != 2) {
|
||||
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
|
||||
attri_formatp, len);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
|
||||
attri_formatp, len);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
i++;
|
||||
|
||||
/* Validate the attr name */
|
||||
if (item->ri_buf[1].i_len !=
|
||||
if (item->ri_buf[i].i_len !=
|
||||
xlog_calc_iovec_len(attri_formatp->alfi_name_len)) {
|
||||
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
|
||||
item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
|
||||
attri_formatp, len);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
|
||||
if (!xfs_attr_namecheck(attr_name, attri_formatp->alfi_name_len)) {
|
||||
attr_name = item->ri_buf[i].i_addr;
|
||||
if (!xfs_attr_namecheck(attri_formatp->alfi_attr_filter, attr_name,
|
||||
attri_formatp->alfi_name_len)) {
|
||||
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
|
||||
item->ri_buf[1].i_addr, item->ri_buf[1].i_len);
|
||||
attri_formatp, len);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
i++;
|
||||
|
||||
/* Validate the attr value, if present */
|
||||
if (attri_formatp->alfi_value_len != 0) {
|
||||
if (item->ri_buf[2].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) {
|
||||
if (item->ri_buf[i].i_len != xlog_calc_iovec_len(attri_formatp->alfi_value_len)) {
|
||||
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
|
||||
item->ri_buf[0].i_addr,
|
||||
item->ri_buf[0].i_len);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
|
||||
attr_value = item->ri_buf[2].i_addr;
|
||||
attr_value = item->ri_buf[i].i_addr;
|
||||
i++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make sure we got the correct number of buffers for the operation
|
||||
* that we just loaded.
|
||||
*/
|
||||
if (i != item->ri_total) {
|
||||
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
|
||||
attri_formatp, len);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
|
||||
switch (op) {
|
||||
case XFS_ATTRI_OP_FLAGS_REMOVE:
|
||||
/* Regular remove operations operate only on names. */
|
||||
if (attr_value != NULL || attri_formatp->alfi_value_len != 0) {
|
||||
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
|
||||
attri_formatp, len);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
fallthrough;
|
||||
case XFS_ATTRI_OP_FLAGS_SET:
|
||||
case XFS_ATTRI_OP_FLAGS_REPLACE:
|
||||
/*
|
||||
* Regular xattr set/remove/replace operations require a name
|
||||
* and do not take a newname. Values are optional for set and
|
||||
* replace.
|
||||
*/
|
||||
if (attr_name == NULL || attri_formatp->alfi_name_len == 0) {
|
||||
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
|
||||
attri_formatp, len);
|
||||
return -EFSCORRUPTED;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -82,7 +82,8 @@ xfs_attr_shortform_list(
|
|||
(dp->i_af.if_bytes + sf->hdr.count * 16) < context->bufsize)) {
|
||||
for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
|
||||
if (XFS_IS_CORRUPT(context->dp->i_mount,
|
||||
!xfs_attr_namecheck(sfe->nameval,
|
||||
!xfs_attr_namecheck(sfe->flags,
|
||||
sfe->nameval,
|
||||
sfe->namelen)))
|
||||
return -EFSCORRUPTED;
|
||||
context->put_listent(context,
|
||||
|
@ -120,7 +121,8 @@ xfs_attr_shortform_list(
|
|||
for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
|
||||
if (unlikely(
|
||||
((char *)sfe < (char *)sf) ||
|
||||
((char *)sfe >= ((char *)sf + dp->i_af.if_bytes)))) {
|
||||
((char *)sfe >= ((char *)sf + dp->i_af.if_bytes)) ||
|
||||
!xfs_attr_check_namespace(sfe->flags))) {
|
||||
XFS_CORRUPTION_ERROR("xfs_attr_shortform_list",
|
||||
XFS_ERRLEVEL_LOW,
|
||||
context->dp->i_mount, sfe,
|
||||
|
@ -174,7 +176,7 @@ xfs_attr_shortform_list(
|
|||
cursor->offset = 0;
|
||||
}
|
||||
if (XFS_IS_CORRUPT(context->dp->i_mount,
|
||||
!xfs_attr_namecheck(sbp->name,
|
||||
!xfs_attr_namecheck(sbp->flags, sbp->name,
|
||||
sbp->namelen))) {
|
||||
error = -EFSCORRUPTED;
|
||||
goto out;
|
||||
|
@ -465,7 +467,8 @@ xfs_attr3_leaf_list_int(
|
|||
}
|
||||
|
||||
if (XFS_IS_CORRUPT(context->dp->i_mount,
|
||||
!xfs_attr_namecheck(name, namelen)))
|
||||
!xfs_attr_namecheck(entry->flags, name,
|
||||
namelen)))
|
||||
return -EFSCORRUPTED;
|
||||
context->put_listent(context, entry->flags,
|
||||
name, namelen, valuelen);
|
||||
|
|
|
@ -636,13 +636,11 @@ out_unlock:
|
|||
|
||||
/*
|
||||
* Test whether it is appropriate to check an inode for and free post EOF
|
||||
* blocks. The 'force' parameter determines whether we should also consider
|
||||
* regular files that are marked preallocated or append-only.
|
||||
* blocks.
|
||||
*/
|
||||
bool
|
||||
xfs_can_free_eofblocks(
|
||||
struct xfs_inode *ip,
|
||||
bool force)
|
||||
struct xfs_inode *ip)
|
||||
{
|
||||
struct xfs_bmbt_irec imap;
|
||||
struct xfs_mount *mp = ip->i_mount;
|
||||
|
@ -676,11 +674,11 @@ xfs_can_free_eofblocks(
|
|||
return false;
|
||||
|
||||
/*
|
||||
* Do not free real preallocated or append-only files unless the file
|
||||
* has delalloc blocks and we are forced to remove them.
|
||||
* Only free real extents for inodes with persistent preallocations or
|
||||
* the append-only flag.
|
||||
*/
|
||||
if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
|
||||
if (!force || ip->i_delayed_blks == 0)
|
||||
if (ip->i_delayed_blks == 0)
|
||||
return false;
|
||||
|
||||
/*
|
||||
|
@ -734,6 +732,22 @@ xfs_free_eofblocks(
|
|||
/* Wait on dio to ensure i_size has settled. */
|
||||
inode_dio_wait(VFS_I(ip));
|
||||
|
||||
/*
|
||||
* For preallocated files only free delayed allocations.
|
||||
*
|
||||
* Note that this means we also leave speculative preallocations in
|
||||
* place for preallocated files.
|
||||
*/
|
||||
if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
|
||||
if (ip->i_delayed_blks) {
|
||||
xfs_bmap_punch_delalloc_range(ip,
|
||||
round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
|
||||
LLONG_MAX);
|
||||
}
|
||||
xfs_inode_clear_eofblocks_tag(ip);
|
||||
return 0;
|
||||
}
|
||||
|
||||
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
|
||||
if (error) {
|
||||
ASSERT(xfs_is_shutdown(mp));
|
||||
|
@ -868,33 +882,32 @@ xfs_alloc_file_space(
|
|||
if (error)
|
||||
goto error;
|
||||
|
||||
/*
|
||||
* If the allocator cannot find a single free extent large
|
||||
* enough to cover the start block of the requested range,
|
||||
* xfs_bmapi_write will return -ENOSR.
|
||||
*
|
||||
* In that case we simply need to keep looping with the same
|
||||
* startoffset_fsb so that one of the following allocations
|
||||
* will eventually reach the requested range.
|
||||
*/
|
||||
error = xfs_bmapi_write(tp, ip, startoffset_fsb,
|
||||
allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
|
||||
&nimaps);
|
||||
if (error)
|
||||
goto error;
|
||||
if (error) {
|
||||
if (error != -ENOSR)
|
||||
goto error;
|
||||
error = 0;
|
||||
} else {
|
||||
startoffset_fsb += imapp->br_blockcount;
|
||||
allocatesize_fsb -= imapp->br_blockcount;
|
||||
}
|
||||
|
||||
ip->i_diflags |= XFS_DIFLAG_PREALLOC;
|
||||
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
|
||||
|
||||
error = xfs_trans_commit(tp);
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
if (error)
|
||||
break;
|
||||
|
||||
/*
|
||||
* If the allocator cannot find a single free extent large
|
||||
* enough to cover the start block of the requested range,
|
||||
* xfs_bmapi_write will return 0 but leave *nimaps set to 0.
|
||||
*
|
||||
* In that case we simply need to keep looping with the same
|
||||
* startoffset_fsb so that one of the following allocations
|
||||
* will eventually reach the requested range.
|
||||
*/
|
||||
if (nimaps) {
|
||||
startoffset_fsb += imapp->br_blockcount;
|
||||
allocatesize_fsb -= imapp->br_blockcount;
|
||||
}
|
||||
}
|
||||
|
||||
return error;
|
||||
|
@ -1049,7 +1062,7 @@ xfs_prepare_shift(
|
|||
* Trim eofblocks to avoid shifting uninitialized post-eof preallocation
|
||||
* into the accessible region of the file.
|
||||
*/
|
||||
if (xfs_can_free_eofblocks(ip, true)) {
|
||||
if (xfs_can_free_eofblocks(ip)) {
|
||||
error = xfs_free_eofblocks(ip);
|
||||
if (error)
|
||||
return error;
|
||||
|
|
|
@ -63,7 +63,7 @@ int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
|
|||
xfs_off_t len);
|
||||
|
||||
/* EOF block manipulation functions */
|
||||
bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
|
||||
bool xfs_can_free_eofblocks(struct xfs_inode *ip);
|
||||
int xfs_free_eofblocks(struct xfs_inode *ip);
|
||||
|
||||
int xfs_swap_extents(struct xfs_inode *ip, struct xfs_inode *tip,
|
||||
|
|
|
@ -333,7 +333,6 @@ xfs_dquot_disk_alloc(
|
|||
goto err_cancel;
|
||||
|
||||
ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
|
||||
ASSERT(nmaps == 1);
|
||||
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
|
||||
(map.br_startblock != HOLESTARTBLOCK));
|
||||
|
||||
|
|
|
@ -1149,7 +1149,7 @@ xfs_inode_free_eofblocks(
|
|||
}
|
||||
*lockflags |= XFS_IOLOCK_EXCL;
|
||||
|
||||
if (xfs_can_free_eofblocks(ip, false))
|
||||
if (xfs_can_free_eofblocks(ip))
|
||||
return xfs_free_eofblocks(ip);
|
||||
|
||||
/* inode could be preallocated or append-only */
|
||||
|
|
|
@ -1469,7 +1469,7 @@ xfs_release(
|
|||
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
|
||||
return 0;
|
||||
|
||||
if (xfs_can_free_eofblocks(ip, false)) {
|
||||
if (xfs_can_free_eofblocks(ip)) {
|
||||
/*
|
||||
* Check if the inode is being opened, written and closed
|
||||
* frequently and we have delayed allocation blocks outstanding
|
||||
|
@ -1685,15 +1685,13 @@ xfs_inode_needs_inactive(
|
|||
|
||||
/*
|
||||
* This file isn't being freed, so check if there are post-eof blocks
|
||||
* to free. @force is true because we are evicting an inode from the
|
||||
* cache. Post-eof blocks must be freed, lest we end up with broken
|
||||
* free space accounting.
|
||||
* to free.
|
||||
*
|
||||
* Note: don't bother with iolock here since lockdep complains about
|
||||
* acquiring it in reclaim context. We have the only reference to the
|
||||
* inode at this point anyways.
|
||||
*/
|
||||
return xfs_can_free_eofblocks(ip, true);
|
||||
return xfs_can_free_eofblocks(ip);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1741,15 +1739,11 @@ xfs_inactive(
|
|||
|
||||
if (VFS_I(ip)->i_nlink != 0) {
|
||||
/*
|
||||
* force is true because we are evicting an inode from the
|
||||
* cache. Post-eof blocks must be freed, lest we end up with
|
||||
* broken free space accounting.
|
||||
*
|
||||
* Note: don't bother with iolock here since lockdep complains
|
||||
* about acquiring it in reclaim context. We have the only
|
||||
* reference to the inode at this point anyways.
|
||||
*/
|
||||
if (xfs_can_free_eofblocks(ip, true))
|
||||
if (xfs_can_free_eofblocks(ip))
|
||||
error = xfs_free_eofblocks(ip);
|
||||
|
||||
goto out;
|
||||
|
@ -2329,11 +2323,26 @@ xfs_ifree_cluster(
|
|||
* This buffer may not have been correctly initialised as we
|
||||
* didn't read it from disk. That's not important because we are
|
||||
* only using to mark the buffer as stale in the log, and to
|
||||
* attach stale cached inodes on it. That means it will never be
|
||||
* dispatched for IO. If it is, we want to know about it, and we
|
||||
* want it to fail. We can acheive this by adding a write
|
||||
* verifier to the buffer.
|
||||
* attach stale cached inodes on it.
|
||||
*
|
||||
* For the inode that triggered the cluster freeing, this
|
||||
* attachment may occur in xfs_inode_item_precommit() after we
|
||||
* have marked this buffer stale. If this buffer was not in
|
||||
* memory before xfs_ifree_cluster() started, it will not be
|
||||
* marked XBF_DONE and this will cause problems later in
|
||||
* xfs_inode_item_precommit() when we trip over a (stale, !done)
|
||||
* buffer attached to the transaction.
|
||||
*
|
||||
* Hence we have to mark the buffer as XBF_DONE here. This is
|
||||
* safe because we are also marking the buffer as XBF_STALE and
|
||||
* XFS_BLI_STALE. That means it will never be dispatched for
|
||||
* IO and it won't be unlocked until the cluster freeing has
|
||||
* been committed to the journal and the buffer unpinned. If it
|
||||
* is written, we want to know about it, and we want it to
|
||||
* fail. We can achieve this by adding a write verifier to the
|
||||
* buffer.
|
||||
*/
|
||||
bp->b_flags |= XBF_DONE;
|
||||
bp->b_ops = &xfs_inode_buf_ops;
|
||||
|
||||
/*
|
||||
|
|
|
@ -317,14 +317,6 @@ xfs_iomap_write_direct(
|
|||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Copy any maps to caller's array and return any error.
|
||||
*/
|
||||
if (nimaps == 0) {
|
||||
error = -ENOSPC;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
|
||||
error = xfs_alert_fsblock_zero(ip, imap);
|
||||
|
||||
|
@ -1013,6 +1005,24 @@ xfs_buffered_write_iomap_begin(
|
|||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* For zeroing, trim a delalloc extent that extends beyond the EOF
|
||||
* block. If it starts beyond the EOF block, convert it to an
|
||||
* unwritten extent.
|
||||
*/
|
||||
if ((flags & IOMAP_ZERO) && imap.br_startoff <= offset_fsb &&
|
||||
isnullstartblock(imap.br_startblock)) {
|
||||
xfs_fileoff_t eof_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
|
||||
|
||||
if (offset_fsb >= eof_fsb)
|
||||
goto convert_delay;
|
||||
if (end_fsb > eof_fsb) {
|
||||
end_fsb = eof_fsb;
|
||||
xfs_trim_extent(&imap, offset_fsb,
|
||||
end_fsb - offset_fsb);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Search the COW fork extent list even if we did not find a data fork
|
||||
* extent. This serves two purposes: first this implements the
|
||||
|
@ -1117,47 +1127,48 @@ xfs_buffered_write_iomap_begin(
|
|||
}
|
||||
}
|
||||
|
||||
retry:
|
||||
error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
|
||||
end_fsb - offset_fsb, prealloc_blocks,
|
||||
allocfork == XFS_DATA_FORK ? &imap : &cmap,
|
||||
allocfork == XFS_DATA_FORK ? &icur : &ccur,
|
||||
allocfork == XFS_DATA_FORK ? eof : cow_eof);
|
||||
switch (error) {
|
||||
case 0:
|
||||
break;
|
||||
case -ENOSPC:
|
||||
case -EDQUOT:
|
||||
/* retry without any preallocation */
|
||||
trace_xfs_delalloc_enospc(ip, offset, count);
|
||||
if (prealloc_blocks) {
|
||||
prealloc_blocks = 0;
|
||||
goto retry;
|
||||
}
|
||||
fallthrough;
|
||||
default:
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (allocfork == XFS_COW_FORK) {
|
||||
error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
|
||||
end_fsb - offset_fsb, prealloc_blocks, &cmap,
|
||||
&ccur, cow_eof);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap);
|
||||
goto found_cow;
|
||||
}
|
||||
|
||||
error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
|
||||
end_fsb - offset_fsb, prealloc_blocks, &imap, &icur,
|
||||
eof);
|
||||
if (error)
|
||||
goto out_unlock;
|
||||
|
||||
/*
|
||||
* Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
|
||||
* them out if the write happens to fail.
|
||||
*/
|
||||
seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW);
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
xfs_iunlock(ip, lockmode);
|
||||
trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq);
|
||||
|
||||
found_imap:
|
||||
seq = xfs_iomap_inode_sequence(ip, 0);
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
xfs_iunlock(ip, lockmode);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
|
||||
|
||||
convert_delay:
|
||||
xfs_iunlock(ip, lockmode);
|
||||
truncate_pagecache(inode, offset);
|
||||
error = xfs_bmapi_convert_delalloc(ip, XFS_DATA_FORK, offset,
|
||||
iomap, NULL);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
trace_xfs_iomap_alloc(ip, offset, count, XFS_DATA_FORK, &imap);
|
||||
return 0;
|
||||
|
||||
found_cow:
|
||||
seq = xfs_iomap_inode_sequence(ip, 0);
|
||||
if (imap.br_startoff <= offset_fsb) {
|
||||
|
@ -1165,17 +1176,17 @@ found_cow:
|
|||
if (error)
|
||||
goto out_unlock;
|
||||
seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
xfs_iunlock(ip, lockmode);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
|
||||
IOMAP_F_SHARED, seq);
|
||||
}
|
||||
|
||||
xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
xfs_iunlock(ip, lockmode);
|
||||
return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq);
|
||||
|
||||
out_unlock:
|
||||
xfs_iunlock(ip, XFS_ILOCK_EXCL);
|
||||
xfs_iunlock(ip, lockmode);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
|
|
@ -429,13 +429,6 @@ xfs_reflink_fill_cow_hole(
|
|||
if (error)
|
||||
return error;
|
||||
|
||||
/*
|
||||
* Allocation succeeded but the requested range was not even partially
|
||||
* satisfied? Bail out!
|
||||
*/
|
||||
if (nimaps == 0)
|
||||
return -ENOSPC;
|
||||
|
||||
convert:
|
||||
return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
|
||||
|
||||
|
@ -498,13 +491,6 @@ xfs_reflink_fill_delalloc(
|
|||
error = xfs_trans_commit(tp);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/*
|
||||
* Allocation succeeded but the requested range was not even
|
||||
* partially satisfied? Bail out!
|
||||
*/
|
||||
if (nimaps == 0)
|
||||
return -ENOSPC;
|
||||
} while (cmap->br_startoff + cmap->br_blockcount <= imap->br_startoff);
|
||||
|
||||
return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
|
||||
|
@ -730,12 +716,6 @@ xfs_reflink_end_cow_extent(
|
|||
int nmaps;
|
||||
int error;
|
||||
|
||||
/* No COW extents? That's easy! */
|
||||
if (ifp->if_bytes == 0) {
|
||||
*offset_fsb = end_fsb;
|
||||
return 0;
|
||||
}
|
||||
|
||||
resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
|
||||
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
|
||||
XFS_TRANS_RESERVE, &tp);
|
||||
|
|
|
@ -840,8 +840,6 @@ xfs_growfs_rt_alloc(
|
|||
nmap = 1;
|
||||
error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
|
||||
XFS_BMAPI_METADATA, 0, &map, &nmap);
|
||||
if (!error && nmap < 1)
|
||||
error = -ENOSPC;
|
||||
if (error)
|
||||
goto out_trans_cancel;
|
||||
/*
|
||||
|
|
|
@ -2240,6 +2240,8 @@ static inline struct maple_enode *mte_node_or_none(struct maple_enode *enode)
|
|||
|
||||
/*
|
||||
* mas_wr_node_walk() - Find the correct offset for the index in the @mas.
|
||||
* If @mas->index cannot be found within the containing
|
||||
* node, we traverse to the last entry in the node.
|
||||
* @wr_mas: The maple write state
|
||||
*
|
||||
* Uses mas_slot_locked() and does not need to worry about dead nodes.
|
||||
|
@ -3655,7 +3657,7 @@ static bool mas_wr_walk(struct ma_wr_state *wr_mas)
|
|||
return true;
|
||||
}
|
||||
|
||||
static bool mas_wr_walk_index(struct ma_wr_state *wr_mas)
|
||||
static void mas_wr_walk_index(struct ma_wr_state *wr_mas)
|
||||
{
|
||||
struct ma_state *mas = wr_mas->mas;
|
||||
|
||||
|
@ -3664,11 +3666,9 @@ static bool mas_wr_walk_index(struct ma_wr_state *wr_mas)
|
|||
wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
|
||||
mas->offset);
|
||||
if (ma_is_leaf(wr_mas->type))
|
||||
return true;
|
||||
return;
|
||||
mas_wr_walk_traverse(wr_mas);
|
||||
|
||||
}
|
||||
return true;
|
||||
}
|
||||
/*
|
||||
* mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs.
|
||||
|
@ -3904,8 +3904,8 @@ static inline int mas_wr_spanning_store(struct ma_wr_state *wr_mas)
|
|||
memset(&b_node, 0, sizeof(struct maple_big_node));
|
||||
/* Copy l_mas and store the value in b_node. */
|
||||
mas_store_b_node(&l_wr_mas, &b_node, l_wr_mas.node_end);
|
||||
/* Copy r_mas into b_node. */
|
||||
if (r_mas.offset <= r_wr_mas.node_end)
|
||||
/* Copy r_mas into b_node if there is anything to copy. */
|
||||
if (r_mas.max > r_mas.last)
|
||||
mas_mab_cp(&r_mas, r_mas.offset, r_wr_mas.node_end,
|
||||
&b_node, b_node.b_end + 1);
|
||||
else
|
||||
|
|
|
@ -15,6 +15,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
|
|||
SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK),
|
||||
SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
|
||||
SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
|
||||
SNMP_MIB_ITEM("MPCapableEndpAttempt", MPTCP_MIB_MPCAPABLEENDPATTEMPT),
|
||||
SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT),
|
||||
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
|
||||
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
|
||||
|
|
|
@ -8,6 +8,7 @@ enum linux_mptcp_mib_field {
|
|||
MPTCP_MIB_MPCAPABLEPASSIVEACK, /* Received third ACK with MP_CAPABLE */
|
||||
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */
|
||||
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */
|
||||
MPTCP_MIB_MPCAPABLEENDPATTEMPT, /* Prohibited MPC to port-based endp */
|
||||
MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */
|
||||
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
|
||||
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
|
||||
|
|
|
@ -1125,6 +1125,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
|
|||
*/
|
||||
inet_sk_state_store(newsk, TCP_LISTEN);
|
||||
lock_sock(ssk);
|
||||
WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true);
|
||||
err = __inet_listen_sk(ssk, backlog);
|
||||
if (!err)
|
||||
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED);
|
||||
|
|
|
@ -504,6 +504,7 @@ struct mptcp_subflow_context {
|
|||
__unused : 9;
|
||||
enum mptcp_data_avail data_avail;
|
||||
bool scheduled;
|
||||
bool pm_listener; /* a listener managed by the kernel PM? */
|
||||
u32 remote_nonce;
|
||||
u64 thmac;
|
||||
u32 local_nonce;
|
||||
|
|
|
@ -132,6 +132,13 @@ static void subflow_add_reset_reason(struct sk_buff *skb, u8 reason)
|
|||
}
|
||||
}
|
||||
|
||||
static int subflow_reset_req_endp(struct request_sock *req, struct sk_buff *skb)
|
||||
{
|
||||
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEENDPATTEMPT);
|
||||
subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
/* Init mptcp request socket.
|
||||
*
|
||||
* Returns an error code if a JOIN has failed and a TCP reset
|
||||
|
@ -163,6 +170,8 @@ static int subflow_check_req(struct request_sock *req,
|
|||
if (opt_mp_capable) {
|
||||
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
|
||||
|
||||
if (unlikely(listener->pm_listener))
|
||||
return subflow_reset_req_endp(req, skb);
|
||||
if (opt_mp_join)
|
||||
return 0;
|
||||
} else if (opt_mp_join) {
|
||||
|
@ -170,6 +179,8 @@ static int subflow_check_req(struct request_sock *req,
|
|||
|
||||
if (mp_opt.backup)
|
||||
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX);
|
||||
} else if (unlikely(listener->pm_listener)) {
|
||||
return subflow_reset_req_endp(req, skb);
|
||||
}
|
||||
|
||||
if (opt_mp_capable && listener->request_mptcp) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user