Mirror of git://git.yoctoproject.org/linux-yocto.git (synced 2025-07-05 05:15:23 +02:00)

commit 41ea72d081
Merge branch 'v5.13/standard/base' into v5.13/standard/preempt-rt/base
@@ -49,8 +49,30 @@ Description:
modification of EVM-protected metadata and
disable all further modification of policy

Note that once a key has been loaded, it will no longer be
possible to enable metadata modification.
Echoing a value is additive, the new value is added to the
existing initialization flags.

For example, after::

echo 2 ><securityfs>/evm

another echo can be performed::

echo 1 ><securityfs>/evm

and the resulting value will be 3.

Note that once an HMAC key has been loaded, it will no longer
be possible to enable metadata modification. Signaling that an
HMAC key has been loaded will clear the corresponding flag.
For example, if the current value is 6 (2 and 4 set)::

echo 1 ><securityfs>/evm

will set the new value to 3 (4 cleared).

Loading an HMAC key is the only way to disable metadata
modification.

Until key loading has been signaled EVM can not create
or validate the 'security.evm' xattr, but returns
@@ -39,9 +39,11 @@ KernelVersion: v5.9
Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev,
Description:
(RO) Report various performance stats related to papr-scm NVDIMM
device. Each stat is reported on a new line with each line
composed of a stat-identifier followed by it value. Below are
currently known dimm performance stats which are reported:
device. This attribute is only available for NVDIMM devices
that support reporting NVDIMM performance stats. Each stat is
reported on a new line with each line composed of a
stat-identifier followed by it value. Below are currently known
dimm performance stats which are reported:

* "CtlResCt" : Controller Reset Count
* "CtlResTm" : Controller Reset Elapsed Time
@@ -581,6 +581,12 @@
loops can be debugged more effectively on production
systems.

clocksource.max_cswd_read_retries= [KNL]
Number of clocksource_watchdog() retries due to
external delays before the clock will be marked
unstable. Defaults to three retries, that is,
four attempts to read the clock under test.

clearcpuid=BITNUM[,BITNUM...] [X86]
Disable CPUID feature X for the kernel. See
arch/x86/include/asm/cpufeatures.h for the valid bit
@@ -38,6 +38,7 @@ Sysfs entries
fan[1-12]_input RO fan tachometer speed in RPM
fan[1-12]_fault RO fan experienced fault
fan[1-6]_target RW desired fan speed in RPM
pwm[1-6]_enable RW regulator mode, 0=disabled, 1=manual mode, 2=rpm mode
pwm[1-6] RW fan target duty cycle (0-255)
pwm[1-6]_enable RW regulator mode, 0=disabled (duty cycle=0%), 1=manual mode, 2=rpm mode
pwm[1-6] RW read: current pwm duty cycle,
write: target pwm duty cycle (0-255)
================== === =======================================================
@@ -3053,7 +3053,7 @@ enum v4l2_mpeg_video_hevc_size_of_length_field -
:stub-columns: 0
:widths: 1 1 2

* - ``V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT``
* - ``V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED``
- 0x00000001
-
* - ``V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT``
@@ -3277,6 +3277,9 @@ enum v4l2_mpeg_video_hevc_size_of_length_field -
* - ``V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED``
- 0x00000100
-
* - ``V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT``
- 0x00000200
-

.. raw:: latex
@@ -259,6 +259,18 @@ and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be
returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should
be the same ``id`` as in ``struct seccomp_notif``.

Userspace can also add file descriptors to the notifying process via
``ioctl(SECCOMP_IOCTL_NOTIF_ADDFD)``. The ``id`` member of
``struct seccomp_notif_addfd`` should be the same ``id`` as in
``struct seccomp_notif``. The ``newfd_flags`` flag may be used to set flags
like O_EXEC on the file descriptor in the notifying process. If the supervisor
wants to inject the file descriptor with a specific number, the
``SECCOMP_ADDFD_FLAG_SETFD`` flag can be used, and set the ``newfd`` member to
the specific number to use. If that file descriptor is already open in the
notifying process it will be replaced. The supervisor can also add an FD, and
respond atomically by using the ``SECCOMP_ADDFD_FLAG_SEND`` flag and the return
value will be the injected file descriptor number.

It is worth noting that ``struct seccomp_data`` contains the values of register
arguments to the syscall, but does not contain pointers to memory. The task's
memory is accessible to suitably privileged traces via ``ptrace()`` or
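As a rough sketch of the ADDFD flow described above (not taken from this commit; it assumes the uapi names from <linux/seccomp.h> and a kernel that supports ``SECCOMP_ADDFD_FLAG_SEND``), a supervisor might inject a file descriptor like this::

    #include <fcntl.h>
    #include <linux/seccomp.h>
    #include <sys/ioctl.h>

    /* Supervisor side: copy 'srcfd' into the notifying task as fd 'target_fd'
     * and answer the notification atomically. 'id' must be taken from the
     * previously received struct seccomp_notif. */
    static int inject_fd(int notify_fd, __u64 id, int srcfd, int target_fd)
    {
            struct seccomp_notif_addfd addfd = {
                    .id = id,
                    .srcfd = srcfd,
                    .newfd = target_fd,
                    .flags = SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND,
                    .newfd_flags = O_CLOEXEC,
            };

            /* With SEND set, a non-negative return value is the fd number
             * installed in the notifying process. */
            return ioctl(notify_fd, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
    }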

Makefile

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
PATCHLEVEL = 13
SUBLEVEL = 0
SUBLEVEL = 2
EXTRAVERSION =
NAME = Opossums on Parade

@@ -1039,7 +1039,7 @@ LDFLAGS_vmlinux += $(call ld-option, -X,)
endif

ifeq ($(CONFIG_RELR),y)
LDFLAGS_vmlinux += --pack-dyn-relocs=relr
LDFLAGS_vmlinux += --pack-dyn-relocs=relr --use-android-relr-tags
endif

# We never want expected sections to be placed heuristically by the
@ -166,7 +166,6 @@ smp_callin(void)
|
|||
DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n",
|
||||
cpuid, current, current->active_mm));
|
||||
|
||||
preempt_disable();
|
||||
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
|
||||
}
|
||||
|
||||
|
|
|
@ -189,7 +189,6 @@ void start_kernel_secondary(void)
|
|||
pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu);
|
||||
|
||||
local_irq_enable();
|
||||
preempt_disable();
|
||||
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
|
||||
}
|
||||
|
||||
|
|
|
@ -787,7 +787,7 @@
|
|||
0xffffffff 0x3ffcfe7c 0x1c010101 /* pioA */
|
||||
0x7fffffff 0xfffccc3a 0x3f00cc3a /* pioB */
|
||||
0xffffffff 0x3ff83fff 0xff00ffff /* pioC */
|
||||
0x0003ff00 0x8002a800 0x00000000 /* pioD */
|
||||
0xb003ff00 0x8002a800 0x00000000 /* pioD */
|
||||
0xffffffff 0x7fffffff 0x76fff1bf /* pioE */
|
||||
>;
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
*/
|
||||
|
||||
#include <dt-bindings/interrupt-controller/irq.h>
|
||||
#include <dt-bindings/leds/common.h>
|
||||
#include "ste-href-family-pinctrl.dtsi"
|
||||
|
||||
/ {
|
||||
|
@ -64,17 +65,20 @@
|
|||
reg = <0>;
|
||||
led-cur = /bits/ 8 <0x2f>;
|
||||
max-cur = /bits/ 8 <0x5f>;
|
||||
color = <LED_COLOR_ID_BLUE>;
|
||||
linux,default-trigger = "heartbeat";
|
||||
};
|
||||
chan@1 {
|
||||
reg = <1>;
|
||||
led-cur = /bits/ 8 <0x2f>;
|
||||
max-cur = /bits/ 8 <0x5f>;
|
||||
color = <LED_COLOR_ID_BLUE>;
|
||||
};
|
||||
chan@2 {
|
||||
reg = <2>;
|
||||
led-cur = /bits/ 8 <0x2f>;
|
||||
max-cur = /bits/ 8 <0x5f>;
|
||||
color = <LED_COLOR_ID_BLUE>;
|
||||
};
|
||||
};
|
||||
lp5521@34 {
|
||||
|
@ -88,16 +92,19 @@
|
|||
reg = <0>;
|
||||
led-cur = /bits/ 8 <0x2f>;
|
||||
max-cur = /bits/ 8 <0x5f>;
|
||||
color = <LED_COLOR_ID_BLUE>;
|
||||
};
|
||||
chan@1 {
|
||||
reg = <1>;
|
||||
led-cur = /bits/ 8 <0x2f>;
|
||||
max-cur = /bits/ 8 <0x5f>;
|
||||
color = <LED_COLOR_ID_BLUE>;
|
||||
};
|
||||
chan@2 {
|
||||
reg = <2>;
|
||||
led-cur = /bits/ 8 <0x2f>;
|
||||
max-cur = /bits/ 8 <0x5f>;
|
||||
color = <LED_COLOR_ID_BLUE>;
|
||||
};
|
||||
};
|
||||
bh1780@29 {
|
||||
|
|
|
@ -773,10 +773,10 @@ static inline void armv7pmu_write_counter(struct perf_event *event, u64 value)
|
|||
pr_err("CPU%u writing wrong counter %d\n",
|
||||
smp_processor_id(), idx);
|
||||
} else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
|
||||
asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
|
||||
asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" ((u32)value));
|
||||
} else {
|
||||
armv7_pmnc_select_counter(idx);
|
||||
asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
|
||||
asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" ((u32)value));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -432,7 +432,6 @@ asmlinkage void secondary_start_kernel(void)
|
|||
#endif
|
||||
pr_debug("CPU%u: Booted secondary processor\n", cpu);
|
||||
|
||||
preempt_disable();
|
||||
trace_hardirqs_off();
|
||||
|
||||
/*
|
||||
|
|
|
@ -134,7 +134,7 @@
|
|||
|
||||
uart0: serial@12000 {
|
||||
compatible = "marvell,armada-3700-uart";
|
||||
reg = <0x12000 0x200>;
|
||||
reg = <0x12000 0x18>;
|
||||
clocks = <&xtalclk>;
|
||||
interrupts =
|
||||
<GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>,
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
|
||||
#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
|
||||
#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
|
||||
#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
|
||||
|
||||
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
|
||||
KVM_DIRTY_LOG_INITIALLY_SET)
|
||||
|
|
|
@ -177,9 +177,9 @@ static inline void update_saved_ttbr0(struct task_struct *tsk,
|
|||
return;
|
||||
|
||||
if (mm == &init_mm)
|
||||
ttbr = __pa_symbol(reserved_pg_dir);
|
||||
ttbr = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
|
||||
else
|
||||
ttbr = virt_to_phys(mm->pgd) | ASID(mm) << 48;
|
||||
ttbr = phys_to_ttbr(virt_to_phys(mm->pgd)) | ASID(mm) << 48;
|
||||
|
||||
WRITE_ONCE(task_thread_info(tsk)->ttbr0, ttbr);
|
||||
}
|
||||
|
|
|
@ -23,7 +23,7 @@ static inline void preempt_count_set(u64 pc)
|
|||
} while (0)
|
||||
|
||||
#define init_idle_preempt_count(p, cpu) do { \
|
||||
task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
|
||||
task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \
|
||||
} while (0)
|
||||
|
||||
static inline void set_preempt_need_resched(void)
|
||||
|
|
|
@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
|
|||
CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong
|
||||
CFLAGS_syscall.o += -fno-stack-protector
|
||||
|
||||
# It's not safe to invoke KCOV when portions of the kernel environment aren't
|
||||
# available or are out-of-sync with HW state. Since `noinstr` doesn't always
|
||||
# inhibit KCOV instrumentation, disable it for the entire compilation unit.
|
||||
KCOV_INSTRUMENT_entry.o := n
|
||||
|
||||
# Object file lists.
|
||||
obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
|
||||
entry-common.o entry-fpsimd.o process.o ptrace.o \
|
||||
|
|
|
@ -312,7 +312,7 @@ static ssize_t slots_show(struct device *dev, struct device_attribute *attr,
|
|||
struct arm_pmu *cpu_pmu = container_of(pmu, struct arm_pmu, pmu);
|
||||
u32 slots = cpu_pmu->reg_pmmir & ARMV8_PMU_SLOTS_MASK;
|
||||
|
||||
return snprintf(page, PAGE_SIZE, "0x%08x\n", slots);
|
||||
return sysfs_emit(page, "0x%08x\n", slots);
|
||||
}
|
||||
|
||||
static DEVICE_ATTR_RO(slots);
|
||||
|
|
|
@ -381,7 +381,7 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p)
|
|||
* faults in case uaccess_enable() is inadvertently called by the init
|
||||
* thread.
|
||||
*/
|
||||
init_task.thread_info.ttbr0 = __pa_symbol(reserved_pg_dir);
|
||||
init_task.thread_info.ttbr0 = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
|
||||
#endif
|
||||
|
||||
if (boot_args[1] || boot_args[2] || boot_args[3]) {
|
||||
|
|
|
@ -224,7 +224,6 @@ asmlinkage notrace void secondary_start_kernel(void)
|
|||
init_gic_priority_masking();
|
||||
|
||||
rcu_cpu_starting(cpu);
|
||||
preempt_disable();
|
||||
trace_hardirqs_off();
|
||||
|
||||
/*
|
||||
|
|
|
@ -689,6 +689,10 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
|
|||
vgic_v4_load(vcpu);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
if (kvm_check_request(KVM_REQ_RELOAD_PMU, vcpu))
|
||||
kvm_pmu_handle_pmcr(vcpu,
|
||||
__vcpu_sys_reg(vcpu, PMCR_EL0));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -578,6 +578,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
|
|||
kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0);
|
||||
|
||||
if (val & ARMV8_PMU_PMCR_P) {
|
||||
mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
|
||||
for_each_set_bit(i, &mask, 32)
|
||||
kvm_pmu_set_counter_value(vcpu, i, 0);
|
||||
}
|
||||
|
@ -850,6 +851,9 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* One-off reload of the PMU on first run */
|
||||
kvm_make_request(KVM_REQ_RELOAD_PMU, vcpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -281,7 +281,6 @@ void csky_start_secondary(void)
|
|||
pr_info("CPU%u Online: %s...\n", cpu, __func__);
|
||||
|
||||
local_irq_enable();
|
||||
preempt_disable();
|
||||
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
|
||||
}
|
||||
|
||||
|
|
|
@ -12,15 +12,17 @@ SYSCALL_DEFINE3(cacheflush,
|
|||
int, cache)
|
||||
{
|
||||
switch (cache) {
|
||||
case ICACHE:
|
||||
case BCACHE:
|
||||
flush_icache_mm_range(current->mm,
|
||||
(unsigned long)addr,
|
||||
(unsigned long)addr + bytes);
|
||||
fallthrough;
|
||||
case DCACHE:
|
||||
dcache_wb_range((unsigned long)addr,
|
||||
(unsigned long)addr + bytes);
|
||||
if (cache != BCACHE)
|
||||
break;
|
||||
fallthrough;
|
||||
case ICACHE:
|
||||
flush_icache_mm_range(current->mm,
|
||||
(unsigned long)addr,
|
||||
(unsigned long)addr + bytes);
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
|
|
|
@ -343,7 +343,7 @@ init_record_index_pools(void)
|
|||
|
||||
/* - 2 - */
|
||||
sect_min_size = sal_log_sect_min_sizes[0];
|
||||
for (i = 1; i < sizeof sal_log_sect_min_sizes/sizeof(size_t); i++)
|
||||
for (i = 1; i < ARRAY_SIZE(sal_log_sect_min_sizes); i++)
|
||||
if (sect_min_size > sal_log_sect_min_sizes[i])
|
||||
sect_min_size = sal_log_sect_min_sizes[i];
|
||||
|
||||
|
|
|
@ -441,7 +441,6 @@ start_secondary (void *unused)
|
|||
#endif
|
||||
efi_map_pal_code();
|
||||
cpu_init();
|
||||
preempt_disable();
|
||||
smp_callin();
|
||||
|
||||
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
|
||||
|
|
|
@ -25,6 +25,9 @@ config ATARI
|
|||
this kernel on an Atari, say Y here and browse the material
|
||||
available in <file:Documentation/m68k>; otherwise say N.
|
||||
|
||||
config ATARI_KBD_CORE
|
||||
bool
|
||||
|
||||
config MAC
|
||||
bool "Macintosh support"
|
||||
depends on MMU
|
||||
|
|
|
@ -36,7 +36,7 @@ extern pte_t *pkmap_page_table;
|
|||
* easily, subsequent pte tables have to be allocated in one physical
|
||||
* chunk of RAM.
|
||||
*/
|
||||
#ifdef CONFIG_PHYS_ADDR_T_64BIT
|
||||
#if defined(CONFIG_PHYS_ADDR_T_64BIT) || defined(CONFIG_MIPS_HUGE_TLB_SUPPORT)
|
||||
#define LAST_PKMAP 512
|
||||
#else
|
||||
#define LAST_PKMAP 1024
|
||||
|
|
|
@ -348,7 +348,6 @@ asmlinkage void start_secondary(void)
|
|||
*/
|
||||
|
||||
calibrate_delay();
|
||||
preempt_disable();
|
||||
cpu = smp_processor_id();
|
||||
cpu_data[cpu].udelay_val = loops_per_jiffy;
|
||||
|
||||
|
|
|
@ -145,8 +145,6 @@ asmlinkage __init void secondary_start_kernel(void)
|
|||
set_cpu_online(cpu, true);
|
||||
|
||||
local_irq_enable();
|
||||
|
||||
preempt_disable();
|
||||
/*
|
||||
* OK, it's off to the idle thread for us
|
||||
*/
|
||||
|
|
|
@ -302,7 +302,6 @@ void __init smp_callin(unsigned long pdce_proc)
|
|||
#endif
|
||||
|
||||
smp_cpu_init(slave_id);
|
||||
preempt_disable();
|
||||
|
||||
flush_cache_all_local(); /* start with known state */
|
||||
flush_tlb_all_local(NULL);
|
||||
|
|
|
@ -98,6 +98,36 @@ static inline int cpu_last_thread_sibling(int cpu)
|
|||
return cpu | (threads_per_core - 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* tlb_thread_siblings are siblings which share a TLB. This is not
|
||||
* architected, is not something a hypervisor could emulate and a future
|
||||
* CPU may change behaviour even in compat mode, so this should only be
|
||||
* used on PowerNV, and only with care.
|
||||
*/
|
||||
static inline int cpu_first_tlb_thread_sibling(int cpu)
|
||||
{
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
|
||||
return cpu & ~0x6; /* Big Core */
|
||||
else
|
||||
return cpu_first_thread_sibling(cpu);
|
||||
}
|
||||
|
||||
static inline int cpu_last_tlb_thread_sibling(int cpu)
|
||||
{
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
|
||||
return cpu | 0x6; /* Big Core */
|
||||
else
|
||||
return cpu_last_thread_sibling(cpu);
|
||||
}
|
||||
|
||||
static inline int cpu_tlb_thread_sibling_step(void)
|
||||
{
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
|
||||
return 2; /* Big Core */
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline u32 get_tensr(void)
|
||||
{
|
||||
#ifdef CONFIG_BOOKE
|
||||
|
|
|
@ -186,6 +186,7 @@ struct interrupt_nmi_state {
|
|||
u8 irq_soft_mask;
|
||||
u8 irq_happened;
|
||||
u8 ftrace_enabled;
|
||||
u64 softe;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
@ -211,6 +212,7 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
|
|||
#ifdef CONFIG_PPC64
|
||||
state->irq_soft_mask = local_paca->irq_soft_mask;
|
||||
state->irq_happened = local_paca->irq_happened;
|
||||
state->softe = regs->softe;
|
||||
|
||||
/*
|
||||
* Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
|
||||
|
@ -263,6 +265,7 @@ static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct inter
|
|||
|
||||
/* Check we didn't change the pending interrupt mask. */
|
||||
WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened);
|
||||
regs->softe = state->softe;
|
||||
local_paca->irq_happened = state->irq_happened;
|
||||
local_paca->irq_soft_mask = state->irq_soft_mask;
|
||||
#endif
|
||||
|
|
|
@@ -16,10 +16,10 @@ static inline bool is_kvm_guest(void)
return static_branch_unlikely(&kvm_guest);
}

bool check_kvm_guest(void);
int check_kvm_guest(void);
#else
static inline bool is_kvm_guest(void) { return false; }
static inline bool check_kvm_guest(void) { return false; }
static inline int check_kvm_guest(void) { return 0; }
#endif

#endif /* _ASM_POWERPC_KVM_GUEST_H_ */
@ -23,18 +23,20 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features);
|
|||
|
||||
#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
|
||||
DEFINE_STATIC_KEY_FALSE(kvm_guest);
|
||||
bool check_kvm_guest(void)
|
||||
int __init check_kvm_guest(void)
|
||||
{
|
||||
struct device_node *hyper_node;
|
||||
|
||||
hyper_node = of_find_node_by_path("/hypervisor");
|
||||
if (!hyper_node)
|
||||
return false;
|
||||
return 0;
|
||||
|
||||
if (!of_device_is_compatible(hyper_node, "linux,kvm"))
|
||||
return false;
|
||||
return 0;
|
||||
|
||||
static_branch_enable(&kvm_guest);
|
||||
return true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
core_initcall(check_kvm_guest); // before kvm_guest_init()
|
||||
#endif
|
||||
|
|
|
@ -481,12 +481,11 @@ static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
|
|||
return -1;
|
||||
}
|
||||
|
||||
static int mce_handle_ierror(struct pt_regs *regs,
|
||||
static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1,
|
||||
const struct mce_ierror_table table[],
|
||||
struct mce_error_info *mce_err, uint64_t *addr,
|
||||
uint64_t *phys_addr)
|
||||
{
|
||||
uint64_t srr1 = regs->msr;
|
||||
int handled = 0;
|
||||
int i;
|
||||
|
||||
|
@ -695,19 +694,19 @@ static long mce_handle_ue_error(struct pt_regs *regs,
|
|||
}
|
||||
|
||||
static long mce_handle_error(struct pt_regs *regs,
|
||||
unsigned long srr1,
|
||||
const struct mce_derror_table dtable[],
|
||||
const struct mce_ierror_table itable[])
|
||||
{
|
||||
struct mce_error_info mce_err = { 0 };
|
||||
uint64_t addr, phys_addr = ULONG_MAX;
|
||||
uint64_t srr1 = regs->msr;
|
||||
long handled;
|
||||
|
||||
if (SRR1_MC_LOADSTORE(srr1))
|
||||
handled = mce_handle_derror(regs, dtable, &mce_err, &addr,
|
||||
&phys_addr);
|
||||
else
|
||||
handled = mce_handle_ierror(regs, itable, &mce_err, &addr,
|
||||
handled = mce_handle_ierror(regs, srr1, itable, &mce_err, &addr,
|
||||
&phys_addr);
|
||||
|
||||
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
|
||||
|
@ -723,16 +722,20 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs)
|
|||
/* P7 DD1 leaves top bits of DSISR undefined */
|
||||
regs->dsisr &= 0x0000ffff;
|
||||
|
||||
return mce_handle_error(regs, mce_p7_derror_table, mce_p7_ierror_table);
|
||||
return mce_handle_error(regs, regs->msr,
|
||||
mce_p7_derror_table, mce_p7_ierror_table);
|
||||
}
|
||||
|
||||
long __machine_check_early_realmode_p8(struct pt_regs *regs)
|
||||
{
|
||||
return mce_handle_error(regs, mce_p8_derror_table, mce_p8_ierror_table);
|
||||
return mce_handle_error(regs, regs->msr,
|
||||
mce_p8_derror_table, mce_p8_ierror_table);
|
||||
}
|
||||
|
||||
long __machine_check_early_realmode_p9(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long srr1 = regs->msr;
|
||||
|
||||
/*
|
||||
* On POWER9 DD2.1 and below, it's possible to get a machine check
|
||||
* caused by a paste instruction where only DSISR bit 25 is set. This
|
||||
|
@ -746,10 +749,39 @@ long __machine_check_early_realmode_p9(struct pt_regs *regs)
|
|||
if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000)
|
||||
return 1;
|
||||
|
||||
return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
|
||||
/*
|
||||
* Async machine check due to bad real address from store or foreign
|
||||
* link time out comes with the load/store bit (PPC bit 42) set in
|
||||
* SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're
|
||||
* directed to the ierror table so it will find the cause (which
|
||||
* describes it correctly as a store error).
|
||||
*/
|
||||
if (SRR1_MC_LOADSTORE(srr1) &&
|
||||
((srr1 & 0x081c0000) == 0x08140000 ||
|
||||
(srr1 & 0x081c0000) == 0x08180000)) {
|
||||
srr1 &= ~PPC_BIT(42);
|
||||
}
|
||||
|
||||
return mce_handle_error(regs, srr1,
|
||||
mce_p9_derror_table, mce_p9_ierror_table);
|
||||
}
|
||||
|
||||
long __machine_check_early_realmode_p10(struct pt_regs *regs)
|
||||
{
|
||||
return mce_handle_error(regs, mce_p10_derror_table, mce_p10_ierror_table);
|
||||
unsigned long srr1 = regs->msr;
|
||||
|
||||
/*
|
||||
* Async machine check due to bad real address from store comes with
|
||||
* the load/store bit (PPC bit 42) set in SRR1, but the cause comes in
|
||||
* SRR1 not DSISR. Clear bit 42 so we're directed to the ierror table
|
||||
* so it will find the cause (which describes it correctly as a store
|
||||
* error).
|
||||
*/
|
||||
if (SRR1_MC_LOADSTORE(srr1) &&
|
||||
(srr1 & 0x081c0000) == 0x08140000) {
|
||||
srr1 &= ~PPC_BIT(42);
|
||||
}
|
||||
|
||||
return mce_handle_error(regs, srr1,
|
||||
mce_p10_derror_table, mce_p10_ierror_table);
|
||||
}
|
||||
|
|
|
@ -1213,6 +1213,19 @@ struct task_struct *__switch_to(struct task_struct *prev,
|
|||
__flush_tlb_pending(batch);
|
||||
batch->active = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* On POWER9 the copy-paste buffer can only paste into
|
||||
* foreign real addresses, so unprivileged processes can not
|
||||
* see the data or use it in any way unless they have
|
||||
* foreign real mappings. If the new process has the foreign
|
||||
* real address mappings, we must issue a cp_abort to clear
|
||||
* any state and prevent snooping, corruption or a covert
|
||||
* channel. ISA v3.1 supports paste into local memory.
|
||||
*/
|
||||
if (new->mm && (cpu_has_feature(CPU_FTR_ARCH_31) ||
|
||||
atomic_read(&new->mm->context.vas_windows)))
|
||||
asm volatile(PPC_CP_ABORT);
|
||||
#endif /* CONFIG_PPC_BOOK3S_64 */
|
||||
|
||||
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
|
||||
|
@ -1261,30 +1274,33 @@ struct task_struct *__switch_to(struct task_struct *prev,
|
|||
#endif
|
||||
last = _switch(old_thread, new_thread);
|
||||
|
||||
/*
|
||||
* Nothing after _switch will be run for newly created tasks,
|
||||
* because they switch directly to ret_from_fork/ret_from_kernel_thread
|
||||
* etc. Code added here should have a comment explaining why that is
|
||||
* okay.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
/*
|
||||
* This applies to a process that was context switched while inside
|
||||
* arch_enter_lazy_mmu_mode(), to re-activate the batch that was
|
||||
* deactivated above, before _switch(). This will never be the case
|
||||
* for new tasks.
|
||||
*/
|
||||
if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
|
||||
current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
|
||||
batch = this_cpu_ptr(&ppc64_tlb_batch);
|
||||
batch->active = 1;
|
||||
}
|
||||
|
||||
if (current->thread.regs) {
|
||||
/*
|
||||
* Math facilities are masked out of the child MSR in copy_thread.
|
||||
* A new task does not need to restore_math because it will
|
||||
* demand fault them.
|
||||
*/
|
||||
if (current->thread.regs)
|
||||
restore_math(current->thread.regs);
|
||||
|
||||
/*
|
||||
* On POWER9 the copy-paste buffer can only paste into
|
||||
* foreign real addresses, so unprivileged processes can not
|
||||
* see the data or use it in any way unless they have
|
||||
* foreign real mappings. If the new process has the foreign
|
||||
* real address mappings, we must issue a cp_abort to clear
|
||||
* any state and prevent snooping, corruption or a covert
|
||||
* channel. ISA v3.1 supports paste into local memory.
|
||||
*/
|
||||
if (current->mm &&
|
||||
(cpu_has_feature(CPU_FTR_ARCH_31) ||
|
||||
atomic_read(&current->mm->context.vas_windows)))
|
||||
asm volatile(PPC_CP_ABORT);
|
||||
}
|
||||
#endif /* CONFIG_PPC_BOOK3S_64 */
|
||||
|
||||
return last;
|
||||
|
|
|
@ -624,6 +624,8 @@ static void nmi_stop_this_cpu(struct pt_regs *regs)
|
|||
/*
|
||||
* IRQs are already hard disabled by the smp_handle_nmi_ipi.
|
||||
*/
|
||||
set_cpu_online(smp_processor_id(), false);
|
||||
|
||||
spin_begin();
|
||||
while (1)
|
||||
spin_cpu_relax();
|
||||
|
@ -639,6 +641,15 @@ void smp_send_stop(void)
|
|||
static void stop_this_cpu(void *dummy)
|
||||
{
|
||||
hard_irq_disable();
|
||||
|
||||
/*
|
||||
* Offlining CPUs in stop_this_cpu can result in scheduler warnings,
|
||||
* (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants
|
||||
* to know other CPUs are offline before it breaks locks to flush
|
||||
* printk buffers, in case we panic()ed while holding the lock.
|
||||
*/
|
||||
set_cpu_online(smp_processor_id(), false);
|
||||
|
||||
spin_begin();
|
||||
while (1)
|
||||
spin_cpu_relax();
|
||||
|
@ -1552,7 +1563,6 @@ void start_secondary(void *unused)
|
|||
smp_store_cpu_info(cpu);
|
||||
set_dec(tb_ticks_per_jiffy);
|
||||
rcu_cpu_starting(cpu);
|
||||
preempt_disable();
|
||||
cpu_callin_map[cpu] = 1;
|
||||
|
||||
if (smp_ops->setup_cpu)
|
||||
|
|
|
@ -172,17 +172,31 @@ static void handle_backtrace_ipi(struct pt_regs *regs)
|
|||
|
||||
static void raise_backtrace_ipi(cpumask_t *mask)
|
||||
{
|
||||
struct paca_struct *p;
|
||||
unsigned int cpu;
|
||||
u64 delay_us;
|
||||
|
||||
for_each_cpu(cpu, mask) {
|
||||
if (cpu == smp_processor_id())
|
||||
if (cpu == smp_processor_id()) {
|
||||
handle_backtrace_ipi(NULL);
|
||||
else
|
||||
smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, 5 * USEC_PER_SEC);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
for_each_cpu(cpu, mask) {
|
||||
struct paca_struct *p = paca_ptrs[cpu];
|
||||
delay_us = 5 * USEC_PER_SEC;
|
||||
|
||||
if (smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, delay_us)) {
|
||||
// Now wait up to 5s for the other CPU to do its backtrace
|
||||
while (cpumask_test_cpu(cpu, mask) && delay_us) {
|
||||
udelay(1);
|
||||
delay_us--;
|
||||
}
|
||||
|
||||
// Other CPU cleared itself from the mask
|
||||
if (delay_us)
|
||||
continue;
|
||||
}
|
||||
|
||||
p = paca_ptrs[cpu];
|
||||
|
||||
cpumask_clear_cpu(cpu, mask);
|
||||
|
||||
|
|
|
@ -2657,7 +2657,7 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
|
|||
cpumask_t *cpu_in_guest;
|
||||
int i;
|
||||
|
||||
cpu = cpu_first_thread_sibling(cpu);
|
||||
cpu = cpu_first_tlb_thread_sibling(cpu);
|
||||
if (nested) {
|
||||
cpumask_set_cpu(cpu, &nested->need_tlb_flush);
|
||||
cpu_in_guest = &nested->cpu_in_guest;
|
||||
|
@ -2671,9 +2671,10 @@ static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
|
|||
* the other side is the first smp_mb() in kvmppc_run_core().
|
||||
*/
|
||||
smp_mb();
|
||||
for (i = 0; i < threads_per_core; ++i)
|
||||
if (cpumask_test_cpu(cpu + i, cpu_in_guest))
|
||||
smp_call_function_single(cpu + i, do_nothing, NULL, 1);
|
||||
for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
|
||||
i += cpu_tlb_thread_sibling_step())
|
||||
if (cpumask_test_cpu(i, cpu_in_guest))
|
||||
smp_call_function_single(i, do_nothing, NULL, 1);
|
||||
}
|
||||
|
||||
static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
|
||||
|
@ -2704,8 +2705,8 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
|
|||
*/
|
||||
if (prev_cpu != pcpu) {
|
||||
if (prev_cpu >= 0 &&
|
||||
cpu_first_thread_sibling(prev_cpu) !=
|
||||
cpu_first_thread_sibling(pcpu))
|
||||
cpu_first_tlb_thread_sibling(prev_cpu) !=
|
||||
cpu_first_tlb_thread_sibling(pcpu))
|
||||
radix_flush_cpu(kvm, prev_cpu, vcpu);
|
||||
if (nested)
|
||||
nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
|
||||
|
|
|
@ -800,7 +800,7 @@ void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
|
|||
* Thus we make all 4 threads use the same bit.
|
||||
*/
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
pcpu = cpu_first_thread_sibling(pcpu);
|
||||
pcpu = cpu_first_tlb_thread_sibling(pcpu);
|
||||
|
||||
if (nested)
|
||||
need_tlb_flush = &nested->need_tlb_flush;
|
||||
|
|
|
@ -53,7 +53,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
|
|||
hr->dawrx1 = vcpu->arch.dawrx1;
|
||||
}
|
||||
|
||||
static void byteswap_pt_regs(struct pt_regs *regs)
|
||||
/* Use noinline_for_stack due to https://bugs.llvm.org/show_bug.cgi?id=49610 */
|
||||
static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
|
||||
{
|
||||
unsigned long *addr = (unsigned long *) regs;
|
||||
|
||||
|
|
|
@ -56,7 +56,7 @@ static int global_invalidates(struct kvm *kvm)
|
|||
* so use the bit for the first thread to represent the core.
|
||||
*/
|
||||
if (cpu_has_feature(CPU_FTR_ARCH_300))
|
||||
cpu = cpu_first_thread_sibling(cpu);
|
||||
cpu = cpu_first_tlb_thread_sibling(cpu);
|
||||
cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
|
||||
}
|
||||
|
||||
|
|
|
@ -1522,8 +1522,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(hash_page);
|
||||
|
||||
DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault);
|
||||
DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
|
||||
DECLARE_INTERRUPT_HANDLER(__do_hash_fault);
|
||||
DEFINE_INTERRUPT_HANDLER(__do_hash_fault)
|
||||
{
|
||||
unsigned long ea = regs->dar;
|
||||
unsigned long dsisr = regs->dsisr;
|
||||
|
@ -1533,6 +1533,11 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
|
|||
unsigned int region_id;
|
||||
long err;
|
||||
|
||||
if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
|
||||
hash__do_page_fault(regs);
|
||||
return;
|
||||
}
|
||||
|
||||
region_id = get_region_id(ea);
|
||||
if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
|
||||
mm = &init_mm;
|
||||
|
@ -1571,9 +1576,10 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
|
|||
bad_page_fault(regs, SIGBUS);
|
||||
}
|
||||
err = 0;
|
||||
}
|
||||
|
||||
return err;
|
||||
} else if (err) {
|
||||
hash__do_page_fault(regs);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -1582,13 +1588,6 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault)
|
|||
*/
|
||||
DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
|
||||
{
|
||||
unsigned long dsisr = regs->dsisr;
|
||||
|
||||
if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
|
||||
hash__do_page_fault(regs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are in an "NMI" (e.g., an interrupt when soft-disabled), then
|
||||
* don't call hash_page, just fail the fault. This is required to
|
||||
|
@ -1607,8 +1606,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault)
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (__do_hash_fault(regs))
|
||||
hash__do_page_fault(regs);
|
||||
__do_hash_fault(regs);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -78,9 +78,6 @@ static inline int smp_startup_cpu(unsigned int lcpu)
|
|||
|
||||
pcpu = get_hard_smp_processor_id(lcpu);
|
||||
|
||||
/* Fixup atomic count: it exited inside IRQ handler. */
|
||||
task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0;
|
||||
|
||||
/*
|
||||
* If the RTAS start-cpu token does not exist then presume the
|
||||
* cpu is already spinning.
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <asm/plpar_wrappers.h>
|
||||
#include <asm/papr_pdsm.h>
|
||||
#include <asm/mce.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
#define BIND_ANY_ADDR (~0ul)
|
||||
|
||||
|
@ -900,6 +901,20 @@ static ssize_t flags_show(struct device *dev,
|
|||
}
|
||||
DEVICE_ATTR_RO(flags);
|
||||
|
||||
static umode_t papr_nd_attribute_visible(struct kobject *kobj,
|
||||
struct attribute *attr, int n)
|
||||
{
|
||||
struct device *dev = kobj_to_dev(kobj);
|
||||
struct nvdimm *nvdimm = to_nvdimm(dev);
|
||||
struct papr_scm_priv *p = nvdimm_provider_data(nvdimm);
|
||||
|
||||
/* For if perf-stats not available remove perf_stats sysfs */
|
||||
if (attr == &dev_attr_perf_stats.attr && p->stat_buffer_len == 0)
|
||||
return 0;
|
||||
|
||||
return attr->mode;
|
||||
}
|
||||
|
||||
/* papr_scm specific dimm attributes */
|
||||
static struct attribute *papr_nd_attributes[] = {
|
||||
&dev_attr_flags.attr,
|
||||
|
@ -909,6 +924,7 @@ static struct attribute *papr_nd_attributes[] = {
|
|||
|
||||
static struct attribute_group papr_nd_attribute_group = {
|
||||
.name = "papr",
|
||||
.is_visible = papr_nd_attribute_visible,
|
||||
.attrs = papr_nd_attributes,
|
||||
};
|
||||
|
||||
|
@ -924,7 +940,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
|
|||
struct nd_region_desc ndr_desc;
|
||||
unsigned long dimm_flags;
|
||||
int target_nid, online_nid;
|
||||
ssize_t stat_size;
|
||||
|
||||
p->bus_desc.ndctl = papr_scm_ndctl;
|
||||
p->bus_desc.module = THIS_MODULE;
|
||||
|
@ -1009,16 +1024,6 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
|
|||
list_add_tail(&p->region_list, &papr_nd_regions);
|
||||
mutex_unlock(&papr_ndr_lock);
|
||||
|
||||
/* Try retriving the stat buffer and see if its supported */
|
||||
stat_size = drc_pmem_query_stats(p, NULL, 0);
|
||||
if (stat_size > 0) {
|
||||
p->stat_buffer_len = stat_size;
|
||||
dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
|
||||
p->stat_buffer_len);
|
||||
} else {
|
||||
dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err: nvdimm_bus_unregister(p->bus);
|
||||
|
@ -1094,8 +1099,10 @@ static int papr_scm_probe(struct platform_device *pdev)
|
|||
u32 drc_index, metadata_size;
|
||||
u64 blocks, block_size;
|
||||
struct papr_scm_priv *p;
|
||||
u8 uuid_raw[UUID_SIZE];
|
||||
const char *uuid_str;
|
||||
u64 uuid[2];
|
||||
ssize_t stat_size;
|
||||
uuid_t uuid;
|
||||
int rc;
|
||||
|
||||
/* check we have all the required DT properties */
|
||||
|
@ -1138,16 +1145,23 @@ static int papr_scm_probe(struct platform_device *pdev)
|
|||
p->hcall_flush_required = of_property_read_bool(dn, "ibm,hcall-flush-required");
|
||||
|
||||
/* We just need to ensure that set cookies are unique across */
|
||||
uuid_parse(uuid_str, (uuid_t *) uuid);
|
||||
uuid_parse(uuid_str, &uuid);
|
||||
|
||||
/*
|
||||
* cookie1 and cookie2 are not really little endian
|
||||
* we store a little endian representation of the
|
||||
* uuid str so that we can compare this with the label
|
||||
* area cookie irrespective of the endian config with which
|
||||
* the kernel is built.
|
||||
* The cookie1 and cookie2 are not really little endian.
|
||||
* We store a raw buffer representation of the
|
||||
* uuid string so that we can compare this with the label
|
||||
* area cookie irrespective of the endian configuration
|
||||
* with which the kernel is built.
|
||||
*
|
||||
* Historically we stored the cookie in the below format.
|
||||
* for a uuid string 72511b67-0b3b-42fd-8d1d-5be3cae8bcaa
|
||||
* cookie1 was 0xfd423b0b671b5172
|
||||
* cookie2 was 0xaabce8cae35b1d8d
|
||||
*/
|
||||
p->nd_set.cookie1 = cpu_to_le64(uuid[0]);
|
||||
p->nd_set.cookie2 = cpu_to_le64(uuid[1]);
|
||||
export_uuid(uuid_raw, &uuid);
|
||||
p->nd_set.cookie1 = get_unaligned_le64(&uuid_raw[0]);
|
||||
p->nd_set.cookie2 = get_unaligned_le64(&uuid_raw[8]);
|
||||
|
||||
/* might be zero */
|
||||
p->metadata_size = metadata_size;
|
||||
|
@ -1172,6 +1186,14 @@ static int papr_scm_probe(struct platform_device *pdev)
|
|||
p->res.name = pdev->name;
|
||||
p->res.flags = IORESOURCE_MEM;
|
||||
|
||||
/* Try retrieving the stat buffer and see if its supported */
|
||||
stat_size = drc_pmem_query_stats(p, NULL, 0);
|
||||
if (stat_size > 0) {
|
||||
p->stat_buffer_len = stat_size;
|
||||
dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
|
||||
p->stat_buffer_len);
|
||||
}
|
||||
|
||||
rc = papr_scm_nvdimm_init(p);
|
||||
if (rc)
|
||||
goto err2;
|
||||
|
|
|
@ -105,9 +105,6 @@ static inline int smp_startup_cpu(unsigned int lcpu)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/* Fixup atomic count: it exited inside IRQ handler. */
|
||||
task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0;
|
||||
|
||||
/*
|
||||
* If the RTAS start-cpu token does not exist then presume the
|
||||
* cpu is already spinning.
|
||||
|
@ -211,7 +208,9 @@ static __init void pSeries_smp_probe(void)
|
|||
if (!cpu_has_feature(CPU_FTR_SMT))
|
||||
return;
|
||||
|
||||
if (check_kvm_guest()) {
|
||||
check_kvm_guest();
|
||||
|
||||
if (is_kvm_guest()) {
|
||||
/*
|
||||
* KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp
|
||||
* faults to the hypervisor which then reads the instruction
|
||||
|
|
|
@ -180,7 +180,6 @@ asmlinkage __visible void smp_callin(void)
|
|||
* Disable preemption before enabling interrupts, so we don't try to
|
||||
* schedule a CPU that hasn't actually started yet.
|
||||
*/
|
||||
preempt_disable();
|
||||
local_irq_enable();
|
||||
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
|
||||
}
|
||||
|
|
|
@ -164,6 +164,7 @@ config S390
|
|||
select HAVE_FUTEX_CMPXCHG if FUTEX
|
||||
select HAVE_GCC_PLUGINS
|
||||
select HAVE_GENERIC_VDSO
|
||||
select HAVE_IOREMAP_PROT if PCI
|
||||
select HAVE_IRQ_EXIT_ON_IRQ_STACK
|
||||
select HAVE_KERNEL_BZIP2
|
||||
select HAVE_KERNEL_GZIP
|
||||
|
@ -853,7 +854,7 @@ config CMM_IUCV
|
|||
config APPLDATA_BASE
|
||||
def_bool n
|
||||
prompt "Linux - VM Monitor Stream, base infrastructure"
|
||||
depends on PROC_FS
|
||||
depends on PROC_SYSCTL
|
||||
help
|
||||
This provides a kernel interface for creating and updating z/VM APPLDATA
|
||||
monitor records. The monitor records are updated at certain time
|
||||
|
|
|
@ -36,6 +36,7 @@ void uv_query_info(void)
|
|||
uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE);
|
||||
uv_info.max_num_sec_conf = uvcb.max_num_sec_conf;
|
||||
uv_info.max_guest_cpu_id = uvcb.max_guest_cpu_id;
|
||||
uv_info.uv_feature_indications = uvcb.uv_feature_indications;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
|
||||
|
|
|
@ -344,8 +344,6 @@ static inline int is_module_addr(void *addr)
|
|||
#define PTRS_PER_P4D _CRST_ENTRIES
|
||||
#define PTRS_PER_PGD _CRST_ENTRIES
|
||||
|
||||
#define MAX_PTRS_PER_P4D PTRS_PER_P4D
|
||||
|
||||
/*
|
||||
* Segment table and region3 table entry encoding
|
||||
* (R = read-only, I = invalid, y = young bit):
|
||||
|
@ -865,6 +863,25 @@ static inline int pte_unused(pte_t pte)
|
|||
return pte_val(pte) & _PAGE_UNUSED;
|
||||
}
|
||||
|
||||
/*
|
||||
* Extract the pgprot value from the given pte while at the same time making it
|
||||
* usable for kernel address space mappings where fault driven dirty and
|
||||
* young/old accounting is not supported, i.e _PAGE_PROTECT and _PAGE_INVALID
|
||||
* must not be set.
|
||||
*/
|
||||
static inline pgprot_t pte_pgprot(pte_t pte)
|
||||
{
|
||||
unsigned long pte_flags = pte_val(pte) & _PAGE_CHG_MASK;
|
||||
|
||||
if (pte_write(pte))
|
||||
pte_flags |= pgprot_val(PAGE_KERNEL);
|
||||
else
|
||||
pte_flags |= pgprot_val(PAGE_KERNEL_RO);
|
||||
pte_flags |= pte_val(pte) & mio_wb_bit_mask;
|
||||
|
||||
return __pgprot(pte_flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* pgd/pmd/pte modification functions
|
||||
*/
|
||||
|
|
|
@ -29,12 +29,6 @@ static inline void preempt_count_set(int pc)
|
|||
old, new) != old);
|
||||
}
|
||||
|
||||
#define init_task_preempt_count(p) do { } while (0)
|
||||
|
||||
#define init_idle_preempt_count(p, cpu) do { \
|
||||
S390_lowcore.preempt_count = PREEMPT_ENABLED; \
|
||||
} while (0)
|
||||
|
||||
static inline void set_preempt_need_resched(void)
|
||||
{
|
||||
__atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count);
|
||||
|
@ -88,12 +82,6 @@ static inline void preempt_count_set(int pc)
|
|||
S390_lowcore.preempt_count = pc;
|
||||
}
|
||||
|
||||
#define init_task_preempt_count(p) do { } while (0)
|
||||
|
||||
#define init_idle_preempt_count(p, cpu) do { \
|
||||
S390_lowcore.preempt_count = PREEMPT_ENABLED; \
|
||||
} while (0)
|
||||
|
||||
static inline void set_preempt_need_resched(void)
|
||||
{
|
||||
}
|
||||
|
@ -130,6 +118,10 @@ static inline bool should_resched(int preempt_offset)
|
|||
|
||||
#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
|
||||
|
||||
#define init_task_preempt_count(p) do { } while (0)
|
||||
/* Deferred to CPU bringup time */
|
||||
#define init_idle_preempt_count(p, cpu) do { } while (0)
|
||||
|
||||
#ifdef CONFIG_PREEMPTION
|
||||
extern void preempt_schedule(void);
|
||||
#define __preempt_schedule() preempt_schedule()
|
||||
|
|
|
@ -73,6 +73,10 @@ enum uv_cmds_inst {
|
|||
BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
|
||||
};
|
||||
|
||||
enum uv_feat_ind {
|
||||
BIT_UV_FEAT_MISC = 0,
|
||||
};
|
||||
|
||||
struct uv_cb_header {
|
||||
u16 len;
|
||||
u16 cmd; /* Command Code */
|
||||
|
@ -97,7 +101,8 @@ struct uv_cb_qui {
|
|||
u64 max_guest_stor_addr;
|
||||
u8 reserved88[158 - 136];
|
||||
u16 max_guest_cpu_id;
|
||||
u8 reserveda0[200 - 160];
|
||||
u64 uv_feature_indications;
|
||||
u8 reserveda0[200 - 168];
|
||||
} __packed __aligned(8);
|
||||
|
||||
/* Initialize Ultravisor */
|
||||
|
@ -274,6 +279,7 @@ struct uv_info {
|
|||
unsigned long max_sec_stor_addr;
|
||||
unsigned int max_num_sec_conf;
|
||||
unsigned short max_guest_cpu_id;
|
||||
unsigned long uv_feature_indications;
|
||||
};
|
||||
|
||||
extern struct uv_info uv_info;
|
||||
|
|
|
@ -466,6 +466,7 @@ static void __init setup_lowcore_dat_off(void)
|
|||
lc->br_r1_trampoline = 0x07f1; /* br %r1 */
|
||||
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
|
||||
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
|
||||
lc->preempt_count = PREEMPT_DISABLED;
|
||||
|
||||
set_prefix((u32)(unsigned long) lc);
|
||||
lowcore_ptr[0] = lc;
|
||||
|
|
|
@ -219,6 +219,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
|
|||
lc->br_r1_trampoline = 0x07f1; /* br %r1 */
|
||||
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
|
||||
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
|
||||
lc->preempt_count = PREEMPT_DISABLED;
|
||||
if (nmi_alloc_per_cpu(lc))
|
||||
goto out_stack;
|
||||
lowcore_ptr[cpu] = lc;
|
||||
|
@ -878,7 +879,6 @@ static void smp_init_secondary(void)
|
|||
restore_access_regs(S390_lowcore.access_regs_save_area);
|
||||
cpu_init();
|
||||
rcu_cpu_starting(cpu);
|
||||
preempt_disable();
|
||||
init_cpu_timer();
|
||||
vtime_init();
|
||||
vdso_getcpu_init();
|
||||
|
|
|
@ -364,6 +364,15 @@ static ssize_t uv_query_facilities(struct kobject *kobj,
|
|||
static struct kobj_attribute uv_query_facilities_attr =
|
||||
__ATTR(facilities, 0444, uv_query_facilities, NULL);
|
||||
|
||||
static ssize_t uv_query_feature_indications(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return sysfs_emit(buf, "%lx\n", uv_info.uv_feature_indications);
|
||||
}
|
||||
|
||||
static struct kobj_attribute uv_query_feature_indications_attr =
|
||||
__ATTR(feature_indications, 0444, uv_query_feature_indications, NULL);
|
||||
|
||||
static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *page)
|
||||
{
|
||||
|
@ -396,6 +405,7 @@ static struct kobj_attribute uv_query_max_guest_addr_attr =
|
|||
|
||||
static struct attribute *uv_query_attrs[] = {
|
||||
&uv_query_facilities_attr.attr,
|
||||
&uv_query_feature_indications_attr.attr,
|
||||
&uv_query_max_guest_cpus_attr.attr,
|
||||
&uv_query_max_guest_vms_attr.attr,
|
||||
&uv_query_max_guest_addr_attr.attr,
|
||||
|
|
|
@ -329,31 +329,31 @@ static void allow_cpu_feat(unsigned long nr)
|
|||
|
||||
static inline int plo_test_bit(unsigned char nr)
|
||||
{
|
||||
register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
|
||||
unsigned long function = (unsigned long)nr | 0x100;
|
||||
int cc;
|
||||
|
||||
asm volatile(
|
||||
" lgr 0,%[function]\n"
|
||||
/* Parameter registers are ignored for "test bit" */
|
||||
" plo 0,0,0,0(0)\n"
|
||||
" ipm %0\n"
|
||||
" srl %0,28\n"
|
||||
: "=d" (cc)
|
||||
: "d" (r0)
|
||||
: "cc");
|
||||
: [function] "d" (function)
|
||||
: "cc", "0");
|
||||
return cc == 0;
|
||||
}
|
||||
|
||||
static __always_inline void __insn32_query(unsigned int opcode, u8 *query)
|
||||
{
|
||||
register unsigned long r0 asm("0") = 0; /* query function */
|
||||
register unsigned long r1 asm("1") = (unsigned long) query;
|
||||
|
||||
asm volatile(
|
||||
/* Parameter regs are ignored */
|
||||
" lghi 0,0\n"
|
||||
" lgr 1,%[query]\n"
|
||||
/* Parameter registers are ignored */
|
||||
" .insn rrf,%[opc] << 16,2,4,6,0\n"
|
||||
:
|
||||
: "d" (r0), "a" (r1), [opc] "i" (opcode)
|
||||
: "cc", "memory");
|
||||
: [query] "d" ((unsigned long)query), [opc] "i" (opcode)
|
||||
: "cc", "memory", "0", "1");
|
||||
}
|
||||
|
||||
#define INSN_SORTL 0xb938
|
||||
|
|
|
@ -792,6 +792,32 @@ void do_secure_storage_access(struct pt_regs *regs)
|
|||
struct page *page;
|
||||
int rc;
|
||||
|
||||
/*
|
||||
* bit 61 tells us if the address is valid, if it's not we
|
||||
* have a major problem and should stop the kernel or send a
|
||||
* SIGSEGV to the process. Unfortunately bit 61 is not
|
||||
* reliable without the misc UV feature so we need to check
|
||||
* for that as well.
|
||||
*/
|
||||
if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
|
||||
!test_bit_inv(61, &regs->int_parm_long)) {
|
||||
/*
|
||||
* When this happens, userspace did something that it
|
||||
* was not supposed to do, e.g. branching into secure
|
||||
* memory. Trigger a segmentation fault.
|
||||
*/
|
||||
if (user_mode(regs)) {
|
||||
send_sig(SIGSEGV, current, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* The kernel should never run into this case and we
|
||||
* have no way out of this situation.
|
||||
*/
|
||||
panic("Unexpected PGM 0x3d with TEID bit 61=0");
|
||||
}
|
||||
|
||||
switch (get_fault_type(regs)) {
|
||||
case USER_FAULT:
|
||||
mm = current->mm;
|
||||
|
|
|
@ -186,8 +186,6 @@ asmlinkage void start_secondary(void)
|
|||
|
||||
per_cpu_trap_init();
|
||||
|
||||
preempt_disable();
|
||||
|
||||
notify_cpu_starting(cpu);
|
||||
|
||||
local_irq_enable();
|
||||
|
|
|
@ -348,7 +348,6 @@ static void sparc_start_secondary(void *arg)
|
|||
*/
|
||||
arch_cpu_pre_starting(arg);
|
||||
|
||||
preempt_disable();
|
||||
cpu = smp_processor_id();
|
||||
|
||||
notify_cpu_starting(cpu);
|
||||
|
|
|
@ -138,9 +138,6 @@ void smp_callin(void)
|
|||
|
||||
set_cpu_online(cpuid, true);
|
||||
|
||||
/* idle thread is expected to have preempt disabled */
|
||||
preempt_disable();
|
||||
|
||||
local_irq_enable();
|
||||
|
||||
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
|
||||
|
|
|
@ -1500,7 +1500,7 @@ static int __init curve25519_mod_init(void)
|
|||
static void __exit curve25519_mod_exit(void)
|
||||
{
|
||||
if (IS_REACHABLE(CONFIG_CRYPTO_KPP) &&
|
||||
(boot_cpu_has(X86_FEATURE_BMI2) || boot_cpu_has(X86_FEATURE_ADX)))
|
||||
static_branch_likely(&curve25519_use_bmi2_adx))
|
||||
crypto_unregister_kpp(&curve25519_alg);
|
||||
}
|
||||
|
||||
|
|
|
@ -506,7 +506,7 @@ SYM_CODE_START(\asmsym)
|
|||
|
||||
movq %rsp, %rdi /* pt_regs pointer */
|
||||
|
||||
call \cfunc
|
||||
call kernel_\cfunc
|
||||
|
||||
/*
|
||||
* No need to switch back to the IST stack. The current stack is either
|
||||
|
@ -517,7 +517,7 @@ SYM_CODE_START(\asmsym)
|
|||
|
||||
/* Switch to the regular task stack */
|
||||
.Lfrom_usermode_switch_stack_\@:
|
||||
idtentry_body safe_stack_\cfunc, has_error_code=1
|
||||
idtentry_body user_\cfunc, has_error_code=1
|
||||
|
||||
_ASM_NOKPROBE(\asmsym)
|
||||
SYM_CODE_END(\asmsym)
|
||||
|
|
|
@ -1626,6 +1626,8 @@ static void x86_pmu_del(struct perf_event *event, int flags)
|
|||
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
|
||||
goto do_del;
|
||||
|
||||
__set_bit(event->hw.idx, cpuc->dirty);
|
||||
|
||||
/*
|
||||
* Not a TXN, therefore cleanup properly.
|
||||
*/
|
||||
|
@ -2474,6 +2476,31 @@ static int x86_pmu_event_init(struct perf_event *event)
|
|||
return err;
|
||||
}
|
||||
|
||||
void perf_clear_dirty_counters(void)
|
||||
{
|
||||
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
|
||||
int i;
|
||||
|
||||
/* Don't need to clear the assigned counter. */
|
||||
for (i = 0; i < cpuc->n_events; i++)
|
||||
__clear_bit(cpuc->assign[i], cpuc->dirty);
|
||||
|
||||
if (bitmap_empty(cpuc->dirty, X86_PMC_IDX_MAX))
|
||||
return;
|
||||
|
||||
for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
|
||||
/* Metrics and fake events don't have corresponding HW counters. */
|
||||
if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR))
|
||||
continue;
|
||||
else if (i >= INTEL_PMC_IDX_FIXED)
|
||||
wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
|
||||
else
|
||||
wrmsrl(x86_pmu_event_addr(i), 0);
|
||||
}
|
||||
|
||||
bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX);
|
||||
}
|
||||
|
||||
static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
|
||||
{
|
||||
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
|
||||
|
@ -2497,7 +2524,6 @@ static void x86_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
|
|||
|
||||
static void x86_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
|
||||
{
|
||||
|
||||
if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED))
|
||||
return;
|
||||
|
||||
|
|
|
@ -280,6 +280,8 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = {
|
|||
INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
|
||||
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
|
||||
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
|
||||
INTEL_UEVENT_EXTRA_REG(0x40ad, MSR_PEBS_FRONTEND, 0x7, FE),
|
||||
INTEL_UEVENT_EXTRA_REG(0x04c2, MSR_PEBS_FRONTEND, 0x8, FE),
|
||||
EVENT_EXTRA_END
|
||||
};
|
||||
|
||||
|
@ -4030,8 +4032,10 @@ spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
|
|||
* The :ppp indicates the Precise Distribution (PDist) facility, which
|
||||
* is only supported on the GP counter 0. If a :ppp event which is not
|
||||
* available on the GP counter 0, error out.
|
||||
* Exception: Instruction PDIR is only available on the fixed counter 0.
|
||||
*/
|
||||
if (event->attr.precise_ip == 3) {
|
||||
if ((event->attr.precise_ip == 3) &&
|
||||
!constraint_match(&fixed0_constraint, event->hw.config)) {
|
||||
if (c->idxmsk64 & BIT_ULL(0))
|
||||
return &counter0_constraint;
|
||||
|
||||
|
@ -6157,8 +6161,13 @@ __init int intel_pmu_init(void)
|
|||
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
|
||||
pmu->name = "cpu_core";
|
||||
pmu->cpu_type = hybrid_big;
|
||||
pmu->num_counters = x86_pmu.num_counters + 2;
|
||||
pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
|
||||
if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
|
||||
pmu->num_counters = x86_pmu.num_counters + 2;
|
||||
pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;
|
||||
} else {
|
||||
pmu->num_counters = x86_pmu.num_counters;
|
||||
pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
|
||||
}
|
||||
pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
|
||||
pmu->unconstrained = (struct event_constraint)
|
||||
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
|
||||
|
|
|
@ -229,6 +229,7 @@ struct cpu_hw_events {
|
|||
*/
|
||||
struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
|
||||
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
||||
unsigned long dirty[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
|
||||
int enabled;
|
||||
|
||||
int n_events; /* the # of events in the below arrays */
|
||||
|
|
|
@ -312,8 +312,8 @@ static __always_inline void __##func(struct pt_regs *regs)
|
|||
*/
|
||||
#define DECLARE_IDTENTRY_VC(vector, func) \
|
||||
DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func); \
|
||||
__visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code); \
|
||||
__visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code)
|
||||
__visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code); \
|
||||
__visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code)
|
||||
|
||||
/**
|
||||
* DEFINE_IDTENTRY_IST - Emit code for IST entry points
|
||||
|
@ -355,33 +355,24 @@ static __always_inline void __##func(struct pt_regs *regs)
|
|||
DEFINE_IDTENTRY_RAW_ERRORCODE(func)
|
||||
|
||||
/**
|
||||
* DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler
|
||||
which runs on a safe stack.
|
||||
* DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler
|
||||
when raised from kernel mode
|
||||
* @func: Function name of the entry point
|
||||
*
|
||||
* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
|
||||
*/
|
||||
#define DEFINE_IDTENTRY_VC_SAFE_STACK(func) \
|
||||
DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func)
|
||||
#define DEFINE_IDTENTRY_VC_KERNEL(func) \
|
||||
DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func)
|
||||
|
||||
/**
|
||||
* DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler
|
||||
which runs on the VC fall-back stack
|
||||
* DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler
|
||||
when raised from user mode
|
||||
* @func: Function name of the entry point
|
||||
*
|
||||
* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
|
||||
*/
|
||||
#define DEFINE_IDTENTRY_VC_IST(func) \
|
||||
DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func)
|
||||
|
||||
/**
|
||||
* DEFINE_IDTENTRY_VC - Emit code for VMM communication handler
|
||||
* @func: Function name of the entry point
|
||||
*
|
||||
* Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
|
||||
*/
|
||||
#define DEFINE_IDTENTRY_VC(func) \
|
||||
DEFINE_IDTENTRY_RAW_ERRORCODE(func)
|
||||
#define DEFINE_IDTENTRY_VC_USER(func) \
|
||||
DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func)
|
||||
|
||||
#else /* CONFIG_X86_64 */
|
||||
|
||||
|
|
|
@ -85,7 +85,7 @@
#define KVM_REQ_APICV_UPDATE \
KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26)
#define KVM_REQ_HV_TLB_FLUSH \
#define KVM_REQ_TLB_FLUSH_GUEST \
KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)

@ -320,6 +320,7 @@ union kvm_mmu_extended_role {
unsigned int cr4_pke:1;
unsigned int cr4_smap:1;
unsigned int cr4_smep:1;
unsigned int cr4_la57:1;
unsigned int maxphyaddr:6;
};
};

@ -1463,6 +1464,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
void kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);

void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
struct kvm_memory_slot *memslot,

@ -478,6 +478,7 @@ struct x86_pmu_lbr {

extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
extern void perf_check_microcode(void);
extern void perf_clear_dirty_counters(void);
extern int x86_perf_rdpmc_index(struct perf_event *event);
#else
static inline void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)

@ -44,7 +44,7 @@ static __always_inline void preempt_count_set(int pc)
#define init_task_preempt_count(p) do { } while (0)

#define init_idle_preempt_count(p, cpu) do { \
per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \
per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \
} while (0)

/*

@ -2,10 +2,12 @@
#ifndef _ASM_X86_HWCAP2_H
#define _ASM_X86_HWCAP2_H

#include <linux/const.h>

/* MONITOR/MWAIT enabled in Ring 3 */
#define HWCAP2_RING3MWAIT (1 << 0)
#define HWCAP2_RING3MWAIT _BITUL(0)

/* Kernel allows FSGSBASE instructions available in Ring 3 */
#define HWCAP2_FSGSBASE BIT(1)
#define HWCAP2_FSGSBASE _BITUL(1)

#endif

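The HWCAP2 hunk above switches the flag definitions from (1 << 0)/BIT(1) to _BITUL() from <linux/const.h>, so each flag becomes an unsigned long constant (and, via that header, should also be usable from assembly). A small stand-alone sketch of the width/type difference, using a local BITUL-style macro as a stand-in for the kernel's _BITUL():

    #include <stdio.h>

    /* Local stand-in; the kernel's _BITUL() lives in <linux/const.h>. */
    #define MY_BITUL(x) (1UL << (x))

    int main(void)
    {
        /* A plain int shift is only int-wide; 1 << 31 would overflow signed int. */
        printf("sizeof(1 << 0)      = %zu\n", sizeof(1 << 0));

        /* The unsigned-long variant is as wide as long and never signed. */
        printf("sizeof(MY_BITUL(0)) = %zu\n", sizeof(MY_BITUL(0)));
        printf("MY_BITUL(0) = %#lx, MY_BITUL(1) = %#lx\n",
               MY_BITUL(0), MY_BITUL(1));
        return 0;
    }
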
@ -237,7 +237,7 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus)
for_each_present_cpu(i) {
if (i == 0)
continue;
ret = hv_call_add_logical_proc(numa_cpu_node(i), i, cpu_physical_id(i));
ret = hv_call_add_logical_proc(numa_cpu_node(i), i, i);
BUG_ON(ret);
}

@ -549,6 +549,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_CNL_IDS(&gen9_early_ops),
INTEL_ICL_11_IDS(&gen11_early_ops),
INTEL_EHL_IDS(&gen11_early_ops),
INTEL_JSL_IDS(&gen11_early_ops),
INTEL_TGL_12_IDS(&gen11_early_ops),
INTEL_RKL_IDS(&gen11_early_ops),
INTEL_ADLS_IDS(&gen11_early_ops),

@ -7,12 +7,11 @@
|
|||
* Author: Joerg Roedel <jroedel@suse.de>
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "SEV-ES: " fmt
|
||||
#define pr_fmt(fmt) "SEV: " fmt
|
||||
|
||||
#include <linux/sched/debug.h> /* For show_regs() */
|
||||
#include <linux/percpu-defs.h>
|
||||
#include <linux/mem_encrypt.h>
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/set_memory.h>
|
||||
|
@ -192,11 +191,19 @@ void noinstr __sev_es_ist_exit(void)
|
|||
this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
|
||||
}
|
||||
|
||||
static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
|
||||
/*
|
||||
* Nothing shall interrupt this code path while holding the per-CPU
|
||||
* GHCB. The backup GHCB is only for NMIs interrupting this path.
|
||||
*
|
||||
* Callers must disable local interrupts around it.
|
||||
*/
|
||||
static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
|
||||
{
|
||||
struct sev_es_runtime_data *data;
|
||||
struct ghcb *ghcb;
|
||||
|
||||
WARN_ON(!irqs_disabled());
|
||||
|
||||
data = this_cpu_read(runtime_data);
|
||||
ghcb = &data->ghcb_page;
|
||||
|
||||
|
@ -213,7 +220,9 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
|
|||
data->ghcb_active = false;
|
||||
data->backup_ghcb_active = false;
|
||||
|
||||
instrumentation_begin();
|
||||
panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
/* Mark backup_ghcb active before writing to it */
|
||||
|
@ -479,11 +488,13 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
|
|||
/* Include code shared with pre-decompression boot stage */
|
||||
#include "sev-shared.c"
|
||||
|
||||
static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
|
||||
static noinstr void __sev_put_ghcb(struct ghcb_state *state)
|
||||
{
|
||||
struct sev_es_runtime_data *data;
|
||||
struct ghcb *ghcb;
|
||||
|
||||
WARN_ON(!irqs_disabled());
|
||||
|
||||
data = this_cpu_read(runtime_data);
|
||||
ghcb = &data->ghcb_page;
|
||||
|
||||
|
@ -507,7 +518,7 @@ void noinstr __sev_es_nmi_complete(void)
|
|||
struct ghcb_state state;
|
||||
struct ghcb *ghcb;
|
||||
|
||||
ghcb = sev_es_get_ghcb(&state);
|
||||
ghcb = __sev_get_ghcb(&state);
|
||||
|
||||
vc_ghcb_invalidate(ghcb);
|
||||
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
|
||||
|
@ -517,7 +528,7 @@ void noinstr __sev_es_nmi_complete(void)
|
|||
sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
|
||||
VMGEXIT();
|
||||
|
||||
sev_es_put_ghcb(&state);
|
||||
__sev_put_ghcb(&state);
|
||||
}
|
||||
|
||||
static u64 get_jump_table_addr(void)
|
||||
|
@ -529,7 +540,7 @@ static u64 get_jump_table_addr(void)
|
|||
|
||||
local_irq_save(flags);
|
||||
|
||||
ghcb = sev_es_get_ghcb(&state);
|
||||
ghcb = __sev_get_ghcb(&state);
|
||||
|
||||
vc_ghcb_invalidate(ghcb);
|
||||
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
|
||||
|
@ -543,7 +554,7 @@ static u64 get_jump_table_addr(void)
|
|||
ghcb_sw_exit_info_2_is_valid(ghcb))
|
||||
ret = ghcb->save.sw_exit_info_2;
|
||||
|
||||
sev_es_put_ghcb(&state);
|
||||
__sev_put_ghcb(&state);
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
|
@ -668,7 +679,7 @@ static void sev_es_ap_hlt_loop(void)
|
|||
struct ghcb_state state;
|
||||
struct ghcb *ghcb;
|
||||
|
||||
ghcb = sev_es_get_ghcb(&state);
|
||||
ghcb = __sev_get_ghcb(&state);
|
||||
|
||||
while (true) {
|
||||
vc_ghcb_invalidate(ghcb);
|
||||
|
@ -685,7 +696,7 @@ static void sev_es_ap_hlt_loop(void)
|
|||
break;
|
||||
}
|
||||
|
||||
sev_es_put_ghcb(&state);
|
||||
__sev_put_ghcb(&state);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -775,7 +786,7 @@ void __init sev_es_init_vc_handling(void)
|
|||
sev_es_setup_play_dead();
|
||||
|
||||
/* Secondary CPUs use the runtime #VC handler */
|
||||
initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
|
||||
initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
|
||||
}
|
||||
|
||||
static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
|
||||
|
@ -1213,14 +1224,6 @@ static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
|
|||
return ES_EXCEPTION;
|
||||
}
|
||||
|
||||
static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
|
||||
{
|
||||
if (user_mode(regs))
|
||||
noist_exc_debug(regs);
|
||||
else
|
||||
exc_debug(regs);
|
||||
}
|
||||
|
||||
static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
|
||||
struct ghcb *ghcb,
|
||||
unsigned long exit_code)
|
||||
|
@ -1316,44 +1319,15 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
|
|||
return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
|
||||
}
|
||||
|
||||
/*
|
||||
* Main #VC exception handler. It is called when the entry code was able to
|
||||
* switch off the IST to a safe kernel stack.
|
||||
*
|
||||
* With the current implementation it is always possible to switch to a safe
|
||||
* stack because #VC exceptions only happen at known places, like intercepted
|
||||
* instructions or accesses to MMIO areas/IO ports. They can also happen with
|
||||
* code instrumentation when the hypervisor intercepts #DB, but the critical
|
||||
* paths are forbidden to be instrumented, so #DB exceptions currently also
|
||||
* only happen in safe places.
|
||||
*/
|
||||
DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
|
||||
static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
|
||||
{
|
||||
irqentry_state_t irq_state;
|
||||
struct ghcb_state state;
|
||||
struct es_em_ctxt ctxt;
|
||||
enum es_result result;
|
||||
struct ghcb *ghcb;
|
||||
bool ret = true;
|
||||
|
||||
/*
|
||||
* Handle #DB before calling into !noinstr code to avoid recursive #DB.
|
||||
*/
|
||||
if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) {
|
||||
vc_handle_trap_db(regs);
|
||||
return;
|
||||
}
|
||||
|
||||
irq_state = irqentry_nmi_enter(regs);
|
||||
lockdep_assert_irqs_disabled();
|
||||
instrumentation_begin();
|
||||
|
||||
/*
|
||||
* This is invoked through an interrupt gate, so IRQs are disabled. The
|
||||
* code below might walk page-tables for user or kernel addresses, so
|
||||
* keep the IRQs disabled to protect us against concurrent TLB flushes.
|
||||
*/
|
||||
|
||||
ghcb = sev_es_get_ghcb(&state);
|
||||
ghcb = __sev_get_ghcb(&state);
|
||||
|
||||
vc_ghcb_invalidate(ghcb);
|
||||
result = vc_init_em_ctxt(&ctxt, regs, error_code);
|
||||
|
@ -1361,7 +1335,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
|
|||
if (result == ES_OK)
|
||||
result = vc_handle_exitcode(&ctxt, ghcb, error_code);
|
||||
|
||||
sev_es_put_ghcb(&state);
|
||||
__sev_put_ghcb(&state);
|
||||
|
||||
/* Done - now check the result */
|
||||
switch (result) {
|
||||
|
@ -1371,15 +1345,18 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
|
|||
case ES_UNSUPPORTED:
|
||||
pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
|
||||
error_code, regs->ip);
|
||||
goto fail;
|
||||
ret = false;
|
||||
break;
|
||||
case ES_VMM_ERROR:
|
||||
pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
|
||||
error_code, regs->ip);
|
||||
goto fail;
|
||||
ret = false;
|
||||
break;
|
||||
case ES_DECODE_FAILED:
|
||||
pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
|
||||
error_code, regs->ip);
|
||||
goto fail;
|
||||
ret = false;
|
||||
break;
|
||||
case ES_EXCEPTION:
|
||||
vc_forward_exception(&ctxt);
|
||||
break;
|
||||
|
@ -1395,24 +1372,52 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
|
|||
BUG();
|
||||
}
|
||||
|
||||
out:
|
||||
instrumentation_end();
|
||||
irqentry_nmi_exit(regs, irq_state);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return;
|
||||
static __always_inline bool vc_is_db(unsigned long error_code)
|
||||
{
|
||||
return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
|
||||
}
|
||||
|
||||
fail:
|
||||
if (user_mode(regs)) {
|
||||
/*
|
||||
* Do not kill the machine if user-space triggered the
|
||||
* exception. Send SIGBUS instead and let user-space deal with
|
||||
* it.
|
||||
*/
|
||||
force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
|
||||
} else {
|
||||
pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
|
||||
result);
|
||||
/*
|
||||
* Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
|
||||
* and will panic when an error happens.
|
||||
*/
|
||||
DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
|
||||
{
|
||||
irqentry_state_t irq_state;
|
||||
|
||||
/*
|
||||
* With the current implementation it is always possible to switch to a
|
||||
* safe stack because #VC exceptions only happen at known places, like
|
||||
* intercepted instructions or accesses to MMIO areas/IO ports. They can
|
||||
* also happen with code instrumentation when the hypervisor intercepts
|
||||
* #DB, but the critical paths are forbidden to be instrumented, so #DB
|
||||
* exceptions currently also only happen in safe places.
|
||||
*
|
||||
* But keep this here in case the noinstr annotations are violated due
|
||||
* to bug elsewhere.
|
||||
*/
|
||||
if (unlikely(on_vc_fallback_stack(regs))) {
|
||||
instrumentation_begin();
|
||||
panic("Can't handle #VC exception from unsupported context\n");
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle #DB before calling into !noinstr code to avoid recursive #DB.
|
||||
*/
|
||||
if (vc_is_db(error_code)) {
|
||||
exc_debug(regs);
|
||||
return;
|
||||
}
|
||||
|
||||
irq_state = irqentry_nmi_enter(regs);
|
||||
|
||||
instrumentation_begin();
|
||||
|
||||
if (!vc_raw_handle_exception(regs, error_code)) {
|
||||
/* Show some debug info */
|
||||
show_regs(regs);
|
||||
|
||||
|
@ -1423,23 +1428,38 @@ fail:
|
|||
panic("Returned from Terminate-Request to Hypervisor\n");
|
||||
}
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
|
||||
DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
|
||||
{
|
||||
instrumentation_begin();
|
||||
panic("Can't handle #VC exception from unsupported context\n");
|
||||
instrumentation_end();
|
||||
irqentry_nmi_exit(regs, irq_state);
|
||||
}
|
||||
|
||||
DEFINE_IDTENTRY_VC(exc_vmm_communication)
|
||||
/*
|
||||
* Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
|
||||
* and will kill the current task with SIGBUS when an error happens.
|
||||
*/
|
||||
DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
|
||||
{
|
||||
if (likely(!on_vc_fallback_stack(regs)))
|
||||
safe_stack_exc_vmm_communication(regs, error_code);
|
||||
else
|
||||
ist_exc_vmm_communication(regs, error_code);
|
||||
/*
|
||||
* Handle #DB before calling into !noinstr code to avoid recursive #DB.
|
||||
*/
|
||||
if (vc_is_db(error_code)) {
|
||||
noist_exc_debug(regs);
|
||||
return;
|
||||
}
|
||||
|
||||
irqentry_enter_from_user_mode(regs);
|
||||
instrumentation_begin();
|
||||
|
||||
if (!vc_raw_handle_exception(regs, error_code)) {
|
||||
/*
|
||||
* Do not kill the machine if user-space triggered the
|
||||
* exception. Send SIGBUS instead and let user-space deal with
|
||||
* it.
|
||||
*/
|
||||
force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
|
||||
}
|
||||
|
||||
instrumentation_end();
|
||||
irqentry_exit_to_user_mode(regs);
|
||||
}
|
||||
|
||||
bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
|
||||
|
|
|
@ -236,7 +236,6 @@ static void notrace start_secondary(void *unused)
|
|||
cpu_init();
|
||||
rcu_cpu_starting(raw_smp_processor_id());
|
||||
x86_cpuinit.early_percpu_clock_init();
|
||||
preempt_disable();
|
||||
smp_callin();
|
||||
|
||||
enable_start_cpu0 = 0;
|
||||
|
|
|
@ -1152,7 +1152,8 @@ static struct clocksource clocksource_tsc = {
|
|||
.mask = CLOCKSOURCE_MASK(64),
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
|
||||
CLOCK_SOURCE_VALID_FOR_HRES |
|
||||
CLOCK_SOURCE_MUST_VERIFY,
|
||||
CLOCK_SOURCE_MUST_VERIFY |
|
||||
CLOCK_SOURCE_VERIFY_PERCPU,
|
||||
.vdso_clock_mode = VDSO_CLOCKMODE_TSC,
|
||||
.enable = tsc_cs_enable,
|
||||
.resume = tsc_resume,
|
||||
|
|
|
@ -202,10 +202,10 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
|
|||
static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu);
|
||||
|
||||
/*
|
||||
* Except for the MMU, which needs to be reset after any vendor
|
||||
* specific adjustments to the reserved GPA bits.
|
||||
* Except for the MMU, which needs to do its thing any vendor specific
|
||||
* adjustments to the reserved GPA bits.
|
||||
*/
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
kvm_mmu_after_set_cpuid(vcpu);
|
||||
}
|
||||
|
||||
static int is_efer_nx(void)
|
||||
|
|
|
@ -1704,7 +1704,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, u64 ingpa, u16 rep_cnt, bool
|
|||
* vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
|
||||
* analyze it here, flush TLB regardless of the specified address space.
|
||||
*/
|
||||
kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH,
|
||||
kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST,
|
||||
NULL, vcpu_mask, &hv_vcpu->tlb_flush);
|
||||
|
||||
ret_success:
|
||||
|
|
|
@ -4168,7 +4168,15 @@ static inline u64 reserved_hpa_bits(void)
|
|||
void
|
||||
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
|
||||
{
|
||||
bool uses_nx = context->nx ||
|
||||
/*
|
||||
* KVM uses NX when TDP is disabled to handle a variety of scenarios,
|
||||
* notably for huge SPTEs if iTLB multi-hit mitigation is enabled and
|
||||
* to generate correct permissions for CR0.WP=0/CR4.SMEP=1/EFER.NX=0.
|
||||
* The iTLB multi-hit workaround can be toggled at any time, so assume
|
||||
* NX can be used by any non-nested shadow MMU to avoid having to reset
|
||||
* MMU contexts. Note, KVM forces EFER.NX=1 when TDP is disabled.
|
||||
*/
|
||||
bool uses_nx = context->nx || !tdp_enabled ||
|
||||
context->mmu_role.base.smep_andnot_wp;
|
||||
struct rsvd_bits_validate *shadow_zero_check;
|
||||
int i;
|
||||
|
@ -4476,6 +4484,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
|
|||
ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
|
||||
ext.cr4_pse = !!is_pse(vcpu);
|
||||
ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE);
|
||||
ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57);
|
||||
ext.maxphyaddr = cpuid_maxphyaddr(vcpu);
|
||||
|
||||
ext.valid = 1;
|
||||
|
@ -4850,6 +4859,18 @@ kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu)
|
|||
return role.base;
|
||||
}
|
||||
|
||||
void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
* Invalidate all MMU roles to force them to reinitialize as CPUID
|
||||
* information is factored into reserved bit calculations.
|
||||
*/
|
||||
vcpu->arch.root_mmu.mmu_role.ext.valid = 0;
|
||||
vcpu->arch.guest_mmu.mmu_role.ext.valid = 0;
|
||||
vcpu->arch.nested_mmu.mmu_role.ext.valid = 0;
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
}
|
||||
|
||||
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
kvm_mmu_unload(vcpu);
|
||||
|
|
|
@ -471,8 +471,7 @@ retry_walk:
|
|||
|
||||
error:
|
||||
errcode |= write_fault | user_fault;
|
||||
if (fetch_fault && (mmu->nx ||
|
||||
kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)))
|
||||
if (fetch_fault && (mmu->nx || mmu->mmu_role.ext.cr4_smep))
|
||||
errcode |= PFERR_FETCH_MASK;
|
||||
|
||||
walker->fault.vector = PF_VECTOR;
|
||||
|
|
|
@ -102,13 +102,6 @@ int make_spte(struct kvm_vcpu *vcpu, unsigned int pte_access, int level,
|
|||
else if (kvm_vcpu_ad_need_write_protect(vcpu))
|
||||
spte |= SPTE_TDP_AD_WRPROT_ONLY_MASK;
|
||||
|
||||
/*
|
||||
* Bits 62:52 of PAE SPTEs are reserved. WARN if said bits are set
|
||||
* if PAE paging may be employed (shadow paging or any 32-bit KVM).
|
||||
*/
|
||||
WARN_ON_ONCE((!tdp_enabled || !IS_ENABLED(CONFIG_X86_64)) &&
|
||||
(spte & SPTE_TDP_AD_MASK));
|
||||
|
||||
/*
|
||||
* For the EPT case, shadow_present_mask is 0 if hardware
|
||||
* supports exec-only page table entries. In that case,
|
||||
|
|
|
@ -912,7 +912,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
|
|||
kvm_pfn_t pfn, bool prefault)
|
||||
{
|
||||
u64 new_spte;
|
||||
int ret = 0;
|
||||
int ret = RET_PF_FIXED;
|
||||
int make_spte_ret = 0;
|
||||
|
||||
if (unlikely(is_noslot_pfn(pfn)))
|
||||
|
@ -949,7 +949,11 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write,
|
|||
rcu_dereference(iter->sptep));
|
||||
}
|
||||
|
||||
if (!prefault)
|
||||
/*
|
||||
* Increase pf_fixed in both RET_PF_EMULATE and RET_PF_FIXED to be
|
||||
* consistent with legacy MMU behavior.
|
||||
*/
|
||||
if (ret != RET_PF_SPURIOUS)
|
||||
vcpu->stat.pf_fixed++;
|
||||
|
||||
return ret;
|
||||
|
|
|
@ -1127,12 +1127,19 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne
|
|||
|
||||
/*
|
||||
* Unconditionally skip the TLB flush on fast CR3 switch, all TLB
|
||||
* flushes are handled by nested_vmx_transition_tlb_flush(). See
|
||||
* nested_vmx_transition_mmu_sync for details on skipping the MMU sync.
|
||||
* flushes are handled by nested_vmx_transition_tlb_flush().
|
||||
*/
|
||||
if (!nested_ept)
|
||||
kvm_mmu_new_pgd(vcpu, cr3, true,
|
||||
!nested_vmx_transition_mmu_sync(vcpu));
|
||||
if (!nested_ept) {
|
||||
kvm_mmu_new_pgd(vcpu, cr3, true, true);
|
||||
|
||||
/*
|
||||
* A TLB flush on VM-Enter/VM-Exit flushes all linear mappings
|
||||
* across all PCIDs, i.e. all PGDs need to be synchronized.
|
||||
* See nested_vmx_transition_mmu_sync() for more details.
|
||||
*/
|
||||
if (nested_vmx_transition_mmu_sync(vcpu))
|
||||
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
|
||||
}
|
||||
|
||||
vcpu->arch.cr3 = cr3;
|
||||
kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
|
||||
|
@ -3682,7 +3689,7 @@ void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
}
|
||||
|
||||
static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
|
||||
static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
int max_irr;
|
||||
|
@ -3690,17 +3697,17 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
|
|||
u16 status;
|
||||
|
||||
if (!vmx->nested.pi_desc || !vmx->nested.pi_pending)
|
||||
return;
|
||||
return 0;
|
||||
|
||||
vmx->nested.pi_pending = false;
|
||||
if (!pi_test_and_clear_on(vmx->nested.pi_desc))
|
||||
return;
|
||||
return 0;
|
||||
|
||||
max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
|
||||
if (max_irr != 256) {
|
||||
vapic_page = vmx->nested.virtual_apic_map.hva;
|
||||
if (!vapic_page)
|
||||
return;
|
||||
return 0;
|
||||
|
||||
__kvm_apic_update_irr(vmx->nested.pi_desc->pir,
|
||||
vapic_page, &max_irr);
|
||||
|
@ -3713,6 +3720,7 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
nested_mark_vmcs12_pages_dirty(vcpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
|
||||
|
@ -3887,8 +3895,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
no_vmexit:
|
||||
vmx_complete_nested_posted_interrupt(vcpu);
|
||||
return 0;
|
||||
return vmx_complete_nested_posted_interrupt(vcpu);
|
||||
}
|
||||
|
||||
static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
|
||||
|
@ -5481,8 +5488,6 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
|
|||
{
|
||||
u32 index = kvm_rcx_read(vcpu);
|
||||
u64 new_eptp;
|
||||
bool accessed_dirty;
|
||||
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
|
||||
|
||||
if (!nested_cpu_has_eptp_switching(vmcs12) ||
|
||||
!nested_cpu_has_ept(vmcs12))
|
||||
|
@ -5491,13 +5496,10 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
|
|||
if (index >= VMFUNC_EPTP_ENTRIES)
|
||||
return 1;
|
||||
|
||||
|
||||
if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
|
||||
&new_eptp, index * 8, 8))
|
||||
return 1;
|
||||
|
||||
accessed_dirty = !!(new_eptp & VMX_EPTP_AD_ENABLE_BIT);
|
||||
|
||||
/*
|
||||
* If the (L2) guest does a vmfunc to the currently
|
||||
* active ept pointer, we don't have to do anything else
|
||||
|
@ -5506,8 +5508,6 @@ static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
|
|||
if (!nested_vmx_check_eptp(vcpu, new_eptp))
|
||||
return 1;
|
||||
|
||||
mmu->ept_ad = accessed_dirty;
|
||||
mmu->mmu_role.base.ad_disabled = !accessed_dirty;
|
||||
vmcs12->ept_pointer = new_eptp;
|
||||
|
||||
kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
|
||||
|
@ -5533,7 +5533,7 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
|
||||
vmcs12 = get_vmcs12(vcpu);
|
||||
if ((vmcs12->vm_function_control & (1 << function)) == 0)
|
||||
if (!(vmcs12->vm_function_control & BIT_ULL(function)))
|
||||
goto fail;
|
||||
|
||||
switch (function) {
|
||||
|
@ -5806,6 +5806,9 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
|
|||
else if (is_breakpoint(intr_info) &&
|
||||
vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
|
||||
return true;
|
||||
else if (is_alignment_check(intr_info) &&
|
||||
!vmx_guest_inject_ac(vcpu))
|
||||
return true;
|
||||
return false;
|
||||
case EXIT_REASON_EXTERNAL_INTERRUPT:
|
||||
return true;
|
||||
|
|
|
@ -117,6 +117,11 @@ static inline bool is_gp_fault(u32 intr_info)
|
|||
return is_exception_n(intr_info, GP_VECTOR);
|
||||
}
|
||||
|
||||
static inline bool is_alignment_check(u32 intr_info)
|
||||
{
|
||||
return is_exception_n(intr_info, AC_VECTOR);
|
||||
}
|
||||
|
||||
static inline bool is_machine_check(u32 intr_info)
|
||||
{
|
||||
return is_exception_n(intr_info, MC_VECTOR);
|
||||
|
|
|
@ -4829,7 +4829,7 @@ static int handle_machine_check(struct kvm_vcpu *vcpu)
|
|||
* - Guest has #AC detection enabled in CR0
|
||||
* - Guest EFLAGS has AC bit set
|
||||
*/
|
||||
static inline bool guest_inject_ac(struct kvm_vcpu *vcpu)
|
||||
bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT))
|
||||
return true;
|
||||
|
@ -4937,7 +4937,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
|
|||
kvm_run->debug.arch.exception = ex_no;
|
||||
break;
|
||||
case AC_VECTOR:
|
||||
if (guest_inject_ac(vcpu)) {
|
||||
if (vmx_guest_inject_ac(vcpu)) {
|
||||
kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
|
||||
return 1;
|
||||
}
|
||||
|
|
|
@ -387,6 +387,7 @@ void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
|
|||
void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
|
||||
u64 construct_eptp(struct kvm_vcpu *vcpu, hpa_t root_hpa, int root_level);
|
||||
|
||||
bool vmx_guest_inject_ac(struct kvm_vcpu *vcpu);
|
||||
void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu);
|
||||
void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu);
|
||||
bool vmx_nmi_blocked(struct kvm_vcpu *vcpu);
|
||||
|
|
|
@ -9179,7 +9179,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
|
|||
}
|
||||
if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
|
||||
kvm_vcpu_flush_tlb_current(vcpu);
|
||||
if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
|
||||
if (kvm_check_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu))
|
||||
kvm_vcpu_flush_tlb_guest(vcpu);
|
||||
|
||||
if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
|
||||
|
@ -10462,6 +10462,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
|
|||
|
||||
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
||||
{
|
||||
unsigned long old_cr0 = kvm_read_cr0(vcpu);
|
||||
|
||||
kvm_lapic_reset(vcpu, init_event);
|
||||
|
||||
vcpu->arch.hflags = 0;
|
||||
|
@ -10530,6 +10532,17 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|||
vcpu->arch.ia32_xss = 0;
|
||||
|
||||
static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
|
||||
|
||||
/*
|
||||
* Reset the MMU context if paging was enabled prior to INIT (which is
|
||||
* implied if CR0.PG=1 as CR0 will be '0' prior to RESET). Unlike the
|
||||
* standard CR0/CR4/EFER modification paths, only CR0.PG needs to be
|
||||
* checked because it is unconditionally cleared on INIT and all other
|
||||
* paging related bits are ignored if paging is disabled, i.e. CR0.WP,
|
||||
* CR4, and EFER changes are all irrelevant if CR0.PG was '0'.
|
||||
*/
|
||||
if (old_cr0 & X86_CR0_PG)
|
||||
kvm_mmu_reset_context(vcpu);
|
||||
}
|
||||
|
||||
void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include <asm/nospec-branch.h>
|
||||
#include <asm/cache.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/perf_event.h>
|
||||
|
||||
#include "mm_internal.h"
|
||||
|
||||
|
@ -404,9 +405,14 @@ static inline void cr4_update_pce_mm(struct mm_struct *mm)
|
|||
{
|
||||
if (static_branch_unlikely(&rdpmc_always_available_key) ||
|
||||
(!static_branch_unlikely(&rdpmc_never_available_key) &&
|
||||
atomic_read(&mm->context.perf_rdpmc_allowed)))
|
||||
atomic_read(&mm->context.perf_rdpmc_allowed))) {
|
||||
/*
|
||||
* Clear the existing dirty counters to
|
||||
* prevent the leak for an RDPMC task.
|
||||
*/
|
||||
perf_clear_dirty_counters();
|
||||
cr4_set_bits_irqsoff(X86_CR4_PCE);
|
||||
else
|
||||
} else
|
||||
cr4_clear_bits_irqsoff(X86_CR4_PCE);
|
||||
}
|
||||
|
||||
|
|
|
@ -1297,7 +1297,7 @@ st: if (is_imm8(insn->off))
|
|||
emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
|
||||
if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
|
||||
struct exception_table_entry *ex;
|
||||
u8 *_insn = image + proglen;
|
||||
u8 *_insn = image + proglen + (start_of_ldx - temp);
|
||||
s64 delta;
|
||||
|
||||
/* populate jmp_offset for JMP above */
|
||||
|
|
|
@ -145,7 +145,6 @@ void secondary_start_kernel(void)
|
|||
cpumask_set_cpu(cpu, mm_cpumask(mm));
|
||||
enter_lazy_tlb(mm, current);
|
||||
|
||||
preempt_disable();
|
||||
trace_hardirqs_off();
|
||||
|
||||
calibrate_delay();
|
||||
|
|
|
@ -2695,9 +2695,15 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
|||
* costly and complicated.
|
||||
*/
|
||||
if (unlikely(!bfqd->nonrot_with_queueing)) {
|
||||
if (bic->stable_merge_bfqq &&
|
||||
/*
|
||||
* Make sure also that bfqq is sync, because
|
||||
* bic->stable_merge_bfqq may point to some queue (for
|
||||
* stable merging) also if bic is associated with a
|
||||
* sync queue, but this bfqq is async
|
||||
*/
|
||||
if (bfq_bfqq_sync(bfqq) && bic->stable_merge_bfqq &&
|
||||
!bfq_bfqq_just_created(bfqq) &&
|
||||
time_is_after_jiffies(bfqq->split_time +
|
||||
time_is_before_jiffies(bfqq->split_time +
|
||||
msecs_to_jiffies(200))) {
|
||||
struct bfq_queue *stable_merge_bfqq =
|
||||
bic->stable_merge_bfqq;
|
||||
|
@ -6129,11 +6135,13 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
|
|||
* of other queues. But a false waker will unjustly steal
|
||||
* bandwidth to its supposedly woken queue. So considering
|
||||
* also shared queues in the waking mechanism may cause more
|
||||
* control troubles than throughput benefits. Then do not set
|
||||
* last_completed_rq_bfqq to bfqq if bfqq is a shared queue.
|
||||
* control troubles than throughput benefits. Then reset
|
||||
* last_completed_rq_bfqq if bfqq is a shared queue.
|
||||
*/
|
||||
if (!bfq_bfqq_coop(bfqq))
|
||||
bfqd->last_completed_rq_bfqq = bfqq;
|
||||
else
|
||||
bfqd->last_completed_rq_bfqq = NULL;
|
||||
|
||||
/*
|
||||
* If we are waiting to discover whether the request pattern
|
||||
|
|
block/bio.c
|
@ -1375,8 +1375,7 @@ static inline bool bio_remaining_done(struct bio *bio)
|
|||
*
|
||||
* bio_endio() can be called several times on a bio that has been chained
|
||||
* using bio_chain(). The ->bi_end_io() function will only be called the
|
||||
* last time. At this point the BLK_TA_COMPLETE tracing event will be
|
||||
* generated if BIO_TRACE_COMPLETION is set.
|
||||
* last time.
|
||||
**/
|
||||
void bio_endio(struct bio *bio)
|
||||
{
|
||||
|
@ -1389,6 +1388,11 @@ again:
|
|||
if (bio->bi_bdev)
|
||||
rq_qos_done_bio(bio->bi_bdev->bd_disk->queue, bio);
|
||||
|
||||
if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
|
||||
trace_block_bio_complete(bio->bi_bdev->bd_disk->queue, bio);
|
||||
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
|
||||
}
|
||||
|
||||
/*
|
||||
* Need to have a real endio function for chained bios, otherwise
|
||||
* various corner cases will break (like stacking block devices that
|
||||
|
@ -1402,11 +1406,6 @@ again:
|
|||
goto again;
|
||||
}
|
||||
|
||||
if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
|
||||
trace_block_bio_complete(bio->bi_bdev->bd_disk->queue, bio);
|
||||
bio_clear_flag(bio, BIO_TRACE_COMPLETION);
|
||||
}
|
||||
|
||||
blk_throtl_bio_endio(bio);
|
||||
/* release cgroup info */
|
||||
bio_uninit(bio);
|
||||
|
|
|
@ -219,8 +219,6 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
|
|||
unsigned long flags = 0;
|
||||
struct blk_flush_queue *fq = blk_get_flush_queue(q, flush_rq->mq_ctx);
|
||||
|
||||
blk_account_io_flush(flush_rq);
|
||||
|
||||
/* release the tag's ownership to the req cloned from */
|
||||
spin_lock_irqsave(&fq->mq_flush_lock, flags);
|
||||
|
||||
|
@ -230,6 +228,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
|
|||
return;
|
||||
}
|
||||
|
||||
blk_account_io_flush(flush_rq);
|
||||
/*
|
||||
* Flush request has to be marked as IDLE when it is really ended
|
||||
* because its .end_io() is called from timeout code path too for
|
||||
|
|
|
@ -559,10 +559,14 @@ static inline unsigned int blk_rq_get_max_segments(struct request *rq)
|
|||
static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
|
||||
unsigned int nr_phys_segs)
|
||||
{
|
||||
if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
|
||||
if (blk_integrity_merge_bio(req->q, req, bio) == false)
|
||||
goto no_merge;
|
||||
|
||||
if (blk_integrity_merge_bio(req->q, req, bio) == false)
|
||||
/* discard request merge won't add new segment */
|
||||
if (req_op(req) == REQ_OP_DISCARD)
|
||||
return 1;
|
||||
|
||||
if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
|
||||
goto no_merge;
|
||||
|
||||
/*
|
||||
|
|
|
@ -199,6 +199,20 @@ struct bt_iter_data {
|
|||
bool reserved;
|
||||
};
|
||||
|
||||
static struct request *blk_mq_find_and_get_req(struct blk_mq_tags *tags,
|
||||
unsigned int bitnr)
|
||||
{
|
||||
struct request *rq;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&tags->lock, flags);
|
||||
rq = tags->rqs[bitnr];
|
||||
if (!rq || !refcount_inc_not_zero(&rq->ref))
|
||||
rq = NULL;
|
||||
spin_unlock_irqrestore(&tags->lock, flags);
|
||||
return rq;
|
||||
}
|
||||
|
||||
static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
||||
{
|
||||
struct bt_iter_data *iter_data = data;
|
||||
|
@ -206,18 +220,22 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
|||
struct blk_mq_tags *tags = hctx->tags;
|
||||
bool reserved = iter_data->reserved;
|
||||
struct request *rq;
|
||||
bool ret = true;
|
||||
|
||||
if (!reserved)
|
||||
bitnr += tags->nr_reserved_tags;
|
||||
rq = tags->rqs[bitnr];
|
||||
|
||||
/*
|
||||
* We can hit rq == NULL here, because the tagging functions
|
||||
* test and set the bit before assigning ->rqs[].
|
||||
*/
|
||||
if (rq && rq->q == hctx->queue && rq->mq_hctx == hctx)
|
||||
return iter_data->fn(hctx, rq, iter_data->data, reserved);
|
||||
return true;
|
||||
rq = blk_mq_find_and_get_req(tags, bitnr);
|
||||
if (!rq)
|
||||
return true;
|
||||
|
||||
if (rq->q == hctx->queue && rq->mq_hctx == hctx)
|
||||
ret = iter_data->fn(hctx, rq, iter_data->data, reserved);
|
||||
blk_mq_put_rq_ref(rq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -264,6 +282,8 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
|||
struct blk_mq_tags *tags = iter_data->tags;
|
||||
bool reserved = iter_data->flags & BT_TAG_ITER_RESERVED;
|
||||
struct request *rq;
|
||||
bool ret = true;
|
||||
bool iter_static_rqs = !!(iter_data->flags & BT_TAG_ITER_STATIC_RQS);
|
||||
|
||||
if (!reserved)
|
||||
bitnr += tags->nr_reserved_tags;
|
||||
|
@ -272,16 +292,19 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
|
|||
* We can hit rq == NULL here, because the tagging functions
|
||||
* test and set the bit before assigning ->rqs[].
|
||||
*/
|
||||
if (iter_data->flags & BT_TAG_ITER_STATIC_RQS)
|
||||
if (iter_static_rqs)
|
||||
rq = tags->static_rqs[bitnr];
|
||||
else
|
||||
rq = tags->rqs[bitnr];
|
||||
rq = blk_mq_find_and_get_req(tags, bitnr);
|
||||
if (!rq)
|
||||
return true;
|
||||
if ((iter_data->flags & BT_TAG_ITER_STARTED) &&
|
||||
!blk_mq_request_started(rq))
|
||||
return true;
|
||||
return iter_data->fn(rq, iter_data->data, reserved);
|
||||
|
||||
if (!(iter_data->flags & BT_TAG_ITER_STARTED) ||
|
||||
blk_mq_request_started(rq))
|
||||
ret = iter_data->fn(rq, iter_data->data, reserved);
|
||||
if (!iter_static_rqs)
|
||||
blk_mq_put_rq_ref(rq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -348,6 +371,9 @@ void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
|
|||
* indicates whether or not @rq is a reserved request. Return
|
||||
* true to continue iterating tags, false to stop.
|
||||
* @priv: Will be passed as second argument to @fn.
|
||||
*
|
||||
* We grab one request reference before calling @fn and release it after
|
||||
* @fn returns.
|
||||
*/
|
||||
void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
|
||||
busy_tag_iter_fn *fn, void *priv)
|
||||
|
@ -516,6 +542,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
|
|||
|
||||
tags->nr_tags = total_tags;
|
||||
tags->nr_reserved_tags = reserved_tags;
|
||||
spin_lock_init(&tags->lock);
|
||||
|
||||
if (blk_mq_is_sbitmap_shared(flags))
|
||||
return tags;
|
||||
|
|
|
@ -20,6 +20,12 @@ struct blk_mq_tags {
|
|||
struct request **rqs;
|
||||
struct request **static_rqs;
|
||||
struct list_head page_list;
|
||||
|
||||
/*
|
||||
* used to clear request reference in rqs[] before freeing one
|
||||
* request pool
|
||||
*/
|
||||
spinlock_t lock;
|
||||
};
|
||||
|
||||
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
|
||||
|
|
|
@ -909,6 +909,14 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
|
|||
return false;
|
||||
}
|
||||
|
||||
void blk_mq_put_rq_ref(struct request *rq)
|
||||
{
|
||||
if (is_flush_rq(rq, rq->mq_hctx))
|
||||
rq->end_io(rq, 0);
|
||||
else if (refcount_dec_and_test(&rq->ref))
|
||||
__blk_mq_free_request(rq);
|
||||
}
|
||||
|
||||
static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
|
||||
struct request *rq, void *priv, bool reserved)
|
||||
{
|
||||
|
@ -942,11 +950,7 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
|
|||
if (blk_mq_req_expired(rq, next))
|
||||
blk_mq_rq_timed_out(rq, reserved);
|
||||
|
||||
if (is_flush_rq(rq, hctx))
|
||||
rq->end_io(rq, 0);
|
||||
else if (refcount_dec_and_test(&rq->ref))
|
||||
__blk_mq_free_request(rq);
|
||||
|
||||
blk_mq_put_rq_ref(rq);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1220,9 +1224,6 @@ static void blk_mq_update_dispatch_busy(struct blk_mq_hw_ctx *hctx, bool busy)
|
|||
{
|
||||
unsigned int ewma;
|
||||
|
||||
if (hctx->queue->elevator)
|
||||
return;
|
||||
|
||||
ewma = hctx->dispatch_busy;
|
||||
|
||||
if (!ewma && !busy)
|
||||
|
@ -2303,6 +2304,45 @@ queue_exit:
|
|||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
static size_t order_to_size(unsigned int order)
|
||||
{
|
||||
return (size_t)PAGE_SIZE << order;
|
||||
}
|
||||
|
||||
/* called before freeing request pool in @tags */
|
||||
static void blk_mq_clear_rq_mapping(struct blk_mq_tag_set *set,
|
||||
struct blk_mq_tags *tags, unsigned int hctx_idx)
|
||||
{
|
||||
struct blk_mq_tags *drv_tags = set->tags[hctx_idx];
|
||||
struct page *page;
|
||||
unsigned long flags;
|
||||
|
||||
list_for_each_entry(page, &tags->page_list, lru) {
|
||||
unsigned long start = (unsigned long)page_address(page);
|
||||
unsigned long end = start + order_to_size(page->private);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < set->queue_depth; i++) {
|
||||
struct request *rq = drv_tags->rqs[i];
|
||||
unsigned long rq_addr = (unsigned long)rq;
|
||||
|
||||
if (rq_addr >= start && rq_addr < end) {
|
||||
WARN_ON_ONCE(refcount_read(&rq->ref) != 0);
|
||||
cmpxchg(&drv_tags->rqs[i], rq, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait until all pending iteration is done.
|
||||
*
|
||||
* Request reference is cleared and it is guaranteed to be observed
|
||||
* after the ->lock is released.
|
||||
*/
|
||||
spin_lock_irqsave(&drv_tags->lock, flags);
|
||||
spin_unlock_irqrestore(&drv_tags->lock, flags);
|
||||
}
|
||||
|
||||
void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
|
||||
unsigned int hctx_idx)
|
||||
{
|
||||
|
@ -2321,6 +2361,8 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
|
|||
}
|
||||
}
|
||||
|
||||
blk_mq_clear_rq_mapping(set, tags, hctx_idx);
|
||||
|
||||
while (!list_empty(&tags->page_list)) {
|
||||
page = list_first_entry(&tags->page_list, struct page, lru);
|
||||
list_del_init(&page->lru);
|
||||
|
@ -2380,11 +2422,6 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
|
|||
return tags;
|
||||
}
|
||||
|
||||
static size_t order_to_size(unsigned int order)
|
||||
{
|
||||
return (size_t)PAGE_SIZE << order;
|
||||
}
|
||||
|
||||
static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
|
||||
unsigned int hctx_idx, int node)
|
||||
{
|
||||
|
|
|
@ -47,6 +47,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
|
|||
void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
|
||||
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
|
||||
struct blk_mq_ctx *start);
|
||||
void blk_mq_put_rq_ref(struct request *rq);
|
||||
|
||||
/*
|
||||
* Internal helpers for allocating/freeing the request map
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <linux/blk_types.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/blk-mq.h>
|
||||
|
||||
#include "blk-mq-debugfs.h"
|
||||
|
||||
|
@ -99,8 +100,21 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
|
|||
|
||||
static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
|
||||
{
|
||||
/*
|
||||
* No IO can be in-flight when adding rqos, so freeze queue, which
|
||||
* is fine since we only support rq_qos for blk-mq queue.
|
||||
*
|
||||
* Reuse ->queue_lock for protecting against other concurrent
|
||||
* rq_qos adding/deleting
|
||||
*/
|
||||
blk_mq_freeze_queue(q);
|
||||
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
rqos->next = q->rq_qos;
|
||||
q->rq_qos = rqos;
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
if (rqos->ops->debugfs_attrs)
|
||||
blk_mq_debugfs_register_rqos(rqos);
|
||||
|
@ -110,12 +124,22 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
|
|||
{
|
||||
struct rq_qos **cur;
|
||||
|
||||
/*
|
||||
* See comment in rq_qos_add() about freezing queue & using
|
||||
* ->queue_lock.
|
||||
*/
|
||||
blk_mq_freeze_queue(q);
|
||||
|
||||
spin_lock_irq(&q->queue_lock);
|
||||
for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
|
||||
if (*cur == rqos) {
|
||||
*cur = rqos->next;
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock_irq(&q->queue_lock);
|
||||
|
||||
blk_mq_unfreeze_queue(q);
|
||||
|
||||
blk_mq_debugfs_unregister_rqos(rqos);
|
||||
}
|
||||
|
|