mirror of
git://git.yoctoproject.org/linux-yocto.git
synced 2025-10-22 23:13:01 +02:00
KVM x86 and selftests fixes for 6.12:
x86: - When emulating a guest TLB flush for a nested guest, flush vpid01, not vpid02, if L2 is active but VPID is disabled in vmcs12, i.e. if L2 and L1 are sharing VPID '0' (from L1's perspective). - Fix a bug in the SNP initialization flow where KVM would return '0' to userspace instead of -errno on failure. - Move the Intel PT virtualization (i.e. outputting host trace to host buffer and guest trace to guest buffer) behind CONFIG_BROKEN. - Fix memory leak on failure of KVM_SEV_SNP_LAUNCH_START - Fix a bug where KVM fails to inject an interrupt from the IRR after KVM_SET_LAPIC. Selftests: - Increase the timeout for the memslot performance selftest to avoid false failures on arm64 and nested x86 platforms. - Fix a goof in the guest_memfd selftest where a for-loop initialized a bit mask to zero instead of BIT(0). - Disable strict aliasing when building KVM selftests to prevent the compiler from treating things like "u64 *" to "uint64_t *" cases as undefined behavior, which can lead to nasty, hard to debug failures. - Force -march=x86-64-v2 for KVM x86 selftests if and only if the uarch is supported by the compiler. - Fix broken compilation of kvm selftests after a header sync in tools/ -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmczm1QUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroOLKwf+IjkJHZ/LS95HuP/0QLM17Sc4MmiZ Pk5gLd5un7BBSLA98RvALR/YPnsA7emEJ34bE/8lQ6R5VSZ5PrIzF+29f60HzRFe EDi1/24dqnzdWn50na5nk7A2QhFpfnLQQTl7vMqPFsrU7gfLuHQI6ABp9kloEwP/ xnjAT683IWNX9v0N2A8kNemy9NNMGssJk1ssDTGzNflSyRNL8cLPGlPkZqAIMsM6 fHjkDRg0UxasUDkL5CjwnTSdBGoz+/Myyz4unFlYGJB9D3+ev2qDlMqATO4Jfik/ peJMZ65i8/8/7MgKCTn8qQuT0FLLEvxTuzDHUSGzjMZl0DGaZi2BPETNqg== =nW8/ -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull kvm fixes from Paolo Bonzini: "x86 and selftests fixes. x86: - When emulating a guest TLB flush for a nested guest, flush vpid01, not vpid02, if L2 is active but VPID is disabled in vmcs12, i.e. 
if L2 and L1 are sharing VPID '0' (from L1's perspective). - Fix a bug in the SNP initialization flow where KVM would return '0' to userspace instead of -errno on failure. - Move the Intel PT virtualization (i.e. outputting host trace to host buffer and guest trace to guest buffer) behind CONFIG_BROKEN. - Fix memory leak on failure of KVM_SEV_SNP_LAUNCH_START - Fix a bug where KVM fails to inject an interrupt from the IRR after KVM_SET_LAPIC. Selftests: - Increase the timeout for the memslot performance selftest to avoid false failures on arm64 and nested x86 platforms. - Fix a goof in the guest_memfd selftest where a for-loop initialized a bit mask to zero instead of BIT(0). - Disable strict aliasing when building KVM selftests to prevent the compiler from treating things like "u64 *" to "uint64_t *" cases as undefined behavior, which can lead to nasty, hard to debug failures. - Force -march=x86-64-v2 for KVM x86 selftests if and only if the uarch is supported by the compiler. - Fix broken compilation of kvm selftests after a header sync in tools/" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: KVM: VMX: Bury Intel PT virtualization (guest/host mode) behind CONFIG_BROKEN KVM: x86: Unconditionally set irr_pending when updating APICv state kvm: svm: Fix gctx page leak on invalid inputs KVM: selftests: use X86_MEMTYPE_WB instead of VMX_BASIC_MEM_TYPE_WB KVM: SVM: Propagate error from snp_guest_req_init() to userspace KVM: nVMX: Treat vpid01 as current if L2 is active, but with VPID disabled KVM: selftests: Don't force -march=x86-64-v2 if it's unsupported KVM: selftests: Disable strict aliasing KVM: selftests: fix unintentional noop test in guest_memfd_test.c KVM: selftests: memslot_perf_test: increase guest sync timeout
This commit is contained in:
commit
14b6320953
|
@ -2629,19 +2629,26 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
|
|||
{
|
||||
struct kvm_lapic *apic = vcpu->arch.apic;
|
||||
|
||||
if (apic->apicv_active) {
|
||||
/* irr_pending is always true when apicv is activated. */
|
||||
apic->irr_pending = true;
|
||||
/*
|
||||
* When APICv is enabled, KVM must always search the IRR for a pending
|
||||
* IRQ, as other vCPUs and devices can set IRR bits even if the vCPU
|
||||
* isn't running. If APICv is disabled, KVM _should_ search the IRR
|
||||
* for a pending IRQ. But KVM currently doesn't ensure *all* hardware,
|
||||
* e.g. CPUs and IOMMUs, has seen the change in state, i.e. searching
|
||||
* the IRR at this time could race with IRQ delivery from hardware that
|
||||
* still sees APICv as being enabled.
|
||||
*
|
||||
* FIXME: Ensure other vCPUs and devices observe the change in APICv
|
||||
* state prior to updating KVM's metadata caches, so that KVM
|
||||
* can safely search the IRR and set irr_pending accordingly.
|
||||
*/
|
||||
apic->irr_pending = true;
|
||||
|
||||
if (apic->apicv_active)
|
||||
apic->isr_count = 1;
|
||||
} else {
|
||||
/*
|
||||
* Don't clear irr_pending, searching the IRR can race with
|
||||
* updates from the CPU as APICv is still active from hardware's
|
||||
* perspective. The flag will be cleared as appropriate when
|
||||
* KVM injects the interrupt.
|
||||
*/
|
||||
else
|
||||
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
|
||||
}
|
||||
|
||||
apic->highest_isr_cache = -1;
|
||||
}
|
||||
|
||||
|
|
|
@ -450,8 +450,11 @@ static int __sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp,
|
|||
goto e_free;
|
||||
|
||||
/* This needs to happen after SEV/SNP firmware initialization. */
|
||||
if (vm_type == KVM_X86_SNP_VM && snp_guest_req_init(kvm))
|
||||
goto e_free;
|
||||
if (vm_type == KVM_X86_SNP_VM) {
|
||||
ret = snp_guest_req_init(kvm);
|
||||
if (ret)
|
||||
goto e_free;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&sev->regions_list);
|
||||
INIT_LIST_HEAD(&sev->mirror_vms);
|
||||
|
@ -2212,10 +2215,6 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|||
if (sev->snp_context)
|
||||
return -EINVAL;
|
||||
|
||||
sev->snp_context = snp_context_create(kvm, argp);
|
||||
if (!sev->snp_context)
|
||||
return -ENOTTY;
|
||||
|
||||
if (params.flags)
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -2230,6 +2229,10 @@ static int snp_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
|
|||
if (params.policy & SNP_POLICY_MASK_SINGLE_SOCKET)
|
||||
return -EINVAL;
|
||||
|
||||
sev->snp_context = snp_context_create(kvm, argp);
|
||||
if (!sev->snp_context)
|
||||
return -ENOTTY;
|
||||
|
||||
start.gctx_paddr = __psp_pa(sev->snp_context);
|
||||
start.policy = params.policy;
|
||||
memcpy(start.gosvw, params.gosvw, sizeof(params.gosvw));
|
||||
|
|
|
@ -1197,11 +1197,14 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
|
|||
kvm_hv_nested_transtion_tlb_flush(vcpu, enable_ept);
|
||||
|
||||
/*
|
||||
* If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
|
||||
* for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
|
||||
* full TLB flush from the guest's perspective. This is required even
|
||||
* if VPID is disabled in the host as KVM may need to synchronize the
|
||||
* MMU in response to the guest TLB flush.
|
||||
* If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the
|
||||
* same VPID as the host, and so architecturally, linear and combined
|
||||
* mappings for VPID=0 must be flushed at VM-Enter and VM-Exit. KVM
|
||||
* emulates L2 sharing L1's VPID=0 by using vpid01 while running L2,
|
||||
* and so KVM must also emulate TLB flush of VPID=0, i.e. vpid01. This
|
||||
* is required even if VPID is disabled in KVM, as a TLB flush (there are no
|
||||
* VPIDs) still occurs from L1's perspective, and KVM may need to
|
||||
* synchronize the MMU in response to the guest TLB flush.
|
||||
*
|
||||
* Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use.
|
||||
* EPT is a special snowflake, as guest-physical mappings aren't
|
||||
|
@ -2315,6 +2318,17 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
|
|||
|
||||
vmcs_write64(VMCS_LINK_POINTER, INVALID_GPA);
|
||||
|
||||
/*
|
||||
* If VPID is disabled, then guest TLB accesses use VPID=0, i.e. the
|
||||
* same VPID as the host. Emulate this behavior by using vpid01 for L2
|
||||
* if VPID is disabled in vmcs12. Note, if VPID is disabled, VM-Enter
|
||||
* and VM-Exit are architecturally required to flush VPID=0, but *only*
|
||||
* VPID=0. I.e. using vpid02 would be ok (so long as KVM emulates the
|
||||
* required flushes), but doing so would cause KVM to over-flush. E.g.
|
||||
* if L1 runs L2 X with VPID12=1, then runs L2 Y with VPID12 disabled,
|
||||
* and then runs L2 X again, then KVM can and should retain TLB entries
|
||||
* for VPID12=1.
|
||||
*/
|
||||
if (enable_vpid) {
|
||||
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
|
||||
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
|
||||
|
@ -5950,6 +5964,12 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
|
|||
return nested_vmx_fail(vcpu,
|
||||
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
|
||||
|
||||
/*
|
||||
* Always flush the effective vpid02, i.e. never flush the current VPID
|
||||
* and never explicitly flush vpid01. INVVPID targets a VPID, not a
|
||||
* VMCS, and so whether or not the current vmcs12 has VPID enabled is
|
||||
* irrelevant (and there may not be a loaded vmcs12).
|
||||
*/
|
||||
vpid02 = nested_get_vpid02(vcpu);
|
||||
switch (type) {
|
||||
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
|
||||
|
|
|
@ -217,9 +217,11 @@ module_param(ple_window_shrink, uint, 0444);
|
|||
static unsigned int ple_window_max = KVM_VMX_DEFAULT_PLE_WINDOW_MAX;
|
||||
module_param(ple_window_max, uint, 0444);
|
||||
|
||||
/* Default is SYSTEM mode, 1 for host-guest mode */
|
||||
/* Default is SYSTEM mode, 1 for host-guest mode (which is BROKEN) */
|
||||
int __read_mostly pt_mode = PT_MODE_SYSTEM;
|
||||
#ifdef CONFIG_BROKEN
|
||||
module_param(pt_mode, int, S_IRUGO);
|
||||
#endif
|
||||
|
||||
struct x86_pmu_lbr __ro_after_init vmx_lbr_caps;
|
||||
|
||||
|
@ -3216,7 +3218,7 @@ void vmx_flush_tlb_all(struct kvm_vcpu *vcpu)
|
|||
|
||||
static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (is_guest_mode(vcpu))
|
||||
if (is_guest_mode(vcpu) && nested_cpu_has_vpid(get_vmcs12(vcpu)))
|
||||
return nested_get_vpid02(vcpu);
|
||||
return to_vmx(vcpu)->vpid;
|
||||
}
|
||||
|
|
|
@ -241,16 +241,18 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
|
|||
-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
|
||||
-fno-builtin-memcmp -fno-builtin-memcpy \
|
||||
-fno-builtin-memset -fno-builtin-strnlen \
|
||||
-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
|
||||
-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
|
||||
-I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \
|
||||
$(KHDR_INCLUDES)
|
||||
-fno-stack-protector -fno-PIE -fno-strict-aliasing \
|
||||
-I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \
|
||||
-I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH_DIR) \
|
||||
-I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
|
||||
ifeq ($(ARCH),s390)
|
||||
CFLAGS += -march=z10
|
||||
endif
|
||||
ifeq ($(ARCH),x86)
|
||||
ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0)
|
||||
CFLAGS += -march=x86-64-v2
|
||||
endif
|
||||
endif
|
||||
ifeq ($(ARCH),arm64)
|
||||
tools_dir := $(top_srcdir)/tools
|
||||
arm64_tools_dir := $(tools_dir)/arch/arm64/tools/
|
||||
|
|
|
@ -134,7 +134,7 @@ static void test_create_guest_memfd_invalid(struct kvm_vm *vm)
|
|||
size);
|
||||
}
|
||||
|
||||
for (flag = 0; flag; flag <<= 1) {
|
||||
for (flag = BIT(0); flag; flag <<= 1) {
|
||||
fd = __vm_create_guest_memfd(vm, page_size, flag);
|
||||
TEST_ASSERT(fd == -1 && errno == EINVAL,
|
||||
"guest_memfd() with flag '0x%lx' should fail with EINVAL",
|
||||
|
|
|
@ -200,7 +200,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
|
|||
if (vmx->eptp_gpa) {
|
||||
uint64_t ept_paddr;
|
||||
struct eptPageTablePointer eptp = {
|
||||
.memory_type = VMX_BASIC_MEM_TYPE_WB,
|
||||
.memory_type = X86_MEMTYPE_WB,
|
||||
.page_walk_length = 3, /* + 1 */
|
||||
.ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
|
||||
.address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
|
||||
|
|
|
@ -417,7 +417,7 @@ static bool _guest_should_exit(void)
|
|||
*/
|
||||
static noinline void host_perform_sync(struct sync_area *sync)
|
||||
{
|
||||
alarm(2);
|
||||
alarm(10);
|
||||
|
||||
atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
|
||||
while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
|
||||
|
|
Loading…
Reference in New Issue
Block a user