* fix latent bug in how usage of large pages is determined for
  confidential VMs

* fix "underline too short" in docs

* eliminate log spam from limited APIC timer periods

* disallow pre-faulting of memory before SEV-SNP VMs are initialized

* delay clearing and encrypting private memory until it is added to
  guest page tables

* this change also enables another small cleanup: the checks in
  SNP_LAUNCH_UPDATE that limit it to non-populated, private pages can
  now be moved in the common kvm_gmem_populate() function

* fix compilation error that the RISC-V merge introduced in selftests

-----BEGIN PGP SIGNATURE-----

iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmatCoMUHHBib256aW5p
QHJlZGhhdC5jb20ACgkQv/vSX3jHroMUmgf9FwuSypOyZeZM4DKpNoMdaDQGVBn2
nTUYDJdiggmUNWA5MenqKtZ5N7G79iDO5HNDOUGBXn33f78EimDxsSC+Xfy54RNF
SwEUZxQR/y81xOF2LIzfisWkNY+4Bf9fDALUbAlOj/O0E/YHDO9gk+ZNnvdHkWMe
72euiii1xlIV/+Snq7QQZU2UiUNalIfN0wCtPRYG9RGbG+yF2ksm01QU3aE8Q2uu
aSN3/DxfiFmKPEP5YQ1qXyntpQ8hA1WfONuUUhmgBgZlSdPS93nyL7y030QDzhgn
/JayovN14I3S73rLcepmw3Jx4vTltX1QJA+JqBoKBv/gXJQ8ZCqyLzqrvQ==
=ExK2
-----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini:
 "The bulk of the changes here is a largish change to guest_memfd,
  delaying the clearing and encryption of guest-private pages until
  they are actually added to guest page tables. This started as "let's
  make it impossible to misuse the API" for SEV-SNP; but then it
  ballooned a bit.

  The new logic is generally simpler and more ready for hugepage
  support in guest_memfd.

  Summary:

   - fix latent bug in how usage of large pages is determined for
     confidential VMs

   - fix "underline too short" in docs

   - eliminate log spam from limited APIC timer periods

   - disallow pre-faulting of memory before SEV-SNP VMs are initialized

   - delay clearing and encrypting private memory until it is added to
     guest page tables

   - this change also enables another small cleanup: the checks in
     SNP_LAUNCH_UPDATE that limit it to non-populated, private pages
     can now be moved in the common kvm_gmem_populate() function

   - fix compilation error that the RISC-V merge introduced in
     selftests"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86/mmu: fix determination of max NPT mapping level for private pages
  KVM: riscv: selftests: Fix compile error
  KVM: guest_memfd: abstract how prepared folios are recorded
  KVM: guest_memfd: let kvm_gmem_populate() operate only on private gfns
  KVM: extend kvm_range_has_memory_attributes() to check subset of attributes
  KVM: cleanup and add shortcuts to kvm_range_has_memory_attributes()
  KVM: guest_memfd: move check for already-populated page to common code
  KVM: remove kvm_arch_gmem_prepare_needed()
  KVM: guest_memfd: make kvm_gmem_prepare_folio() operate on a single struct kvm
  KVM: guest_memfd: delay kvm_gmem_prepare_folio() until the memory is passed to the guest
  KVM: guest_memfd: return locked folio from __kvm_gmem_get_pfn
  KVM: rename CONFIG_HAVE_KVM_GMEM_* to CONFIG_HAVE_KVM_ARCH_GMEM_*
  KVM: guest_memfd: do not go through struct page
  KVM: guest_memfd: delay folio_mark_uptodate() until after successful preparation
  KVM: guest_memfd: return folio from __kvm_gmem_get_pfn()
  KVM: x86: disallow pre-fault for SNP VMs before initialization
  KVM: Documentation: Fix title underline too short warning
  KVM: x86: Eliminate log spam from limited APIC timer periods
commit 725d410fac
@@ -6368,7 +6368,7 @@ a single guest_memfd file, but the bound ranges must not overlap).
 See KVM_SET_USER_MEMORY_REGION2 for additional details.
 
 4.143 KVM_PRE_FAULT_MEMORY
------------------------
+---------------------------
 
 :Capability: KVM_CAP_PRE_FAULT_MEMORY
 :Architectures: none
@@ -6405,6 +6405,12 @@ for the current vCPU state. KVM maps memory as if the vCPU generated a
 stage-2 read page fault, e.g. faults in memory as needed, but doesn't break
 CoW. However, KVM does not mark any newly created stage-2 PTE as Accessed.
 
+In the case of confidential VM types where there is an initial set up of
+private guest memory before the guest is 'finalized'/measured, this ioctl
+should only be issued after completing all the necessary setup to put the
+guest into a 'finalized' state so that the above semantics can be reliably
+ensured.
+
 In some cases, multiple vCPUs might share the page tables. In this
 case, the ioctl can be called in parallel.
 
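The documentation change above ties KVM_PRE_FAULT_MEMORY to VM finalization: for confidential VM types the ioctl is only meaningful once the initial private-memory setup is complete, and the x86 changes further down now reject earlier calls with EOPNOTSUPP. The following is a minimal userspace sketch, assuming a uapi that matches this documentation (struct kvm_pre_fault_memory with gpa/size/flags fields and the KVM_PRE_FAULT_MEMORY vCPU ioctl); retry handling for partial completion is simplified and may differ in detail from the real ABI.

#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Pre-fault [gpa, gpa + size) on one vCPU after the VM is fully set up. */
static int prefault_range(int vcpu_fd, uint64_t gpa, uint64_t size)
{
	struct kvm_pre_fault_memory range;

	memset(&range, 0, sizeof(range));
	range.gpa = gpa;
	range.size = size;

	for (;;) {
		if (ioctl(vcpu_fd, KVM_PRE_FAULT_MEMORY, &range) == 0)
			return 0;	/* the whole remaining range was mapped */
		/* On partial completion the kernel updates gpa and size. */
		if (errno != EINTR && errno != EAGAIN)
			return -1;	/* e.g. EOPNOTSUPP for SNP before launch finish */
	}
}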
@@ -1305,6 +1305,7 @@ struct kvm_arch {
 	u8 vm_type;
 	bool has_private_mem;
 	bool has_protected_state;
+	bool pre_fault_allowed;
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	struct list_head active_mmu_pages;
 	struct list_head zapped_obsolete_pages;
@@ -141,8 +141,8 @@ config KVM_AMD_SEV
 	depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m)
 	select ARCH_HAS_CC_PLATFORM
 	select KVM_GENERIC_PRIVATE_MEM
-	select HAVE_KVM_GMEM_PREPARE
-	select HAVE_KVM_GMEM_INVALIDATE
+	select HAVE_KVM_ARCH_GMEM_PREPARE
+	select HAVE_KVM_ARCH_GMEM_INVALIDATE
 	help
 	  Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
 	  with Encrypted State (SEV-ES) on AMD processors.
@@ -1743,7 +1743,7 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
 	s64 min_period = min_timer_period_us * 1000LL;
 
 	if (apic->lapic_timer.period < min_period) {
-		pr_info_ratelimited(
+		pr_info_once(
 		    "vcpu %i: requested %lld ns "
 		    "lapic timer period limited to %lld ns\n",
 		    apic->vcpu->vcpu_id,
@@ -4335,7 +4335,7 @@ static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn,
 	if (req_max_level)
 		max_level = min(max_level, req_max_level);
 
-	return req_max_level;
+	return max_level;
 }
 
 static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
@@ -4743,6 +4743,9 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
 	u64 end;
 	int r;
 
+	if (!vcpu->kvm->arch.pre_fault_allowed)
+		return -EOPNOTSUPP;
+
 	/*
 	 * reload is efficient when called repeatedly, so we can do it on
 	 * every iteration.
@@ -7510,7 +7513,7 @@ static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot,
 	const unsigned long end = start + KVM_PAGES_PER_HPAGE(level);
 
 	if (level == PG_LEVEL_2M)
-		return kvm_range_has_memory_attributes(kvm, start, end, attrs);
+		return kvm_range_has_memory_attributes(kvm, start, end, ~0, attrs);
 
 	for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) {
 		if (hugepage_test_mixed(slot, gfn, level - 1) ||
@@ -2279,18 +2279,11 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf
 		bool assigned;
 		int level;
 
-		if (!kvm_mem_is_private(kvm, gfn)) {
-			pr_debug("%s: Failed to ensure GFN 0x%llx has private memory attribute set\n",
-				 __func__, gfn);
-			ret = -EINVAL;
-			goto err;
-		}
-
 		ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level);
 		if (ret || assigned) {
 			pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n",
 				 __func__, gfn, ret, assigned);
-			ret = -EINVAL;
+			ret = ret ? -EINVAL : -EEXIST;
 			goto err;
 		}
 
@@ -2549,6 +2542,14 @@ static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
 	data->gctx_paddr = __psp_pa(sev->snp_context);
 	ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error);
 
+	/*
+	 * Now that there will be no more SNP_LAUNCH_UPDATE ioctls, private pages
+	 * can be given to the guest simply by marking the RMP entry as private.
+	 * This can happen on first access and also with KVM_PRE_FAULT_MEMORY.
+	 */
+	if (!ret)
+		kvm->arch.pre_fault_allowed = true;
+
 	kfree(id_auth);
 
 e_free_id_block:
@@ -4949,6 +4949,7 @@ static int svm_vm_init(struct kvm *kvm)
 		to_kvm_sev_info(kvm)->need_init = true;
 
 		kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM);
+		kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem;
 	}
 
 	if (!pause_filter_count || !pause_filter_thresh)
@@ -12646,6 +12646,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm->arch.vm_type = type;
 	kvm->arch.has_private_mem =
 		(type == KVM_X86_SW_PROTECTED_VM);
+	/* Decided by the vendor code for other VM types. */
+	kvm->arch.pre_fault_allowed =
+		type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM;
 
 	ret = kvm_page_track_init(kvm);
 	if (ret)
@@ -13641,19 +13644,14 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
 
-#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
-bool kvm_arch_gmem_prepare_needed(struct kvm *kvm)
-{
-	return kvm->arch.vm_type == KVM_X86_SNP_VM;
-}
-
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
 int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order)
 {
 	return kvm_x86_call(gmem_prepare)(kvm, pfn, gfn, max_order);
 }
 #endif
 
-#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
 void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end)
 {
 	kvm_x86_call(gmem_invalidate)(start, end);
@@ -2414,7 +2414,7 @@ static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn
 }
 
 bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
-				     unsigned long attrs);
+				     unsigned long mask, unsigned long attrs);
 bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm,
 					struct kvm_gfn_range *range);
 bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
@@ -2445,11 +2445,11 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
 }
 #endif /* CONFIG_KVM_PRIVATE_MEM */
 
-#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
 int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order);
-bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
 #endif
 
+#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM
 /**
  * kvm_gmem_populate() - Populate/prepare a GPA range with guest data
  *
@@ -2476,8 +2476,9 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
 
 long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
 		       kvm_gmem_populate_cb post_populate, void *opaque);
+#endif
 
-#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
 void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
 #endif
 
@@ -961,10 +961,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB);
 KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC);
 KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX);
 KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS);
-KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA),
-KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB),
-KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD),
-KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF),
+KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF);
 KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP);
 KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA);
 KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);
@@ -113,10 +113,10 @@ config KVM_GENERIC_PRIVATE_MEM
 	select KVM_PRIVATE_MEM
 	bool
 
-config HAVE_KVM_GMEM_PREPARE
+config HAVE_KVM_ARCH_GMEM_PREPARE
 	bool
 	depends on KVM_PRIVATE_MEM
 
-config HAVE_KVM_GMEM_INVALIDATE
+config HAVE_KVM_ARCH_GMEM_INVALIDATE
 	bool
 	depends on KVM_PRIVATE_MEM
@@ -13,84 +13,93 @@ struct kvm_gmem {
 	struct list_head entry;
 };
 
-static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio)
+/**
+ * folio_file_pfn - like folio_file_page, but return a pfn.
+ * @folio: The folio which contains this index.
+ * @index: The index we want to look up.
+ *
+ * Return: The pfn for this index.
+ */
+static inline kvm_pfn_t folio_file_pfn(struct folio *folio, pgoff_t index)
 {
-#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
-	struct list_head *gmem_list = &inode->i_mapping->i_private_list;
-	struct kvm_gmem *gmem;
+	return folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1));
+}
 
-	list_for_each_entry(gmem, gmem_list, entry) {
-		struct kvm_memory_slot *slot;
-		struct kvm *kvm = gmem->kvm;
-		struct page *page;
-		kvm_pfn_t pfn;
-		gfn_t gfn;
-		int rc;
-
-		if (!kvm_arch_gmem_prepare_needed(kvm))
-			continue;
-
-		slot = xa_load(&gmem->bindings, index);
-		if (!slot)
-			continue;
-
-		page = folio_file_page(folio, index);
-		pfn = page_to_pfn(page);
-		gfn = slot->base_gfn + index - slot->gmem.pgoff;
-		rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page)));
-		if (rc) {
-			pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n",
-					    index, gfn, pfn, rc);
-			return rc;
-		}
+static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
+				    pgoff_t index, struct folio *folio)
+{
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
+	kvm_pfn_t pfn = folio_file_pfn(folio, index);
+	gfn_t gfn = slot->base_gfn + index - slot->gmem.pgoff;
+	int rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, folio_order(folio));
+	if (rc) {
+		pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n",
+				    index, gfn, pfn, rc);
+		return rc;
 	}
-
 #endif
+
 	return 0;
 }
 
-static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare)
+static inline void kvm_gmem_mark_prepared(struct folio *folio)
 {
-	struct folio *folio;
+	folio_mark_uptodate(folio);
+}
 
-	/* TODO: Support huge pages. */
-	folio = filemap_grab_folio(inode->i_mapping, index);
-	if (IS_ERR(folio))
-		return folio;
+/*
+ * Process @folio, which contains @gfn, so that the guest can use it.
+ * The folio must be locked and the gfn must be contained in @slot.
+ * On successful return the guest sees a zero page so as to avoid
+ * leaking host data and the up-to-date flag is set.
+ */
+static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
+				  gfn_t gfn, struct folio *folio)
+{
+	unsigned long nr_pages, i;
+	pgoff_t index;
+	int r;
+
+	nr_pages = folio_nr_pages(folio);
+	for (i = 0; i < nr_pages; i++)
+		clear_highpage(folio_page(folio, i));
 
 	/*
-	 * Use the up-to-date flag to track whether or not the memory has been
-	 * zeroed before being handed off to the guest. There is no backing
-	 * storage for the memory, so the folio will remain up-to-date until
-	 * it's removed.
+	 * Preparing huge folios should always be safe, since it should
+	 * be possible to split them later if needed.
 	 *
-	 * TODO: Skip clearing pages when trusted firmware will do it when
-	 * assigning memory to the guest.
+	 * Right now the folio order is always going to be zero, but the
+	 * code is ready for huge folios. The only assumption is that
+	 * the base pgoff of memslots is naturally aligned with the
+	 * requested page order, ensuring that huge folios can also use
+	 * huge page table entries for GPA->HPA mapping.
+	 *
+	 * The order will be passed when creating the guest_memfd, and
+	 * checked when creating memslots.
 	 */
-	if (!folio_test_uptodate(folio)) {
-		unsigned long nr_pages = folio_nr_pages(folio);
-		unsigned long i;
-
-		for (i = 0; i < nr_pages; i++)
-			clear_highpage(folio_page(folio, i));
-
-		folio_mark_uptodate(folio);
-	}
+	WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio)));
+	index = gfn - slot->base_gfn + slot->gmem.pgoff;
+	index = ALIGN_DOWN(index, 1 << folio_order(folio));
+	r = __kvm_gmem_prepare_folio(kvm, slot, index, folio);
+	if (!r)
+		kvm_gmem_mark_prepared(folio);
 
-	if (prepare) {
-		int r = kvm_gmem_prepare_folio(inode, index, folio);
-		if (r < 0) {
-			folio_unlock(folio);
-			folio_put(folio);
-			return ERR_PTR(r);
-		}
-	}
+	return r;
+}
 
-	/*
-	 * Ignore accessed, referenced, and dirty flags. The memory is
-	 * unevictable and there is no storage to write back to.
-	 */
-	return folio;
+/*
+ * Returns a locked folio on success. The caller is responsible for
+ * setting the up-to-date flag before the memory is mapped into the guest.
+ * There is no backing storage for the memory, so the folio will remain
+ * up-to-date until it's removed.
+ *
+ * Ignore accessed, referenced, and dirty flags. The memory is
+ * unevictable and there is no storage to write back to.
+ */
+static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
+{
+	/* TODO: Support huge pages. */
+	return filemap_grab_folio(inode->i_mapping, index);
 }
 
 static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
@@ -190,7 +199,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
 			break;
 		}
 
-		folio = kvm_gmem_get_folio(inode, index, true);
+		folio = kvm_gmem_get_folio(inode, index);
 		if (IS_ERR(folio)) {
 			r = PTR_ERR(folio);
 			break;
@@ -343,7 +352,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol
 	return MF_DELAYED;
 }
 
-#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
 static void kvm_gmem_free_folio(struct folio *folio)
 {
 	struct page *page = folio_page(folio, 0);
@@ -358,7 +367,7 @@ static const struct address_space_operations kvm_gmem_aops = {
 	.dirty_folio = noop_dirty_folio,
 	.migrate_folio = kvm_gmem_migrate_folio,
 	.error_remove_folio = kvm_gmem_error_folio,
-#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
 	.free_folio = kvm_gmem_free_folio,
 #endif
 };
@@ -541,64 +550,76 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
 	fput(file);
 }
 
-static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
-			      gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare)
+/* Returns a locked folio on success. */
+static struct folio *
+__kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
+		   gfn_t gfn, kvm_pfn_t *pfn, bool *is_prepared,
+		   int *max_order)
 {
 	pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
 	struct kvm_gmem *gmem = file->private_data;
 	struct folio *folio;
-	struct page *page;
-	int r;
 
 	if (file != slot->gmem.file) {
 		WARN_ON_ONCE(slot->gmem.file);
-		return -EFAULT;
+		return ERR_PTR(-EFAULT);
 	}
 
 	gmem = file->private_data;
 	if (xa_load(&gmem->bindings, index) != slot) {
 		WARN_ON_ONCE(xa_load(&gmem->bindings, index));
-		return -EIO;
+		return ERR_PTR(-EIO);
 	}
 
-	folio = kvm_gmem_get_folio(file_inode(file), index, prepare);
+	folio = kvm_gmem_get_folio(file_inode(file), index);
 	if (IS_ERR(folio))
-		return PTR_ERR(folio);
+		return folio;
 
 	if (folio_test_hwpoison(folio)) {
 		folio_unlock(folio);
 		folio_put(folio);
-		return -EHWPOISON;
+		return ERR_PTR(-EHWPOISON);
 	}
 
-	page = folio_file_page(folio, index);
-
-	*pfn = page_to_pfn(page);
+	*pfn = folio_file_pfn(folio, index);
 	if (max_order)
 		*max_order = 0;
 
-	r = 0;
-
-	folio_unlock(folio);
-
-	return r;
+	*is_prepared = folio_test_uptodate(folio);
+	return folio;
 }
 
 int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
 		     gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
 {
 	struct file *file = kvm_gmem_get_file(slot);
-	int r;
+	struct folio *folio;
+	bool is_prepared = false;
+	int r = 0;
 
 	if (!file)
 		return -EFAULT;
 
-	r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true);
+	folio = __kvm_gmem_get_pfn(file, slot, gfn, pfn, &is_prepared, max_order);
+	if (IS_ERR(folio)) {
+		r = PTR_ERR(folio);
+		goto out;
+	}
+
+	if (!is_prepared)
+		r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio);
+
+	folio_unlock(folio);
+	if (r < 0)
+		folio_put(folio);
+
+out:
 	fput(file);
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
 
+#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM
 long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
 		       kvm_gmem_populate_cb post_populate, void *opaque)
 {
@@ -625,7 +646,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
 
 	npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages);
 	for (i = 0; i < npages; i += (1 << max_order)) {
+		struct folio *folio;
 		gfn_t gfn = start_gfn + i;
+		bool is_prepared = false;
 		kvm_pfn_t pfn;
 
 		if (signal_pending(current)) {
|
@ -633,18 +656,39 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false);
|
folio = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &is_prepared, &max_order);
|
||||||
if (ret)
|
if (IS_ERR(folio)) {
|
||||||
|
ret = PTR_ERR(folio);
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (!IS_ALIGNED(gfn, (1 << max_order)) ||
|
if (is_prepared) {
|
||||||
(npages - i) < (1 << max_order))
|
folio_unlock(folio);
|
||||||
max_order = 0;
|
folio_put(folio);
|
||||||
|
ret = -EEXIST;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
folio_unlock(folio);
|
||||||
|
WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) ||
|
||||||
|
(npages - i) < (1 << max_order));
|
||||||
|
|
||||||
|
ret = -EINVAL;
|
||||||
|
while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order),
|
||||||
|
KVM_MEMORY_ATTRIBUTE_PRIVATE,
|
||||||
|
KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
|
||||||
|
if (!max_order)
|
||||||
|
goto put_folio_and_exit;
|
||||||
|
max_order--;
|
||||||
|
}
|
||||||
|
|
||||||
p = src ? src + i * PAGE_SIZE : NULL;
|
p = src ? src + i * PAGE_SIZE : NULL;
|
||||||
ret = post_populate(kvm, gfn, pfn, p, max_order, opaque);
|
ret = post_populate(kvm, gfn, pfn, p, max_order, opaque);
|
||||||
|
if (!ret)
|
||||||
|
kvm_gmem_mark_prepared(folio);
|
||||||
|
|
||||||
put_page(pfn_to_page(pfn));
|
put_folio_and_exit:
|
||||||
|
folio_put(folio);
|
||||||
if (ret)
|
if (ret)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@@ -655,3 +699,4 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
 	return ret && !i ? ret : i;
 }
 EXPORT_SYMBOL_GPL(kvm_gmem_populate);
+#endif
@@ -2398,42 +2398,6 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm,
 #endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
 
 #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES
-/*
- * Returns true if _all_ gfns in the range [@start, @end) have attributes
- * matching @attrs.
- */
-bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
-				     unsigned long attrs)
-{
-	XA_STATE(xas, &kvm->mem_attr_array, start);
-	unsigned long index;
-	bool has_attrs;
-	void *entry;
-
-	rcu_read_lock();
-
-	if (!attrs) {
-		has_attrs = !xas_find(&xas, end - 1);
-		goto out;
-	}
-
-	has_attrs = true;
-	for (index = start; index < end; index++) {
-		do {
-			entry = xas_next(&xas);
-		} while (xas_retry(&xas, entry));
-
-		if (xas.xa_index != index || xa_to_value(entry) != attrs) {
-			has_attrs = false;
-			break;
-		}
-	}
-
-out:
-	rcu_read_unlock();
-	return has_attrs;
-}
-
 static u64 kvm_supported_mem_attributes(struct kvm *kvm)
 {
 	if (!kvm || kvm_arch_has_private_mem(kvm))
@@ -2442,6 +2406,41 @@ static u64 kvm_supported_mem_attributes(struct kvm *kvm)
 	return 0;
 }
 
+/*
+ * Returns true if _all_ gfns in the range [@start, @end) have attributes
+ * such that the bits in @mask match @attrs.
+ */
+bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
+				     unsigned long mask, unsigned long attrs)
+{
+	XA_STATE(xas, &kvm->mem_attr_array, start);
+	unsigned long index;
+	void *entry;
+
+	mask &= kvm_supported_mem_attributes(kvm);
+	if (attrs & ~mask)
+		return false;
+
+	if (end == start + 1)
+		return (kvm_get_memory_attributes(kvm, start) & mask) == attrs;
+
+	guard(rcu)();
+	if (!attrs)
+		return !xas_find(&xas, end - 1);
+
+	for (index = start; index < end; index++) {
+		do {
+			entry = xas_next(&xas);
+		} while (xas_retry(&xas, entry));
+
+		if (xas.xa_index != index ||
+		    (xa_to_value(entry) & mask) != attrs)
+			return false;
+	}
+
+	return true;
+}
+
 static __always_inline void kvm_handle_gfn_range(struct kvm *kvm,
 						 struct kvm_mmu_notifier_range *range)
 {
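The rewritten helper above turns the old exact-match check into a subset check: only the attribute bits selected by mask have to equal attrs, with ~0 preserving the previous behaviour and the KVM_MEMORY_ATTRIBUTE_PRIVATE/KVM_MEMORY_ATTRIBUTE_PRIVATE pair giving kvm_gmem_populate() its "all gfns are private" test. A stand-alone toy model of that semantics follows; it uses a flat array instead of KVM's xarray and an arbitrary illustrative bit for "private", so it is not the kernel implementation.

#include <stdbool.h>
#include <stdio.h>

#define PRIVATE (1UL << 3)	/* illustrative bit, not the real uapi value */

/* True if, for every gfn in [start, end), the bits under mask equal attrs. */
static bool range_has_attrs(const unsigned long *attr, unsigned long start,
			    unsigned long end, unsigned long mask,
			    unsigned long attrs)
{
	for (unsigned long gfn = start; gfn < end; gfn++)
		if ((attr[gfn] & mask) != attrs)
			return false;
	return true;
}

int main(void)
{
	unsigned long attr[8] = { 0 };

	/* gfns 2 and 3 are private and also carry some other attribute bit. */
	attr[2] = attr[3] = PRIVATE | (1UL << 5);

	/* Exact match over all bits, the old calling convention (mask = ~0): fails. */
	printf("exact:  %d\n", range_has_attrs(attr, 2, 4, ~0UL, PRIVATE));

	/* Subset check: only the private bit is examined, so this succeeds. */
	printf("subset: %d\n", range_has_attrs(attr, 2, 4, PRIVATE, PRIVATE));
	return 0;
}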
@@ -2534,7 +2533,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
 	mutex_lock(&kvm->slots_lock);
 
 	/* Nothing to do if the entire range as the desired attributes. */
-	if (kvm_range_has_memory_attributes(kvm, start, end, attributes))
+	if (kvm_range_has_memory_attributes(kvm, start, end, ~0, attributes))
 		goto out_unlock;
 
 	/*