Mirror of https://github.com/nxp-imx/linux-imx.git, synced 2025-07-14 13:19:36 +02:00
ANDROID: KVM: arm64: THP support for pKVM guests
When a pKVM stage-2 guest fault happens within the bounds of a transparent huge page at stage-1, that entire huge page is pinned and its full range can be mapped at stage-2. The hypervisor in turn installs a block mapping. This reduces TLB pressure and the number of faults.

Bug: 278749606
Bug: 278011447
Change-Id: I8ba253134007ba31c39d243130384ac602c0f92e
Signed-off-by: Vincent Donnefort <vdonnefort@google.com>
Parent: 54343dcef5
Commit: 5d9808b907
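Before the diff itself, here is a minimal user-space model of the fallback arithmetic the patch adds to pkvm_mem_abort(). This is an illustrative sketch only, not kernel code: the constants and the map_fault() helper are invented for the example, and in the actual patch the alignment to the huge-page boundary is done by transparent_hugepage_adjust(), with pmd_offset used only to step back to the single faulting page when part of the 2 MiB block is already mapped.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PMD_SHIFT  21
#define PMD_SIZE   (1UL << PMD_SHIFT)   /* 2 MiB, i.e. one stage-1 THP */

/*
 * Toy model of the fallback arithmetic: the faulting IPA is aligned down to
 * the THP so a whole block can be mapped at stage-2; if part of that block is
 * already mapped, the original single page is recovered by adding the
 * in-block offset back. All names here are illustrative.
 */
static void map_fault(uint64_t *fault_ipa, uint64_t *pfn, uint64_t *map_size,
                      bool range_already_mapped)
{
        uint64_t pmd_offset = *fault_ipa & (PMD_SIZE - 1);

        /* Pretend stage-1 backs this IPA with a THP: align down to the block. */
        *fault_ipa -= pmd_offset;
        *pfn -= pmd_offset >> PAGE_SHIFT;
        *map_size = PMD_SIZE;

        if (range_already_mapped) {
                /* Fall back to mapping only the faulting page. */
                *fault_ipa += pmd_offset;
                *pfn += pmd_offset >> PAGE_SHIFT;
                *map_size = PAGE_SIZE;
        }
}

int main(void)
{
        uint64_t ipa = 0x80345000, pfn = 0x41345, size;

        map_fault(&ipa, &pfn, &size, false);
        printf("block:    ipa=%#llx pfn=%#llx size=%llu KiB\n",
               (unsigned long long)ipa, (unsigned long long)pfn,
               (unsigned long long)(size >> 10));

        ipa = 0x80345000; pfn = 0x41345;
        map_fault(&ipa, &pfn, &size, true);
        printf("fallback: ipa=%#llx pfn=%#llx size=%llu KiB\n",
               (unsigned long long)ipa, (unsigned long long)pfn,
               (unsigned long long)(size >> 10));
        return 0;
}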
@@ -208,7 +208,7 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
 static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
                                       enum kvm_pgtable_prot prot)
 {
-        return true;
+        return false;
 }
 
 static bool guest_stage2_pte_is_counted(kvm_pte_t pte, u32 level)

@@ -1584,7 +1584,10 @@ static int pkvm_host_map_guest(u64 pfn, u64 gfn, u64 nr_pages,
         /*
          * Getting -EPERM at this point implies that the pfn has already been
          * mapped. This should only ever happen when two vCPUs faulted on the
-         * same page, and the current one lost the race to do the mapping.
+         * same page, and the current one lost the race to do the mapping...
+         *
+         * ...or if we've tried to map a region containing an already mapped
+         * entry.
          */
         return (ret == -EPERM) ? -EAGAIN : ret;
 }

@@ -1661,12 +1664,14 @@ static int pkvm_relax_perms(struct kvm_vcpu *vcpu, u64 pfn, u64 gfn, u8 order,
                                   (void *)prot, false);
 }
 
-static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-                          unsigned long hva)
+static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t *fault_ipa,
+                          struct kvm_memory_slot *memslot, unsigned long hva,
+                          size_t *size)
 {
-        struct mm_struct *mm = current->mm;
         unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE;
         struct kvm_hyp_memcache *hyp_memcache = &vcpu->arch.stage2_mc;
+        unsigned long index, pmd_offset, page_size;
+        struct mm_struct *mm = current->mm;
         struct kvm_pinned_page *ppage;
         struct kvm *kvm = vcpu->kvm;
         int ret, nr_pages;

@@ -1686,10 +1691,6 @@ static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         if (!ppage)
                 return -ENOMEM;
 
-        ret = account_locked_vm(mm, 1, true);
-        if (ret)
-                goto free_ppage;
-
         mmap_read_lock(mm);
         ret = pin_user_pages(hva, 1, flags, &page);
         mmap_read_unlock(mm);

@@ -1697,10 +1698,10 @@ static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
         if (ret == -EHWPOISON) {
                 kvm_send_hwpoison_signal(hva, PAGE_SHIFT);
                 ret = 0;
-                goto dec_account;
+                goto free_ppage;
         } else if (ret != 1) {
                 ret = -EFAULT;
-                goto dec_account;
+                goto free_ppage;
         } else if (kvm->arch.pkvm.enabled && !PageSwapBacked(page)) {
                 /*
                  * We really can't deal with page-cache pages returned by GUP

@@ -1720,30 +1721,63 @@ static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                 goto unpin;
         }
 
-        write_lock(&kvm->mmu_lock);
         pfn = page_to_pfn(page);
-        ret = pkvm_host_map_guest(pfn, fault_ipa >> PAGE_SHIFT, 1, KVM_PGTABLE_PROT_R);
+        pmd_offset = *fault_ipa & (PMD_SIZE - 1);
+        page_size = transparent_hugepage_adjust(kvm, memslot,
+                                                hva, &pfn,
+                                                fault_ipa);
+        page = pfn_to_page(pfn);
+
+        if (size)
+                *size = page_size;
+
+retry:
+        ret = account_locked_vm(mm, page_size >> PAGE_SHIFT, true);
+        if (ret)
+                goto unpin;
+
+        write_lock(&kvm->mmu_lock);
+        /*
+         * If we already have a mapping in the middle of the THP, we have no
+         * other choice than enforcing PAGE_SIZE for pkvm_host_map_guest() to
+         * succeed.
+         */
+        index = *fault_ipa;
+        if (page_size > PAGE_SIZE &&
+            mt_find(&kvm->arch.pkvm.pinned_pages, &index, index + page_size - 1)) {
+                write_unlock(&kvm->mmu_lock);
+                *fault_ipa += pmd_offset;
+                pfn += pmd_offset >> PAGE_SHIFT;
+                page = pfn_to_page(pfn);
+                page_size = PAGE_SIZE;
+                account_locked_vm(mm, page_size >> PAGE_SHIFT, false);
+                goto retry;
+        }
+
+        ret = pkvm_host_map_guest(pfn, *fault_ipa >> PAGE_SHIFT,
+                                  page_size >> PAGE_SHIFT, KVM_PGTABLE_PROT_R);
         if (ret) {
                 if (ret == -EAGAIN)
                         ret = 0;
-                goto unlock;
+
+                goto dec_account;
         }
 
         ppage->page = page;
-        ppage->ipa = fault_ipa;
-        ppage->order = 0;
+        ppage->ipa = *fault_ipa;
+        ppage->order = get_order(page_size);
         ppage->pins = 1 << ppage->order;
         WARN_ON(insert_ppage(kvm, ppage));
 
         write_unlock(&kvm->mmu_lock);
 
         return 0;
 
-unlock:
+dec_account:
         write_unlock(&kvm->mmu_lock);
+        account_locked_vm(mm, page_size >> PAGE_SHIFT, false);
 unpin:
         unpin_user_pages(&page, 1);
-dec_account:
-        account_locked_vm(mm, 1, false);
 free_ppage:
         kfree(ppage);

@@ -1754,6 +1788,7 @@ int pkvm_mem_abort_range(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, size_t si
 {
         phys_addr_t ipa_end = fault_ipa + size - 1;
         struct kvm_pinned_page *ppage;
+        unsigned long page_size;
         int err = 0, idx;
 
         if (!PAGE_ALIGNED(size) || !PAGE_ALIGNED(fault_ipa))

@@ -1771,6 +1806,7 @@ int pkvm_mem_abort_range(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, size_t si
 
         while (size) {
                 if (ppage && ppage->ipa == fault_ipa) {
+                        page_size = PAGE_SIZE << ppage->order;
                         ppage = mt_next(&vcpu->kvm->arch.pkvm.pinned_pages,
                                         ppage->ipa, ULONG_MAX);
                 } else {

@@ -1787,7 +1823,7 @@ int pkvm_mem_abort_range(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, size_t si
                 }
 
                 read_unlock(&vcpu->kvm->mmu_lock);
-                err = pkvm_mem_abort(vcpu, fault_ipa, hva);
+                err = pkvm_mem_abort(vcpu, &fault_ipa, memslot, hva, &page_size);
                 read_lock(&vcpu->kvm->mmu_lock);
                 if (err)
                         goto end;

@@ -1870,7 +1906,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
          * logging_active is guaranteed to never be true for VM_PFNMAP
          * memslots.
          */
-        if (logging_active || is_protected_kvm_enabled()) {
+        if (logging_active) {
                 force_pte = true;
                 vma_shift = PAGE_SHIFT;
         } else {

@@ -2184,7 +2220,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
         }
 
         if (is_protected_kvm_enabled() && fault_status != ESR_ELx_FSC_PERM)
-                ret = pkvm_mem_abort(vcpu, fault_ipa, hva);
+                ret = pkvm_mem_abort(vcpu, &fault_ipa, memslot, hva, NULL);
         else
                 ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
 
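A note on the new bookkeeping: each kvm_pinned_page now records the order of the mapping, the pin count is one per base page in the block, and pkvm_mem_abort_range() advances by PAGE_SIZE << order. The short self-contained sketch below (hypothetical helper names, fixed 4 KiB pages) only illustrates that arithmetic; the kernel's get_order() likewise returns the smallest order whose page count covers the given size.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Smallest order n such that PAGE_SIZE << n covers size (models get_order()). */
static unsigned int order_for(uint64_t size)
{
        unsigned int order = 0;

        while ((PAGE_SIZE << order) < size)
                order++;
        return order;
}

int main(void)
{
        uint64_t page_size = 2UL << 20;         /* a 2 MiB THP-backed mapping */
        unsigned int order = order_for(page_size);
        unsigned int pins = 1U << order;        /* one pin per 4 KiB page in the block */

        printf("order=%u pins=%u walk step=%lu KiB\n",
               order, pins, (unsigned long)((PAGE_SIZE << order) >> 10));
        return 0;
}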