Merge tag 'kvm-s390-next-6.16-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD

* Fix interaction between some filesystems and Secure Execution
* Some cleanups and refactorings, preparing for an upcoming big series
commit e9ba21fb5d
@@ -13102,12 +13102,14 @@ S:      Supported
 T:      git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
 F:      Documentation/virt/kvm/s390*
 F:      arch/s390/include/asm/gmap.h
+F:      arch/s390/include/asm/gmap_helpers.h
 F:      arch/s390/include/asm/kvm*
 F:      arch/s390/include/uapi/asm/kvm*
 F:      arch/s390/include/uapi/asm/uvdevice.h
 F:      arch/s390/kernel/uv.c
 F:      arch/s390/kvm/
 F:      arch/s390/mm/gmap.c
+F:      arch/s390/mm/gmap_helpers.c
 F:      drivers/s390/char/uvdevice.c
 F:      tools/testing/selftests/drivers/s390x/uvdevice/
 F:      tools/testing/selftests/kvm/*/s390/
@@ -110,7 +110,6 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
 unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
 int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
-void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
 void __gmap_zap(struct gmap *, unsigned long gaddr);
 void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
 
@@ -134,7 +133,6 @@ int gmap_protect_one(struct gmap *gmap, unsigned long gaddr, int prot, unsigned
 
 void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
                              unsigned long gaddr, unsigned long vmaddr);
-int s390_disable_cow_sharing(void);
 int s390_replace_asce(struct gmap *gmap);
 void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
 int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
arch/s390/include/asm/gmap_helpers.h (new file, 15 lines)
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Helper functions for KVM guest address space mapping code
+ *
+ *    Copyright IBM Corp. 2025
+ */
+
+#ifndef _ASM_S390_GMAP_HELPERS_H
+#define _ASM_S390_GMAP_HELPERS_H
+
+void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
+int gmap_helper_disable_cow_sharing(void);
+
+#endif /* _ASM_S390_GMAP_HELPERS_H */
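For orientation, a minimal caller sketch (hypothetical wrapper, not part of this diff): the helpers operate on a plain mm_struct and assume the caller already holds the mmap lock, read for discarding and write for disabling COW sharing.

#include <linux/mmap_lock.h>
#include <asm/gmap_helpers.h>

/* Hypothetical wrapper: discard guest backing in [start, end) while
 * holding the mmap read lock, which gmap_helper_discard() asserts. */
static void discard_user_range(struct mm_struct *mm, unsigned long start,
                               unsigned long end)
{
        mmap_read_lock(mm);
        gmap_helper_discard(mm, start, end);
        mmap_read_unlock(mm);
}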
@@ -36,6 +36,7 @@ static inline bool __tlb_remove_folio_pages(struct mmu_gather *tlb,
 
 #include <asm/tlbflush.h>
 #include <asm-generic/tlb.h>
+#include <asm/gmap.h>
 
 /*
  * Release the page cache reference for a pte removed by
@@ -16,7 +16,6 @@
 #include <linux/bug.h>
 #include <linux/sched.h>
 #include <asm/page.h>
-#include <asm/gmap.h>
 #include <asm/asm.h>
 
 #define UVC_CC_OK      0
@@ -15,6 +15,7 @@
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 #include <linux/pagewalk.h>
+#include <linux/backing-dev.h>
 #include <asm/facility.h>
 #include <asm/sections.h>
 #include <asm/uv.h>
@@ -135,7 +136,7 @@ int uv_destroy_folio(struct folio *folio)
 {
         int rc;
 
-        /* See gmap_make_secure(): large folios cannot be secure */
+        /* Large folios cannot be secure */
         if (unlikely(folio_test_large(folio)))
                 return 0;
 
@@ -184,7 +185,7 @@ int uv_convert_from_secure_folio(struct folio *folio)
 {
         int rc;
 
-        /* See gmap_make_secure(): large folios cannot be secure */
+        /* Large folios cannot be secure */
         if (unlikely(folio_test_large(folio)))
                 return 0;
 
@@ -324,32 +325,87 @@ static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct u
 }
 
 /**
- * s390_wiggle_split_folio() - try to drain extra references to a folio and optionally split.
+ * s390_wiggle_split_folio() - try to drain extra references to a folio and
+ *                             split the folio if it is large.
  * @mm:    the mm containing the folio to work on
  * @folio: the folio
- * @split: whether to split a large folio
  *
  * Context: Must be called while holding an extra reference to the folio;
  *          the mm lock should not be held.
- * Return: 0 if the folio was split successfully;
- *         -EAGAIN if the folio was not split successfully but another attempt
- *                 can be made, or if @split was set to false;
- *         -EINVAL in case of other errors. See split_folio().
+ * Return: 0 if the operation was successful;
+ *         -EAGAIN if splitting the large folio was not successful,
+ *                 but another attempt can be made;
+ *         -EINVAL in case of other folio splitting errors. See split_folio().
  */
-static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio, bool split)
+static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio)
 {
-        int rc;
+        int rc, tried_splits;
 
         lockdep_assert_not_held(&mm->mmap_lock);
         folio_wait_writeback(folio);
         lru_add_drain_all();
-        if (split) {
+
+        if (!folio_test_large(folio))
+                return 0;
+
+        for (tried_splits = 0; tried_splits < 2; tried_splits++) {
+                struct address_space *mapping;
+                loff_t lstart, lend;
+                struct inode *inode;
+
                 folio_lock(folio);
                 rc = split_folio(folio);
+                if (rc != -EBUSY) {
+                        folio_unlock(folio);
+                        return rc;
+                }
+
+                /*
+                 * Splitting with -EBUSY can fail for various reasons, but we
+                 * have to handle one case explicitly for now: some mappings
+                 * don't allow for splitting dirty folios; writeback will
+                 * mark them clean again, including marking all page table
+                 * entries mapping the folio read-only, to catch future write
+                 * attempts.
+                 *
+                 * While the system should be writing back dirty folios in the
+                 * background, we obtained this folio by looking up a writable
+                 * page table entry. On these problematic mappings, writable
+                 * page table entries imply dirty folios, preventing the
+                 * split in the first place.
+                 *
+                 * To prevent a livelock when trigger writeback manually and
+                 * letting the caller look up the folio again in the page
+                 * table (turning it dirty), immediately try to split again.
+                 *
+                 * This is only a problem for some mappings (e.g., XFS);
+                 * mappings that do not support writeback (e.g., shmem) do not
+                 * apply.
+                 */
+                if (!folio_test_dirty(folio) || folio_test_anon(folio) ||
+                    !folio->mapping || !mapping_can_writeback(folio->mapping)) {
+                        folio_unlock(folio);
+                        break;
+                }
+
+                /*
+                 * Ideally, we'd only trigger writeback on this exact folio. But
+                 * there is no easy way to do that, so we'll stabilize the
+                 * mapping while we still hold the folio lock, so we can drop
+                 * the folio lock to trigger writeback on the range currently
+                 * covered by the folio instead.
+                 */
+                mapping = folio->mapping;
+                lstart = folio_pos(folio);
+                lend = lstart + folio_size(folio) - 1;
+                inode = igrab(mapping->host);
                 folio_unlock(folio);
 
-                if (rc != -EBUSY)
-                        return rc;
+                if (unlikely(!inode))
+                        break;
+
+                filemap_write_and_wait_range(mapping, lstart, lend);
+                iput(mapping->host);
         }
         return -EAGAIN;
 }
@@ -393,8 +449,11 @@ int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header
         folio_walk_end(&fw, vma);
         mmap_read_unlock(mm);
 
-        if (rc == -E2BIG || rc == -EBUSY)
-                rc = s390_wiggle_split_folio(mm, folio, rc == -E2BIG);
+        if (rc == -E2BIG || rc == -EBUSY) {
+                rc = s390_wiggle_split_folio(mm, folio);
+                if (!rc)
+                        rc = -EAGAIN;
+        }
         folio_put(folio);
 
         return rc;
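Since s390_wiggle_split_folio() no longer reports split success directly, make_hva_secure() now maps a successful wiggle to -EAGAIN so that callers redo the whole lookup. A minimal sketch of the resulting caller contract (hypothetical wrapper, not code from this series; real callers re-resolve the folio on each attempt):

/* Hypothetical retry wrapper: -EAGAIN from make_hva_secure() means the
 * folio was drained, split, or written back, and the lookup should be
 * repeated from scratch. */
static int make_hva_secure_retry(struct mm_struct *mm, unsigned long hva,
                                 struct uv_cb_header *uvcb)
{
        int rc;

        do {
                rc = make_hva_secure(mm, hva, uvcb);
                cond_resched();
        } while (rc == -EAGAIN);
        return rc;
}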
@@ -403,15 +462,15 @@ EXPORT_SYMBOL_GPL(make_hva_secure);
 
 /*
  * To be called with the folio locked or with an extra reference! This will
- * prevent gmap_make_secure from touching the folio concurrently. Having 2
- * parallel arch_make_folio_accessible is fine, as the UV calls will become a
- * no-op if the folio is already exported.
+ * prevent kvm_s390_pv_make_secure() from touching the folio concurrently.
+ * Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will
+ * become a no-op if the folio is already exported.
  */
 int arch_make_folio_accessible(struct folio *folio)
 {
         int rc = 0;
 
-        /* See gmap_make_secure(): large folios cannot be secure */
+        /* Large folios cannot be secure */
         if (unlikely(folio_test_large(folio)))
                 return 0;
 
@@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
 ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
 
 kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o gmap-vsie.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap-vsie.o
 
 kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
 obj-$(CONFIG_KVM) += kvm.o
@@ -11,12 +11,30 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
 #include <asm/virtio-ccw.h>
 #include "kvm-s390.h"
 #include "trace.h"
 #include "trace-s390.h"
 #include "gaccess.h"
 
+static void do_discard_gfn_range(struct kvm_vcpu *vcpu, gfn_t gfn_start, gfn_t gfn_end)
+{
+        struct kvm_memslot_iter iter;
+        struct kvm_memory_slot *slot;
+        struct kvm_memslots *slots;
+        unsigned long start, end;
+
+        slots = kvm_vcpu_memslots(vcpu);
+
+        kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) {
+                slot = iter.slot;
+                start = __gfn_to_hva_memslot(slot, max(gfn_start, slot->base_gfn));
+                end = __gfn_to_hva_memslot(slot, min(gfn_end, slot->base_gfn + slot->npages));
+                gmap_helper_discard(vcpu->kvm->mm, start, end);
+        }
+}
+
 static int diag_release_pages(struct kvm_vcpu *vcpu)
 {
         unsigned long start, end;
@@ -32,12 +50,13 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
 
         VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
 
+        mmap_read_lock(vcpu->kvm->mm);
         /*
          * We checked for start >= end above, so lets check for the
          * fast path (no prefix swap page involved)
          */
         if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
-                gmap_discard(vcpu->arch.gmap, start, end);
+                do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(end));
         } else {
                 /*
                  * This is slow path. gmap_discard will check for start
@@ -45,13 +64,14 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
                  * prefix and let gmap_discard make some of these calls
                  * NOPs.
                  */
-                gmap_discard(vcpu->arch.gmap, start, prefix);
+                do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(prefix));
                 if (start <= prefix)
-                        gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+                        do_discard_gfn_range(vcpu, 0, 1);
                 if (end > prefix + PAGE_SIZE)
-                        gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
+                        do_discard_gfn_range(vcpu, 1, 2);
-                gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
+                do_discard_gfn_range(vcpu, gpa_to_gfn(prefix) + 2, gpa_to_gfn(end));
         }
+        mmap_read_unlock(vcpu->kvm->mm);
         return 0;
 }
 
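The prefix-page arithmetic above relies on gpa_to_gfn() being a page shift, so discarding gfns [0, 1) and [1, 2) matches the old byte ranges [0, PAGE_SIZE) and [PAGE_SIZE, 2 * PAGE_SIZE). A worked sketch for illustration (the inline name here is hypothetical; KVM's real gpa_to_gfn() behaves this way):

/* With 4 KiB pages, gpa 0x0000-0x0fff maps to gfn 0 and
 * gpa 0x1000-0x1fff maps to gfn 1. */
static inline unsigned long example_gpa_to_gfn(unsigned long gpa)
{
        return gpa >> PAGE_SHIFT;
}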
@@ -16,9 +16,10 @@
 #include <asm/gmap.h>
 #include <asm/dat-bits.h>
 #include "kvm-s390.h"
-#include "gmap.h"
 #include "gaccess.h"
 
+#define GMAP_SHADOW_FAKE_TABLE 1ULL
+
 /*
  * vaddress union in order to easily decode a virtual address into its
  * region first index, region second index etc. parts.
@@ -22,7 +22,6 @@
 #include <asm/uv.h>
 
 #include "kvm-s390.h"
-#include "gmap.h"
 
 /**
  * gmap_find_shadow - find a specific asce in the list of shadow tables
arch/s390/kvm/gmap.c (deleted file, 121 lines)
@@ -1,121 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Guest memory management for KVM/s390
- *
- * Copyright IBM Corp. 2008, 2020, 2024
- *
- *    Author(s): Claudio Imbrenda <imbrenda@linux.ibm.com>
- *               Martin Schwidefsky <schwidefsky@de.ibm.com>
- *               David Hildenbrand <david@redhat.com>
- *               Janosch Frank <frankja@linux.vnet.ibm.com>
- */
-
-#include <linux/compiler.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/pgtable.h>
-#include <linux/pagemap.h>
-
-#include <asm/lowcore.h>
-#include <asm/gmap.h>
-#include <asm/uv.h>
-
-#include "gmap.h"
-
-/**
- * gmap_make_secure() - make one guest page secure
- * @gmap: the guest gmap
- * @gaddr: the guest address that needs to be made secure
- * @uvcb: the UVCB specifying which operation needs to be performed
- *
- * Context: needs to be called with kvm->srcu held.
- * Return: 0 on success, < 0 in case of error.
- */
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
-{
-        struct kvm *kvm = gmap->private;
-        unsigned long vmaddr;
-
-        lockdep_assert_held(&kvm->srcu);
-
-        vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
-        if (kvm_is_error_hva(vmaddr))
-                return -EFAULT;
-        return make_hva_secure(gmap->mm, vmaddr, uvcb);
-}
-
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
-{
-        struct uv_cb_cts uvcb = {
-                .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
-                .header.len = sizeof(uvcb),
-                .guest_handle = gmap->guest_handle,
-                .gaddr = gaddr,
-        };
-
-        return gmap_make_secure(gmap, gaddr, &uvcb);
-}
-
-/**
- * __gmap_destroy_page() - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @page: the page to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- *
- * Context: must be called holding the mm lock for gmap->mm
- */
-static int __gmap_destroy_page(struct gmap *gmap, struct page *page)
-{
-        struct folio *folio = page_folio(page);
-        int rc;
-
-        /*
-         * See gmap_make_secure(): large folios cannot be secure. Small
-         * folio implies FW_LEVEL_PTE.
-         */
-        if (folio_test_large(folio))
-                return -EFAULT;
-
-        rc = uv_destroy_folio(folio);
-        /*
-         * Fault handlers can race; it is possible that two CPUs will fault
-         * on the same secure page. One CPU can destroy the page, reboot,
-         * re-enter secure mode and import it, while the second CPU was
-         * stuck at the beginning of the handler. At some point the second
-         * CPU will be able to progress, and it will not be able to destroy
-         * the page. In that case we do not want to terminate the process,
-         * we instead try to export the page.
-         */
-        if (rc)
-                rc = uv_convert_from_secure_folio(folio);
-
-        return rc;
-}
-
-/**
- * gmap_destroy_page() - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @gaddr: the guest address to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- *
- * Context: may sleep.
- */
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
-{
-        struct page *page;
-        int rc = 0;
-
-        mmap_read_lock(gmap->mm);
-        page = gfn_to_page(gmap->private, gpa_to_gfn(gaddr));
-        if (page)
-                rc = __gmap_destroy_page(gmap, page);
-        kvm_release_page_clean(page);
-        mmap_read_unlock(gmap->mm);
-        return rc;
-}
arch/s390/kvm/gmap.h (deleted file, 39 lines)
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * KVM guest address space mapping code
- *
- *    Copyright IBM Corp. 2007, 2016, 2025
- *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- *               Claudio Imbrenda <imbrenda@linux.ibm.com>
- */
-
-#ifndef ARCH_KVM_S390_GMAP_H
-#define ARCH_KVM_S390_GMAP_H
-
-#define GMAP_SHADOW_FAKE_TABLE 1ULL
-
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
-struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
-
-/**
- * gmap_shadow_valid - check if a shadow guest address space matches the
- *                     given properties and is still valid
- * @sg: pointer to the shadow guest address space structure
- * @asce: ASCE for which the shadow table is requested
- * @edat_level: edat level to be used for the shadow translation
- *
- * Returns 1 if the gmap shadow is still valid and matches the given
- * properties, the caller can continue using it. Returns 0 otherwise, the
- * caller has to request a new shadow gmap in this case.
- *
- */
-static inline int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
-{
-        if (sg->removed)
-                return 0;
-        return sg->orig_asce == asce && sg->edat_level == edat_level;
-}
-
-#endif
@@ -21,7 +21,6 @@
 #include "gaccess.h"
 #include "trace.h"
 #include "trace-s390.h"
-#include "gmap.h"
 
 u8 kvm_s390_get_ilen(struct kvm_vcpu *vcpu)
 {
@@ -545,7 +544,7 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
                            guest_uvcb->header.cmd);
                 return 0;
         }
-        rc = gmap_make_secure(vcpu->arch.gmap, uvcb.gaddr, &uvcb);
+        rc = kvm_s390_pv_make_secure(vcpu->kvm, uvcb.gaddr, &uvcb);
         /*
          * If the unpin did not succeed, the guest will exit again for the UVC
          * and we will retry the unpin.
@@ -653,10 +652,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
                 break;
         case ICPT_PV_PREF:
                 rc = 0;
-                gmap_convert_to_secure(vcpu->arch.gmap,
-                                       kvm_s390_get_prefix(vcpu));
-                gmap_convert_to_secure(vcpu->arch.gmap,
-                                       kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
+                kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu));
+                kvm_s390_pv_convert_to_secure(vcpu->kvm, kvm_s390_get_prefix(vcpu) + PAGE_SIZE);
                 break;
         default:
                 return -EOPNOTSUPP;
@@ -40,6 +40,7 @@
 #include <asm/machine.h>
 #include <asm/stp.h>
 #include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
 #include <asm/nmi.h>
 #include <asm/isc.h>
 #include <asm/sclp.h>
@@ -52,7 +53,6 @@
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "pci.h"
-#include "gmap.h"
 
 #define CREATE_TRACE_POINTS
 #include "trace.h"
@@ -2674,7 +2674,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
                 if (r)
                         break;
 
-                r = s390_disable_cow_sharing();
+                mmap_write_lock(kvm->mm);
+                r = gmap_helper_disable_cow_sharing();
+                mmap_write_unlock(kvm->mm);
                 if (r)
                         break;
 
@@ -4973,7 +4975,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
          * previous protected guest. The old pages need to be destroyed
          * so the new guest can use them.
          */
-        if (gmap_destroy_page(vcpu->arch.gmap, gaddr)) {
+        if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) {
                 /*
                  * Either KVM messed up the secure guest mapping or the
                  * same page is mapped into multiple secure guests.
@@ -4995,7 +4997,7 @@ static int vcpu_post_run_handle_fault(struct kvm_vcpu *vcpu)
          * guest has not been imported yet. Try to import the page into
          * the protected guest.
          */
-        rc = gmap_convert_to_secure(vcpu->arch.gmap, gaddr);
+        rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr);
         if (rc == -EINVAL)
                 send_sig(SIGSEGV, current, 0);
         if (rc != -ENXIO)
@@ -308,6 +308,9 @@ int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
                                 u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc);
 int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
                               u16 *rc, u16 *rrc);
+int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr);
+int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr);
+int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb);
 
 static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
 {
@@ -319,6 +322,41 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
         return vcpu->arch.pv.handle;
 }
 
+/**
+ * __kvm_s390_pv_destroy_page() - Destroy a guest page.
+ * @page: the page to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: must be called holding the mm lock for gmap->mm
+ */
+static inline int __kvm_s390_pv_destroy_page(struct page *page)
+{
+        struct folio *folio = page_folio(page);
+        int rc;
+
+        /* Large folios cannot be secure. Small folio implies FW_LEVEL_PTE. */
+        if (folio_test_large(folio))
+                return -EFAULT;
+
+        rc = uv_destroy_folio(folio);
+        /*
+         * Fault handlers can race; it is possible that two CPUs will fault
+         * on the same secure page. One CPU can destroy the page, reboot,
+         * re-enter secure mode and import it, while the second CPU was
+         * stuck at the beginning of the handler. At some point the second
+         * CPU will be able to progress, and it will not be able to destroy
+         * the page. In that case we do not want to terminate the process,
+         * we instead try to export the page.
+         */
+        if (rc)
+                rc = uv_convert_from_secure_folio(folio);
+
+        return rc;
+}
+
 /* implemented in interrupt.c */
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
@@ -398,6 +436,10 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start,
                                  unsigned long end);
 void kvm_s390_vsie_init(struct kvm *kvm);
 void kvm_s390_vsie_destroy(struct kvm *kvm);
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level);
+
+/* implemented in gmap-vsie.c */
+struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce, int edat_level);
 
 /* implemented in sigp.c */
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
@@ -1248,6 +1248,8 @@ static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
 
 static int handle_essa(struct kvm_vcpu *vcpu)
 {
+        lockdep_assert_held(&vcpu->kvm->srcu);
+
         /* entries expected to be 1FF */
         int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
         unsigned long *cbrlo;
@@ -1297,12 +1299,8 @@ static int handle_essa(struct kvm_vcpu *vcpu)
                 /* Retry the ESSA instruction */
                 kvm_s390_retry_instr(vcpu);
         } else {
-                int srcu_idx;
-
                 mmap_read_lock(vcpu->kvm->mm);
-                srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
                 i = __do_essa(vcpu, orc);
-                srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
                 mmap_read_unlock(vcpu->kvm->mm);
                 if (i < 0)
                         return i;
@@ -17,7 +17,6 @@
 #include <linux/sched/mm.h>
 #include <linux/mmu_notifier.h>
 #include "kvm-s390.h"
-#include "gmap.h"
 
 bool kvm_s390_pv_is_protected(struct kvm *kvm)
 {
@@ -33,6 +32,64 @@ bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
 
+/**
+ * kvm_s390_pv_make_secure() - make one guest page secure
+ * @kvm: the guest
+ * @gaddr: the guest address that needs to be made secure
+ * @uvcb: the UVCB specifying which operation needs to be performed
+ *
+ * Context: needs to be called with kvm->srcu held.
+ * Return: 0 on success, < 0 in case of error.
+ */
+int kvm_s390_pv_make_secure(struct kvm *kvm, unsigned long gaddr, void *uvcb)
+{
+        unsigned long vmaddr;
+
+        lockdep_assert_held(&kvm->srcu);
+
+        vmaddr = gfn_to_hva(kvm, gpa_to_gfn(gaddr));
+        if (kvm_is_error_hva(vmaddr))
+                return -EFAULT;
+        return make_hva_secure(kvm->mm, vmaddr, uvcb);
+}
+
+int kvm_s390_pv_convert_to_secure(struct kvm *kvm, unsigned long gaddr)
+{
+        struct uv_cb_cts uvcb = {
+                .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
+                .header.len = sizeof(uvcb),
+                .guest_handle = kvm_s390_pv_get_handle(kvm),
+                .gaddr = gaddr,
+        };
+
+        return kvm_s390_pv_make_secure(kvm, gaddr, &uvcb);
+}
+
+/**
+ * kvm_s390_pv_destroy_page() - Destroy a guest page.
+ * @kvm: the guest
+ * @gaddr: the guest address to destroy
+ *
+ * An attempt will be made to destroy the given guest page. If the attempt
+ * fails, an attempt is made to export the page. If both attempts fail, an
+ * appropriate error is returned.
+ *
+ * Context: may sleep.
+ */
+int kvm_s390_pv_destroy_page(struct kvm *kvm, unsigned long gaddr)
+{
+        struct page *page;
+        int rc = 0;
+
+        mmap_read_lock(kvm->mm);
+        page = gfn_to_page(kvm, gpa_to_gfn(gaddr));
+        if (page)
+                rc = __kvm_s390_pv_destroy_page(page);
+        kvm_release_page_clean(page);
+        mmap_read_unlock(kvm->mm);
+        return rc;
+}
+
 /**
  * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
  * be destroyed
@@ -638,7 +695,7 @@ static int unpack_one(struct kvm *kvm, unsigned long addr, u64 tweak,
                 .tweak[0] = tweak,
                 .tweak[1] = offset,
         };
-        int ret = gmap_make_secure(kvm->arch.gmap, addr, &uvcb);
+        int ret = kvm_s390_pv_make_secure(kvm, addr, &uvcb);
         unsigned long vmaddr;
         bool unlocked;
 
@@ -23,7 +23,6 @@
 #include <asm/facility.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
-#include "gmap.h"
 
 enum vsie_page_flags {
         VSIE_PAGE_IN_USE = 0,
@@ -68,6 +67,24 @@ struct vsie_page {
         __u8 fac[S390_ARCH_FAC_LIST_SIZE_BYTE]; /* 0x0800 */
 };
 
+/**
+ * gmap_shadow_valid() - check if a shadow guest address space matches the
+ *                       given properties and is still valid
+ * @sg: pointer to the shadow guest address space structure
+ * @asce: ASCE for which the shadow table is requested
+ * @edat_level: edat level to be used for the shadow translation
+ *
+ * Returns 1 if the gmap shadow is still valid and matches the given
+ * properties, the caller can continue using it. Returns 0 otherwise; the
+ * caller has to request a new shadow gmap in this case.
+ */
+int gmap_shadow_valid(struct gmap *sg, unsigned long asce, int edat_level)
+{
+        if (sg->removed)
+                return 0;
+        return sg->orig_asce == asce && sg->edat_level == edat_level;
+}
+
 /* trigger a validity icpt for the given scb */
 static int set_validity_icpt(struct kvm_s390_sie_block *scb,
                              __u16 reason_code)
@@ -12,3 +12,5 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_PTDUMP) += dump_pagetables.o
 obj-$(CONFIG_PGSTE) += gmap.o
 obj-$(CONFIG_PFAULT) += pfault.o
+
+obj-$(subst m,y,$(CONFIG_KVM)) += gmap_helpers.o
@@ -40,7 +40,6 @@
 #include <asm/ptrace.h>
 #include <asm/fault.h>
 #include <asm/diag.h>
-#include <asm/gmap.h>
 #include <asm/irq.h>
 #include <asm/facility.h>
 #include <asm/uv.h>
@@ -22,9 +22,9 @@
 #include <asm/page-states.h>
 #include <asm/pgalloc.h>
 #include <asm/machine.h>
+#include <asm/gmap_helpers.h>
 #include <asm/gmap.h>
 #include <asm/page.h>
-#include <asm/tlb.h>
 
 /*
  * The address is saved in a radix tree directly; NULL would be ambiguous,
@@ -620,63 +620,20 @@ EXPORT_SYMBOL(__gmap_link);
  */
 void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
 {
-        struct vm_area_struct *vma;
         unsigned long vmaddr;
-        spinlock_t *ptl;
-        pte_t *ptep;
+
+        mmap_assert_locked(gmap->mm);
 
         /* Find the vm address for the guest address */
         vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
                                                    gaddr >> PMD_SHIFT);
         if (vmaddr) {
                 vmaddr |= gaddr & ~PMD_MASK;
-
-                vma = vma_lookup(gmap->mm, vmaddr);
-                if (!vma || is_vm_hugetlb_page(vma))
-                        return;
-
-                /* Get pointer to the page table entry */
-                ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
-                if (likely(ptep)) {
-                        ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
-                        pte_unmap_unlock(ptep, ptl);
-                }
+                gmap_helper_zap_one_page(gmap->mm, vmaddr);
         }
 }
 EXPORT_SYMBOL_GPL(__gmap_zap);
 
-void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
-{
-        unsigned long gaddr, vmaddr, size;
-        struct vm_area_struct *vma;
-
-        mmap_read_lock(gmap->mm);
-        for (gaddr = from; gaddr < to;
-             gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
-                /* Find the vm address for the guest address */
-                vmaddr = (unsigned long)
-                        radix_tree_lookup(&gmap->guest_to_host,
-                                          gaddr >> PMD_SHIFT);
-                if (!vmaddr)
-                        continue;
-                vmaddr |= gaddr & ~PMD_MASK;
-                /* Find vma in the parent mm */
-                vma = find_vma(gmap->mm, vmaddr);
-                if (!vma)
-                        continue;
-                /*
-                 * We do not discard pages that are backed by
-                 * hugetlbfs, so we don't have to refault them.
-                 */
-                if (is_vm_hugetlb_page(vma))
-                        continue;
-                size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
-                zap_page_range_single(vma, vmaddr, size, NULL);
-        }
-        mmap_read_unlock(gmap->mm);
-}
-EXPORT_SYMBOL_GPL(gmap_discard);
-
 static LIST_HEAD(gmap_notifier_list);
 static DEFINE_SPINLOCK(gmap_notifier_lock);
 
@@ -2269,138 +2226,6 @@ int s390_enable_sie(void)
 }
 EXPORT_SYMBOL_GPL(s390_enable_sie);
 
-static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
-                                   unsigned long end, struct mm_walk *walk)
-{
-        unsigned long *found_addr = walk->private;
-
-        /* Return 1 of the page is a zeropage. */
-        if (is_zero_pfn(pte_pfn(*pte))) {
-                /*
-                 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
-                 * right thing and likely don't care: FAULT_FLAG_UNSHARE
-                 * currently only works in COW mappings, which is also where
-                 * mm_forbids_zeropage() is checked.
-                 */
-                if (!is_cow_mapping(walk->vma->vm_flags))
-                        return -EFAULT;
-
-                *found_addr = addr;
-                return 1;
-        }
-        return 0;
-}
-
-static const struct mm_walk_ops find_zeropage_ops = {
-        .pte_entry      = find_zeropage_pte_entry,
-        .walk_lock      = PGWALK_WRLOCK,
-};
-
-/*
- * Unshare all shared zeropages, replacing them by anonymous pages. Note that
- * we cannot simply zap all shared zeropages, because this could later
- * trigger unexpected userfaultfd missing events.
- *
- * This must be called after mm->context.allow_cow_sharing was
- * set to 0, to avoid future mappings of shared zeropages.
- *
- * mm contracts with s390, that even if mm were to remove a page table,
- * and racing with walk_page_range_vma() calling pte_offset_map_lock()
- * would fail, it will never insert a page table containing empty zero
- * pages once mm_forbids_zeropage(mm) i.e.
- * mm->context.allow_cow_sharing is set to 0.
- */
-static int __s390_unshare_zeropages(struct mm_struct *mm)
-{
-        struct vm_area_struct *vma;
-        VMA_ITERATOR(vmi, mm, 0);
-        unsigned long addr;
-        vm_fault_t fault;
-        int rc;
-
-        for_each_vma(vmi, vma) {
-                /*
-                 * We could only look at COW mappings, but it's more future
-                 * proof to catch unexpected zeropages in other mappings and
-                 * fail.
-                 */
-                if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
-                        continue;
-                addr = vma->vm_start;
-
-retry:
-                rc = walk_page_range_vma(vma, addr, vma->vm_end,
-                                         &find_zeropage_ops, &addr);
-                if (rc < 0)
-                        return rc;
-                else if (!rc)
-                        continue;
-
-                /* addr was updated by find_zeropage_pte_entry() */
-                fault = handle_mm_fault(vma, addr,
-                                        FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
-                                        NULL);
-                if (fault & VM_FAULT_OOM)
-                        return -ENOMEM;
-                /*
-                 * See break_ksm(): even after handle_mm_fault() returned 0, we
-                 * must start the lookup from the current address, because
-                 * handle_mm_fault() may back out if there's any difficulty.
-                 *
-                 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
-                 * maybe they could trigger in the future on concurrent
-                 * truncation. In that case, the shared zeropage would be gone
-                 * and we can simply retry and make progress.
-                 */
-                cond_resched();
-                goto retry;
-        }
-
-        return 0;
-}
-
-static int __s390_disable_cow_sharing(struct mm_struct *mm)
-{
-        int rc;
-
-        if (!mm->context.allow_cow_sharing)
-                return 0;
-
-        mm->context.allow_cow_sharing = 0;
-
-        /* Replace all shared zeropages by anonymous pages. */
-        rc = __s390_unshare_zeropages(mm);
-        /*
-         * Make sure to disable KSM (if enabled for the whole process or
-         * individual VMAs). Note that nothing currently hinders user space
-         * from re-enabling it.
-         */
-        if (!rc)
-                rc = ksm_disable(mm);
-        if (rc)
-                mm->context.allow_cow_sharing = 1;
-        return rc;
-}
-
-/*
- * Disable most COW-sharing of memory pages for the whole process:
- * (1) Disable KSM and unmerge/unshare any KSM pages.
- * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
- *
- * Not that we currently don't bother with COW-shared pages that are shared
- * with parent/child processes due to fork().
- */
-int s390_disable_cow_sharing(void)
-{
-        int rc;
-
-        mmap_write_lock(current->mm);
-        rc = __s390_disable_cow_sharing(current->mm);
-        mmap_write_unlock(current->mm);
-        return rc;
-}
-EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);
-
 /*
  * Enable storage key handling from now on and initialize the storage
  * keys with the default key.
@@ -2468,7 +2293,7 @@ int s390_enable_skey(void)
                 goto out_up;
 
         mm->context.uses_skeys = 1;
-        rc = __s390_disable_cow_sharing(mm);
+        rc = gmap_helper_disable_cow_sharing();
         if (rc) {
                 mm->context.uses_skeys = 0;
                 goto out_up;
arch/s390/mm/gmap_helpers.c (new file, 221 lines)
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Helper functions for KVM guest address space mapping code
+ *
+ *    Copyright IBM Corp. 2007, 2025
+ */
+#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/pagewalk.h>
+#include <linux/ksm.h>
+#include <asm/gmap_helpers.h>
+
+/**
+ * ptep_zap_swap_entry() - discard a swap entry.
+ * @mm: the mm
+ * @entry: the swap entry that needs to be zapped
+ *
+ * Discards the given swap entry. If the swap entry was an actual swap
+ * entry (and not a migration entry, for example), the actual swapped
+ * page is also discarded from swap.
+ */
+static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
+{
+        if (!non_swap_entry(entry))
+                dec_mm_counter(mm, MM_SWAPENTS);
+        else if (is_migration_entry(entry))
+                dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
+        free_swap_and_cache(entry);
+}
+
+/**
+ * gmap_helper_zap_one_page() - discard a page if it was swapped.
+ * @mm: the mm
+ * @vmaddr: the userspace virtual address that needs to be discarded
+ *
+ * If the given address maps to a swap entry, discard it.
+ *
+ * Context: needs to be called while holding the mmap lock.
+ */
+void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
+{
+        struct vm_area_struct *vma;
+        spinlock_t *ptl;
+        pte_t *ptep;
+
+        mmap_assert_locked(mm);
+
+        /* Find the vm address for the guest address */
+        vma = vma_lookup(mm, vmaddr);
+        if (!vma || is_vm_hugetlb_page(vma))
+                return;
+
+        /* Get pointer to the page table entry */
+        ptep = get_locked_pte(mm, vmaddr, &ptl);
+        if (unlikely(!ptep))
+                return;
+        if (pte_swap(*ptep))
+                ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
+        pte_unmap_unlock(ptep, ptl);
+}
+EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);
+
+/**
+ * gmap_helper_discard() - discard user pages in the given range
+ * @mm: the mm
+ * @vmaddr: starting userspace address
+ * @end: end address (first address outside the range)
+ *
+ * All userpace pages in the range [@vamddr, @end) are discarded and unmapped.
+ *
+ * Context: needs to be called while holding the mmap lock.
+ */
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
+{
+        struct vm_area_struct *vma;
+
+        mmap_assert_locked(mm);
+
+        while (vmaddr < end) {
+                vma = find_vma_intersection(mm, vmaddr, end);
+                if (!vma)
+                        return;
+                if (!is_vm_hugetlb_page(vma))
+                        zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
+                vmaddr = vma->vm_end;
+        }
+}
+EXPORT_SYMBOL_GPL(gmap_helper_discard);
+
+static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
+                                   unsigned long end, struct mm_walk *walk)
+{
+        unsigned long *found_addr = walk->private;
+
+        /* Return 1 of the page is a zeropage. */
+        if (is_zero_pfn(pte_pfn(*pte))) {
+                /*
+                 * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
+                 * right thing and likely don't care: FAULT_FLAG_UNSHARE
+                 * currently only works in COW mappings, which is also where
+                 * mm_forbids_zeropage() is checked.
+                 */
+                if (!is_cow_mapping(walk->vma->vm_flags))
+                        return -EFAULT;
+
+                *found_addr = addr;
+                return 1;
+        }
+        return 0;
+}
+
+static const struct mm_walk_ops find_zeropage_ops = {
+        .pte_entry      = find_zeropage_pte_entry,
+        .walk_lock      = PGWALK_WRLOCK,
+};
+
+/** __gmap_helper_unshare_zeropages() - unshare all shared zeropages
+ * @mm: the mm whose zero pages are to be unshared
+ *
+ * Unshare all shared zeropages, replacing them by anonymous pages. Note that
+ * we cannot simply zap all shared zeropages, because this could later
+ * trigger unexpected userfaultfd missing events.
+ *
+ * This must be called after mm->context.allow_cow_sharing was
+ * set to 0, to avoid future mappings of shared zeropages.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * and racing with walk_page_range_vma() calling pte_offset_map_lock()
+ * would fail, it will never insert a page table containing empty zero
+ * pages once mm_forbids_zeropage(mm) i.e.
+ * mm->context.allow_cow_sharing is set to 0.
+ */
+static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
+{
+        struct vm_area_struct *vma;
+        VMA_ITERATOR(vmi, mm, 0);
+        unsigned long addr;
+        vm_fault_t fault;
+        int rc;
+
+        for_each_vma(vmi, vma) {
+                /*
+                 * We could only look at COW mappings, but it's more future
+                 * proof to catch unexpected zeropages in other mappings and
+                 * fail.
+                 */
+                if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
+                        continue;
+                addr = vma->vm_start;
+
+retry:
+                rc = walk_page_range_vma(vma, addr, vma->vm_end,
+                                         &find_zeropage_ops, &addr);
+                if (rc < 0)
+                        return rc;
+                else if (!rc)
+                        continue;
+
+                /* addr was updated by find_zeropage_pte_entry() */
+                fault = handle_mm_fault(vma, addr,
+                                        FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+                                        NULL);
+                if (fault & VM_FAULT_OOM)
+                        return -ENOMEM;
+                /*
+                 * See break_ksm(): even after handle_mm_fault() returned 0, we
+                 * must start the lookup from the current address, because
+                 * handle_mm_fault() may back out if there's any difficulty.
+                 *
+                 * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
+                 * maybe they could trigger in the future on concurrent
+                 * truncation. In that case, the shared zeropage would be gone
+                 * and we can simply retry and make progress.
+                 */
+                cond_resched();
+                goto retry;
+        }
+
+        return 0;
+}
+
+/**
+ * gmap_helper_disable_cow_sharing() - disable all COW sharing
+ *
+ * Disable most COW-sharing of memory pages for the whole process:
+ * (1) Disable KSM and unmerge/unshare any KSM pages.
+ * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
+ *
+ * Not that we currently don't bother with COW-shared pages that are shared
+ * with parent/child processes due to fork().
+ */
+int gmap_helper_disable_cow_sharing(void)
+{
+        struct mm_struct *mm = current->mm;
+        int rc;
+
+        mmap_assert_write_locked(mm);
+
+        if (!mm->context.allow_cow_sharing)
+                return 0;
+
+        mm->context.allow_cow_sharing = 0;
+
+        /* Replace all shared zeropages by anonymous pages. */
+        rc = __gmap_helper_unshare_zeropages(mm);
+        /*
+         * Make sure to disable KSM (if enabled for the whole process or
+         * individual VMAs). Note that nothing currently hinders user space
+         * from re-enabling it.
+         */
+        if (!rc)
+                rc = ksm_disable(mm);
+        if (rc)
+                mm->context.allow_cow_sharing = 1;
+        return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);
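A minimal sketch of the changed locking contract (hypothetical wrapper name, not part of this series): unlike the removed s390_disable_cow_sharing(), which took the mmap write lock itself, gmap_helper_disable_cow_sharing() asserts that the caller already holds it, as both converted call sites above do.

/* Hypothetical wrapper mirroring the kvm-s390.c call site; for a KVM
 * ioctl, kvm->mm == current->mm, which is the mm the helper acts on. */
static int disable_cow_sharing_locked(struct mm_struct *mm)
{
        int rc;

        mmap_write_lock(mm);
        rc = gmap_helper_disable_cow_sharing();
        mmap_write_unlock(mm);
        return rc;
}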
@@ -40,7 +40,6 @@
 #include <asm/kfence.h>
 #include <asm/dma.h>
 #include <asm/abs_lowcore.h>
-#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/sclp.h>
@@ -12,8 +12,6 @@
 #include <asm/mmu_context.h>
 #include <asm/page-states.h>
 #include <asm/pgalloc.h>
-#include <asm/gmap.h>
-#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 
 unsigned long *crst_table_alloc(struct mm_struct *mm)
@@ -20,7 +20,6 @@
 #include <linux/ksm.h>
 #include <linux/mman.h>
 
-#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/page-states.h>