linux-imx/arch/x86/include/asm/kexec.h
Eric DeVolder a72bbec70d crash: hotplug support for kexec_load()
The hotplug support for kexec_load() requires changes to the userspace
kexec-tools and a little extra help from the kernel.

Given a kdump capture kernel loaded via kexec_load(), and a subsequent
hotplug event, the crash hotplug handler finds the elfcorehdr and rewrites
it to reflect the hotplug change.  That is the desired outcome, however,
at kernel panic time, the purgatory integrity check fails (because the
elfcorehdr changed), and the capture kernel does not boot and no vmcore is
generated.

Therefore, the userspace kexec-tools/kexec must indicate to the kernel
that the elfcorehdr can be modified (because the kexec excluded the
elfcorehdr from the digest, and sized the elfcorehdr memory buffer
appropriately).

To facilitate hotplug support with kexec_load():
 - a new kexec flag KEXEC_UPATE_ELFCOREHDR indicates that it is
   safe for the kernel to modify the kexec_load()'d elfcorehdr
 - the /sys/kernel/crash_elfcorehdr_size node communicates the
   preferred size of the elfcorehdr memory buffer
 - The sysfs crash_hotplug nodes (ie.
   /sys/devices/system/[cpu|memory]/crash_hotplug) dynamically
   take into account kexec_file_load() vs kexec_load() and
   KEXEC_UPDATE_ELFCOREHDR.
   This is critical so that the udev rule processing of crash_hotplug
   is all that is needed to determine if the userspace unload-then-load
   of the kdump image is to be skipped, or not. The proposed udev
   rule change looks like:
   # The kernel updates the crash elfcorehdr for CPU and memory changes
   SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"
   SUBSYSTEM=="memory", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"

The table below indicates the behavior of kexec_load()'d kdump image
updates (with the new udev crash_hotplug rule in place):

 Kernel |Kexec
 -------+-----+----
 Old    |Old  |New
        |  a  | a
 -------+-----+----
 New    |  a  | b
 -------+-----+----

where kexec 'old' and 'new' delineate kexec-tools has the needed
modifications for the crash hotplug feature, and kernel 'old' and 'new'
delineate the kernel supports this crash hotplug feature.

Behavior 'a' indicates the unload-then-reload of the entire kdump image. 
For the kexec 'old' column, the unload-then-reload occurs due to the
missing flag KEXEC_UPDATE_ELFCOREHDR.  An 'old' kernel (with 'new' kexec)
does not present the crash_hotplug sysfs node, which leads to the
unload-then-reload of the kdump image.

Behavior 'b' indicates the desired optimized behavior of the kernel
directly modifying the elfcorehdr and avoiding the unload-then-reload of
the kdump image.

If the udev rule is not updated with crash_hotplug node check, then no
matter any combination of kernel or kexec is new or old, the kdump image
continues to be unload-then-reload on hotplug changes.

To fully support crash hotplug feature, there needs to be a rollout of
kernel, kexec-tools and udev rule changes.  However, the order of the
rollout of these pieces does not matter; kexec_load()'d kdump images still
function for hotplug as-is.

Link: https://lkml.kernel.org/r/20230814214446.6659-7-eric.devolder@oracle.com
Signed-off-by: Eric DeVolder <eric.devolder@oracle.com>
Suggested-by: Hari Bathini <hbathini@linux.ibm.com>
Acked-by: Hari Bathini <hbathini@linux.ibm.com>
Acked-by: Baoquan He <bhe@redhat.com>
Cc: Akhil Raj <lf32.dev@gmail.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Mimi Zohar <zohar@linux.ibm.com>
Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Sourabh Jain <sourabhjain@linux.ibm.com>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Thomas Weißschuh <linux@weissschuh.net>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-08-24 16:25:14 -07:00

233 lines
6.8 KiB
C

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_KEXEC_H
#define _ASM_X86_KEXEC_H
#ifdef CONFIG_X86_32
# define PA_CONTROL_PAGE 0
# define VA_CONTROL_PAGE 1
# define PA_PGD 2
# define PA_SWAP_PAGE 3
# define PAGES_NR 4
#else
# define PA_CONTROL_PAGE 0
# define VA_CONTROL_PAGE 1
# define PA_TABLE_PAGE 2
# define PA_SWAP_PAGE 3
# define PAGES_NR 4
#endif
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
#ifndef __ASSEMBLY__
#include <linux/string.h>
#include <linux/kernel.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/bootparam.h>
struct kimage;
/*
* KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
* I.e. Maximum page that is mapped directly into kernel memory,
* and kmap is not required.
*
* So far x86_64 is limited to 40 physical address bits.
*/
#ifdef CONFIG_X86_32
/* Maximum physical address we can use pages from */
# define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
/* Maximum address we can reach in physical address mode */
# define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
/* Maximum address we can use for the control code buffer */
# define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
# define KEXEC_CONTROL_PAGE_SIZE 4096
/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_386
/* We can also handle crash dumps from 64 bit kernel. */
# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
#else
/* Maximum physical address we can use pages from */
# define KEXEC_SOURCE_MEMORY_LIMIT (MAXMEM-1)
/* Maximum address we can reach in physical address mode */
# define KEXEC_DESTINATION_MEMORY_LIMIT (MAXMEM-1)
/* Maximum address we can use for the control pages */
# define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1)
/* Allocate one page for the pdp and the second for the code */
# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL)
/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_X86_64
#endif
/*
* This function is responsible for capturing register states if coming
* via panic otherwise just fix up the ss and sp if coming via kernel
* mode exception.
*/
static inline void crash_setup_regs(struct pt_regs *newregs,
struct pt_regs *oldregs)
{
if (oldregs) {
memcpy(newregs, oldregs, sizeof(*newregs));
} else {
#ifdef CONFIG_X86_32
asm volatile("movl %%ebx,%0" : "=m"(newregs->bx));
asm volatile("movl %%ecx,%0" : "=m"(newregs->cx));
asm volatile("movl %%edx,%0" : "=m"(newregs->dx));
asm volatile("movl %%esi,%0" : "=m"(newregs->si));
asm volatile("movl %%edi,%0" : "=m"(newregs->di));
asm volatile("movl %%ebp,%0" : "=m"(newregs->bp));
asm volatile("movl %%eax,%0" : "=m"(newregs->ax));
asm volatile("movl %%esp,%0" : "=m"(newregs->sp));
asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss));
asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs));
asm volatile("movl %%ds, %%eax;" :"=a"(newregs->ds));
asm volatile("movl %%es, %%eax;" :"=a"(newregs->es));
asm volatile("pushfl; popl %0" :"=m"(newregs->flags));
#else
asm volatile("movq %%rbx,%0" : "=m"(newregs->bx));
asm volatile("movq %%rcx,%0" : "=m"(newregs->cx));
asm volatile("movq %%rdx,%0" : "=m"(newregs->dx));
asm volatile("movq %%rsi,%0" : "=m"(newregs->si));
asm volatile("movq %%rdi,%0" : "=m"(newregs->di));
asm volatile("movq %%rbp,%0" : "=m"(newregs->bp));
asm volatile("movq %%rax,%0" : "=m"(newregs->ax));
asm volatile("movq %%rsp,%0" : "=m"(newregs->sp));
asm volatile("movq %%r8,%0" : "=m"(newregs->r8));
asm volatile("movq %%r9,%0" : "=m"(newregs->r9));
asm volatile("movq %%r10,%0" : "=m"(newregs->r10));
asm volatile("movq %%r11,%0" : "=m"(newregs->r11));
asm volatile("movq %%r12,%0" : "=m"(newregs->r12));
asm volatile("movq %%r13,%0" : "=m"(newregs->r13));
asm volatile("movq %%r14,%0" : "=m"(newregs->r14));
asm volatile("movq %%r15,%0" : "=m"(newregs->r15));
asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss));
asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs));
asm volatile("pushfq; popq %0" :"=m"(newregs->flags));
#endif
newregs->ip = _THIS_IP_;
}
}
#ifdef CONFIG_X86_32
asmlinkage unsigned long
relocate_kernel(unsigned long indirection_page,
unsigned long control_page,
unsigned long start_address,
unsigned int has_pae,
unsigned int preserve_context);
#else
unsigned long
relocate_kernel(unsigned long indirection_page,
unsigned long page_list,
unsigned long start_address,
unsigned int preserve_context,
unsigned int host_mem_enc_active);
#endif
#define ARCH_HAS_KIMAGE_ARCH
#ifdef CONFIG_X86_32
struct kimage_arch {
pgd_t *pgd;
#ifdef CONFIG_X86_PAE
pmd_t *pmd0;
pmd_t *pmd1;
#endif
pte_t *pte0;
pte_t *pte1;
};
#else
struct kimage_arch {
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
};
#endif /* CONFIG_X86_32 */
#ifdef CONFIG_X86_64
/*
* Number of elements and order of elements in this structure should match
* with the ones in arch/x86/purgatory/entry64.S. If you make a change here
* make an appropriate change in purgatory too.
*/
struct kexec_entry64_regs {
uint64_t rax;
uint64_t rcx;
uint64_t rdx;
uint64_t rbx;
uint64_t rsp;
uint64_t rbp;
uint64_t rsi;
uint64_t rdi;
uint64_t r8;
uint64_t r9;
uint64_t r10;
uint64_t r11;
uint64_t r12;
uint64_t r13;
uint64_t r14;
uint64_t r15;
uint64_t rip;
};
extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
gfp_t gfp);
#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages
extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages
void arch_kexec_protect_crashkres(void);
#define arch_kexec_protect_crashkres arch_kexec_protect_crashkres
void arch_kexec_unprotect_crashkres(void);
#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres
#ifdef CONFIG_KEXEC_FILE
struct purgatory_info;
int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
Elf_Shdr *section,
const Elf_Shdr *relsec,
const Elf_Shdr *symtab);
#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
int arch_kimage_file_post_load_cleanup(struct kimage *image);
#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
#endif
#endif
typedef void crash_vmclear_fn(void);
extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
extern void kdump_nmi_shootdown_cpus(void);
#ifdef CONFIG_CRASH_HOTPLUG
void arch_crash_handle_hotplug_event(struct kimage *image);
#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
#ifdef CONFIG_HOTPLUG_CPU
int arch_crash_hotplug_cpu_support(void);
#define crash_hotplug_cpu_support arch_crash_hotplug_cpu_support
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
int arch_crash_hotplug_memory_support(void);
#define crash_hotplug_memory_support arch_crash_hotplug_memory_support
#endif
unsigned int arch_crash_get_elfcorehdr_size(void);
#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size
#endif
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_KEXEC_H */