mirror of
https://github.com/nxp-imx/linux-imx.git
synced 2025-07-19 07:39:54 +02:00

The hotplug support for kexec_load() requires changes to the userspace kexec-tools and a little extra help from the kernel. Given a kdump capture kernel loaded via kexec_load(), and a subsequent hotplug event, the crash hotplug handler finds the elfcorehdr and rewrites it to reflect the hotplug change. That is the desired outcome; however, at kernel panic time, the purgatory integrity check fails (because the elfcorehdr changed), and the capture kernel does not boot and no vmcore is generated. Therefore, the userspace kexec-tools/kexec must indicate to the kernel that the elfcorehdr can be modified (because the kexec excluded the elfcorehdr from the digest, and sized the elfcorehdr memory buffer appropriately). To facilitate hotplug support with kexec_load():

- a new kexec flag KEXEC_UPDATE_ELFCOREHDR indicates that it is safe for the kernel to modify the kexec_load()'d elfcorehdr
- the /sys/kernel/crash_elfcorehdr_size node communicates the preferred size of the elfcorehdr memory buffer
- the sysfs crash_hotplug nodes (ie. /sys/devices/system/[cpu|memory]/crash_hotplug) dynamically take into account kexec_file_load() vs kexec_load() and KEXEC_UPDATE_ELFCOREHDR. This is critical so that the udev rule processing of crash_hotplug is all that is needed to determine if the userspace unload-then-load of the kdump image is to be skipped, or not.
The proposed udev rule change looks like:

    # The kernel updates the crash elfcorehdr for CPU and memory changes
    SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"
    SUBSYSTEM=="memory", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end"

The table below indicates the behavior of kexec_load()'d kdump image updates (with the new udev crash_hotplug rule in place):

    Kernel |Kexec
    -------+-----+----
     Old   |Old  |New
           | a   | a
    -------+-----+----
     New   | a   | b
    -------+-----+----

where kexec 'old' and 'new' delineate whether kexec-tools has the needed modifications for the crash hotplug feature, and kernel 'old' and 'new' delineate whether the kernel supports this crash hotplug feature.

Behavior 'a' indicates the unload-then-reload of the entire kdump image. For the kexec 'old' column, the unload-then-reload occurs due to the missing flag KEXEC_UPDATE_ELFCOREHDR. An 'old' kernel (with 'new' kexec) does not present the crash_hotplug sysfs node, which leads to the unload-then-reload of the kdump image.

Behavior 'b' indicates the desired optimized behavior of the kernel directly modifying the elfcorehdr and avoiding the unload-then-reload of the kdump image.

If the udev rule is not updated with the crash_hotplug node check, then no matter any combination of kernel or kexec being new or old, the kdump image continues to be unload-then-reload on hotplug changes.

To fully support the crash hotplug feature, there needs to be a rollout of kernel, kexec-tools and udev rule changes. However, the order of the rollout of these pieces does not matter; kexec_load()'d kdump images still function for hotplug as-is.
Link: https://lkml.kernel.org/r/20230814214446.6659-7-eric.devolder@oracle.com Signed-off-by: Eric DeVolder <eric.devolder@oracle.com> Suggested-by: Hari Bathini <hbathini@linux.ibm.com> Acked-by: Hari Bathini <hbathini@linux.ibm.com> Acked-by: Baoquan He <bhe@redhat.com> Cc: Akhil Raj <lf32.dev@gmail.com> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: Borislav Petkov (AMD) <bp@alien8.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Dave Young <dyoung@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Mimi Zohar <zohar@linux.ibm.com> Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: "Rafael J. Wysocki" <rafael@kernel.org> Cc: Sean Christopherson <seanjc@google.com> Cc: Sourabh Jain <sourabhjain@linux.ibm.com> Cc: Takashi Iwai <tiwai@suse.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Thomas Weißschuh <linux@weissschuh.net> Cc: Valentin Schneider <vschneid@redhat.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
233 lines
6.8 KiB
C
233 lines
6.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _ASM_X86_KEXEC_H
#define _ASM_X86_KEXEC_H

/*
 * Fixed indices into the page list used during kexec relocation.
 * The 32-bit layout carries the PGD and a swap page; the 64-bit
 * layout carries the top-level table page instead of a PGD slot.
 * Both layouts use PAGES_NR entries in total.
 */
#ifdef CONFIG_X86_32
# define PA_CONTROL_PAGE	0
# define VA_CONTROL_PAGE	1
# define PA_PGD			2
# define PA_SWAP_PAGE		3
# define PAGES_NR		4
#else
# define PA_CONTROL_PAGE	0
# define VA_CONTROL_PAGE	1
# define PA_TABLE_PAGE		2
# define PA_SWAP_PAGE		3
# define PAGES_NR		4
#endif

/* Upper bound on the size of the kexec control (relocation) code. */
# define KEXEC_CONTROL_CODE_MAX_SIZE	2048

#ifndef __ASSEMBLY__

#include <linux/string.h>
#include <linux/kernel.h>

#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/bootparam.h>

struct kimage;
/*
 * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
 * I.e. Maximum page that is mapped directly into kernel memory,
 * and kmap is not required.
 *
 * So far x86_64 is limited to 40 physical address bits.
 */
#ifdef CONFIG_X86_32
/* Maximum physical address we can use pages from */
# define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
/* Maximum address we can reach in physical address mode */
# define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
/* Maximum address we can use for the control code buffer */
# define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE

/* One page suffices for the 32-bit control code. */
# define KEXEC_CONTROL_PAGE_SIZE	4096

/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_386

/* We can also handle crash dumps from 64 bit kernel. */
# define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64)
#else
/* Maximum physical address we can use pages from */
# define KEXEC_SOURCE_MEMORY_LIMIT      (MAXMEM-1)
/* Maximum address we can reach in physical address mode */
# define KEXEC_DESTINATION_MEMORY_LIMIT (MAXMEM-1)
/* Maximum address we can use for the control pages */
# define KEXEC_CONTROL_MEMORY_LIMIT     (MAXMEM-1)

/* Allocate one page for the pdp and the second for the code */
# define KEXEC_CONTROL_PAGE_SIZE  (4096UL + 4096UL)

/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_X86_64
#endif
/*
|
|
* This function is responsible for capturing register states if coming
|
|
* via panic otherwise just fix up the ss and sp if coming via kernel
|
|
* mode exception.
|
|
*/
|
|
static inline void crash_setup_regs(struct pt_regs *newregs,
|
|
struct pt_regs *oldregs)
|
|
{
|
|
if (oldregs) {
|
|
memcpy(newregs, oldregs, sizeof(*newregs));
|
|
} else {
|
|
#ifdef CONFIG_X86_32
|
|
asm volatile("movl %%ebx,%0" : "=m"(newregs->bx));
|
|
asm volatile("movl %%ecx,%0" : "=m"(newregs->cx));
|
|
asm volatile("movl %%edx,%0" : "=m"(newregs->dx));
|
|
asm volatile("movl %%esi,%0" : "=m"(newregs->si));
|
|
asm volatile("movl %%edi,%0" : "=m"(newregs->di));
|
|
asm volatile("movl %%ebp,%0" : "=m"(newregs->bp));
|
|
asm volatile("movl %%eax,%0" : "=m"(newregs->ax));
|
|
asm volatile("movl %%esp,%0" : "=m"(newregs->sp));
|
|
asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss));
|
|
asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs));
|
|
asm volatile("movl %%ds, %%eax;" :"=a"(newregs->ds));
|
|
asm volatile("movl %%es, %%eax;" :"=a"(newregs->es));
|
|
asm volatile("pushfl; popl %0" :"=m"(newregs->flags));
|
|
#else
|
|
asm volatile("movq %%rbx,%0" : "=m"(newregs->bx));
|
|
asm volatile("movq %%rcx,%0" : "=m"(newregs->cx));
|
|
asm volatile("movq %%rdx,%0" : "=m"(newregs->dx));
|
|
asm volatile("movq %%rsi,%0" : "=m"(newregs->si));
|
|
asm volatile("movq %%rdi,%0" : "=m"(newregs->di));
|
|
asm volatile("movq %%rbp,%0" : "=m"(newregs->bp));
|
|
asm volatile("movq %%rax,%0" : "=m"(newregs->ax));
|
|
asm volatile("movq %%rsp,%0" : "=m"(newregs->sp));
|
|
asm volatile("movq %%r8,%0" : "=m"(newregs->r8));
|
|
asm volatile("movq %%r9,%0" : "=m"(newregs->r9));
|
|
asm volatile("movq %%r10,%0" : "=m"(newregs->r10));
|
|
asm volatile("movq %%r11,%0" : "=m"(newregs->r11));
|
|
asm volatile("movq %%r12,%0" : "=m"(newregs->r12));
|
|
asm volatile("movq %%r13,%0" : "=m"(newregs->r13));
|
|
asm volatile("movq %%r14,%0" : "=m"(newregs->r14));
|
|
asm volatile("movq %%r15,%0" : "=m"(newregs->r15));
|
|
asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss));
|
|
asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs));
|
|
asm volatile("pushfq; popq %0" :"=m"(newregs->flags));
|
|
#endif
|
|
newregs->ip = _THIS_IP_;
|
|
}
|
|
}
|
|
|
|
/*
 * Entry point of the kexec relocation code; jumps to the new kernel at
 * start_address after walking the indirection page list. The 32- and
 * 64-bit variants take different trailing arguments.
 */
#ifdef CONFIG_X86_32
asmlinkage unsigned long
relocate_kernel(unsigned long indirection_page,
		unsigned long control_page,
		unsigned long start_address,
		unsigned int has_pae,
		unsigned int preserve_context);
#else
unsigned long
relocate_kernel(unsigned long indirection_page,
		unsigned long page_list,
		unsigned long start_address,
		unsigned int preserve_context,
		unsigned int host_mem_enc_active);
#endif
#define ARCH_HAS_KIMAGE_ARCH
|
|
|
|
#ifdef CONFIG_X86_32
|
|
struct kimage_arch {
|
|
pgd_t *pgd;
|
|
#ifdef CONFIG_X86_PAE
|
|
pmd_t *pmd0;
|
|
pmd_t *pmd1;
|
|
#endif
|
|
pte_t *pte0;
|
|
pte_t *pte1;
|
|
};
|
|
#else
|
|
struct kimage_arch {
|
|
p4d_t *p4d;
|
|
pud_t *pud;
|
|
pmd_t *pmd;
|
|
pte_t *pte;
|
|
};
|
|
#endif /* CONFIG_X86_32 */
|
|
|
|
#ifdef CONFIG_X86_64
/*
 * Number of elements and order of elements in this structure should match
 * with the ones in arch/x86/purgatory/entry64.S. If you make a change here
 * make an appropriate change in purgatory too.
 */
struct kexec_entry64_regs {
	uint64_t rax;
	uint64_t rcx;
	uint64_t rdx;
	uint64_t rbx;
	uint64_t rsp;
	uint64_t rbp;
	uint64_t rsi;
	uint64_t rdi;
	uint64_t r8;
	uint64_t r9;
	uint64_t r10;
	uint64_t r11;
	uint64_t r12;
	uint64_t r13;
	uint64_t r14;
	uint64_t r15;
	uint64_t rip;
};

/* Arch hooks for kexec page allocation/freeing; the #define of each
 * name to itself tells the generic code an arch override exists. */
extern int arch_kexec_post_alloc_pages(void *vaddr, unsigned int pages,
				       gfp_t gfp);
#define arch_kexec_post_alloc_pages arch_kexec_post_alloc_pages

extern void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages);
#define arch_kexec_pre_free_pages arch_kexec_pre_free_pages

/* Arch hooks to (un)protect the crash kernel reserved region. */
void arch_kexec_protect_crashkres(void);
#define arch_kexec_protect_crashkres arch_kexec_protect_crashkres

void arch_kexec_unprotect_crashkres(void);
#define arch_kexec_unprotect_crashkres arch_kexec_unprotect_crashkres

#ifdef CONFIG_KEXEC_FILE
struct purgatory_info;
/* Apply ELF RELA relocations to a purgatory section. */
int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
				     Elf_Shdr *section,
				     const Elf_Shdr *relsec,
				     const Elf_Shdr *symtab);
#define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add

int arch_kimage_file_post_load_cleanup(struct kimage *image);
#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
#endif
#endif
typedef void crash_vmclear_fn(void);
|
|
extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
|
|
extern void kdump_nmi_shootdown_cpus(void);
|
|
|
|
#ifdef CONFIG_CRASH_HOTPLUG
|
|
void arch_crash_handle_hotplug_event(struct kimage *image);
|
|
#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
int arch_crash_hotplug_cpu_support(void);
|
|
#define crash_hotplug_cpu_support arch_crash_hotplug_cpu_support
|
|
#endif
|
|
|
|
#ifdef CONFIG_MEMORY_HOTPLUG
|
|
int arch_crash_hotplug_memory_support(void);
|
|
#define crash_hotplug_memory_support arch_crash_hotplug_memory_support
|
|
#endif
|
|
|
|
unsigned int arch_crash_get_elfcorehdr_size(void);
|
|
#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size
|
|
#endif
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#endif /* _ASM_X86_KEXEC_H */
|