Merge branch 'x86/alternatives' into x86/core, to merge dependent commits
Prepare to resolve conflicts with an upstream series of fixes that conflict
with pending x86 changes:
  6f5bf947ba Merge tag 'its-for-linus-20250509' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit 11d8f542d9
@@ -477,7 +477,7 @@ void *text_poke_copy(void *addr, const void *opcode, size_t len)
 return text_poke(addr, opcode, len);
 }

-void text_poke_sync(void)
+void smp_text_poke_sync_each_cpu(void)
 {
 }

@@ -153,6 +153,7 @@ config X86
 select ARCH_WANT_HUGETLB_VMEMMAP_PREINIT if X86_64
 select ARCH_WANTS_THP_SWAP if X86_64
 select ARCH_HAS_PARANOID_L1D_FLUSH
+select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
 select BUILDTIME_TABLE_SORT
 select CLKEVT_I8253
 select CLOCKSOURCE_WATCHDOG

@@ -2803,8 +2803,15 @@ static unsigned long get_segment_base(unsigned int segment)
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
 struct ldt_struct *ldt;

+/*
+* If we're not in a valid context with a real (not just lazy)
+* user mm, then don't even try.
+*/
+if (!nmi_uaccess_okay())
+return 0;
+
 /* IRQs are off, so this synchronizes with smp_store_release */
-ldt = READ_ONCE(current->active_mm->context.ldt);
+ldt = smp_load_acquire(&current->mm->context.ldt);
 if (!ldt || idx >= ldt->nr_entries)
 return 0;

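The hunk above replaces a plain READ_ONCE() of the LDT pointer with an nmi_uaccess_okay() check plus smp_load_acquire(). A minimal sketch of the resulting access pattern; the helper name and the get_desc_base() use are illustrative, not part of this commit:

    static unsigned long ldt_entry_base(unsigned int idx)
    {
            struct ldt_struct *ldt;

            /* Bail out unless we have a real (not just lazy) user mm. */
            if (!nmi_uaccess_okay())
                    return 0;

            /* Pairs with the smp_store_release() done when the LDT is installed. */
            ldt = smp_load_acquire(&current->mm->context.ldt);
            if (!ldt || idx >= ldt->nr_entries)
                    return 0;

            return get_desc_base(&ldt->entries[idx]);
    }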
@@ -82,6 +82,12 @@ struct alt_instr {

 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];

+extern s32 __retpoline_sites[], __retpoline_sites_end[];
+extern s32 __return_sites[], __return_sites_end[];
+extern s32 __cfi_sites[], __cfi_sites_end[];
+extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
+extern s32 __smp_locks[], __smp_locks_end[];
+
 /*
 * Debug flag that can be tested to see whether alternative
 * instructions were patched in already:

@@ -335,11 +341,6 @@ void nop_func(void);
 __ALTERNATIVE(\oldinstr, \newinstr, \ft_flags)
 .endm

-#define old_len 141b-140b
-#define new_len1 144f-143f
-#define new_len2 145f-144f
-#define new_len3 146f-145f
-
 /*
 * Same as ALTERNATIVE macro above but for two alternatives. If CPU
 * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has

@@ -16,6 +16,8 @@
 #define MM_CONTEXT_LOCK_LAM 2
 /* Allow LAM and SVA coexisting */
 #define MM_CONTEXT_FORCE_TAGGED_SVA 3
+/* Tracks mm_cpumask */
+#define MM_CONTEXT_NOTRACK 4

 /*
 * x86 has arch-specific MMU state beyond what lives in mm_struct.

@@ -44,9 +46,7 @@ typedef struct {
 struct ldt_struct *ldt;
 #endif

-#ifdef CONFIG_X86_64
 unsigned long flags;
-#endif

 #ifdef CONFIG_ADDRESS_MASKING
 /* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */

@@ -190,7 +190,7 @@ extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 #define activate_mm(prev, next) \
 do { \
 paravirt_enter_mmap(next); \
-switch_mm((prev), (next), NULL); \
+switch_mm_irqs_off((prev), (next), NULL); \
 } while (0);

 #ifdef CONFIG_X86_32

@@ -247,6 +247,16 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
 }
 #endif

+static inline bool is_notrack_mm(struct mm_struct *mm)
+{
+return test_bit(MM_CONTEXT_NOTRACK, &mm->context.flags);
+}
+
+static inline void set_notrack_mm(struct mm_struct *mm)
+{
+set_bit(MM_CONTEXT_NOTRACK, &mm->context.flags);
+}
+
 /*
 * We only want to enforce protection keys on the current process
 * because we effectively have no access to PKRU for other

@@ -272,4 +282,7 @@ unsigned long __get_current_cr3_fast(void);

 #include <asm-generic/mmu_context.h>

+extern struct mm_struct *use_temporary_mm(struct mm_struct *temp_mm);
+extern void unuse_temporary_mm(struct mm_struct *prev_mm);
+
 #endif /* _ASM_X86_MMU_CONTEXT_H */

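The new is_notrack_mm()/set_notrack_mm() helpers and the use_temporary_mm()/unuse_temporary_mm() declarations are consumed later in this diff by poking_init() and the EFI code. A hedged sketch of the expected setup for a kernel-private mm; the variable names are placeholders:

    struct mm_struct *scratch_mm, *prev_mm;

    scratch_mm = mm_alloc();
    if (!scratch_mm)
            return -ENOMEM;

    /* Keep this mm out of the mm_cpumask/DEBUG_VM tracking in switch_mm_irqs_off(). */
    set_notrack_mm(scratch_mm);

    /* The temporary mm may only be used with preemption disabled and without sleeping. */
    preempt_disable();
    prev_mm = use_temporary_mm(scratch_mm);
    /* ... access mappings that exist only in scratch_mm ... */
    unuse_temporary_mm(prev_mm);
    preempt_enable();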
@@ -11,11 +11,11 @@
 * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5.
 * Raise it if needed.
 */
-#define POKE_MAX_OPCODE_SIZE 5
+#define TEXT_POKE_MAX_OPCODE_SIZE 5

 extern void text_poke_early(void *addr, const void *opcode, size_t len);

-extern void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len);
+extern void text_poke_apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len);

 /*
 * Clear and restore the kernel write-protection flag on the local CPU.

@@ -32,17 +32,17 @@ extern void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u
 * an inconsistent instruction while you patch.
 */
 extern void *text_poke(void *addr, const void *opcode, size_t len);
-extern void text_poke_sync(void);
+extern void smp_text_poke_sync_each_cpu(void);
 extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
 extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
 #define text_poke_copy text_poke_copy
 extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok);
 extern void *text_poke_set(void *addr, int c, size_t len);
-extern int poke_int3_handler(struct pt_regs *regs);
-extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
+extern int smp_text_poke_int3_handler(struct pt_regs *regs);
+extern void smp_text_poke_single(void *addr, const void *opcode, size_t len, const void *emulate);

-extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate);
-extern void text_poke_finish(void);
+extern void smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, const void *emulate);
+extern void smp_text_poke_batch_finish(void);

 #define INT3_INSN_SIZE 1
 #define INT3_INSN_OPCODE 0xCC

@@ -82,7 +82,7 @@ static __always_inline int text_opcode_size(u8 opcode)
 }

 union text_poke_insn {
-u8 text[POKE_MAX_OPCODE_SIZE];
+u8 text[TEXT_POKE_MAX_OPCODE_SIZE];
 struct {
 u8 opcode;
 s32 disp;

@@ -128,8 +128,8 @@ void *text_gen_insn(u8 opcode, const void *addr, const void *dest)
 }

 extern int after_bootmem;
-extern __ro_after_init struct mm_struct *poking_mm;
-extern __ro_after_init unsigned long poking_addr;
+extern __ro_after_init struct mm_struct *text_poke_mm;
+extern __ro_after_init unsigned long text_poke_mm_addr;

 #ifndef CONFIG_UML_X86
 static __always_inline

@@ -142,13 +142,14 @@ static __always_inline
 void int3_emulate_push(struct pt_regs *regs, unsigned long val)
 {
 /*
-* The int3 handler in entry_64.S adds a gap between the
+* The INT3 handler in entry_64.S adds a gap between the
 * stack where the break point happened, and the saving of
 * pt_regs. We can extend the original stack because of
-* this gap. See the idtentry macro's create_gap option.
+* this gap. See the idtentry macro's X86_TRAP_BP logic.
 *
-* Similarly entry_32.S will have a gap on the stack for (any) hardware
-* exception and pt_regs; see FIXUP_FRAME.
+* Similarly, entry_32.S will have a gap on the stack for
+* (any) hardware exception and pt_regs; see the
+* FIXUP_FRAME macro.
 */
 regs->sp -= sizeof(unsigned long);
 *(unsigned long *)regs->sp = val;

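For reference, a minimal sketch of how a caller is expected to use the renamed batching API, mirroring the ftrace and jump-label call sites further down in this diff; the site[] array and its fields are placeholders:

    mutex_lock(&text_mutex);

    /* Queue up any number of patch requests... */
    for (i = 0; i < nr_sites; i++)
            smp_text_poke_batch_add(site[i].addr, site[i].insn, site[i].len, NULL);

    /* ...then apply them all with one INT3 round trip per step: */
    smp_text_poke_batch_finish();

    mutex_unlock(&text_mutex);

    /* A single instruction can still be patched immediately: */
    smp_text_poke_single(addr, insn, len, NULL);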
@@ -1,36 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #define pr_fmt(fmt) "SMP alternatives: " fmt

 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/mmu_context.h>
 #include <linux/perf_event.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/stringify.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/memory.h>
 #include <linux/stop_machine.h>
 #include <linux/slab.h>
 #include <linux/kdebug.h>
 #include <linux/kprobes.h>
 #include <linux/mmu_context.h>
 #include <linux/bsearch.h>
 #include <linux/sync_core.h>

 #include <asm/text-patching.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
 #include <asm/mce.h>
 #include <asm/nmi.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 #include <asm/insn.h>
 #include <asm/io.h>
 #include <asm/fixmap.h>
 #include <asm/paravirt.h>
 #include <asm/asm-prototypes.h>
 #include <asm/cfi.h>
 #include <asm/nmi.h>

 int __read_mostly alternatives_patched;

@@ -171,13 +149,6 @@ static void add_nop(u8 *buf, unsigned int len)
 *buf = INT3_INSN_OPCODE;
 }

-extern s32 __retpoline_sites[], __retpoline_sites_end[];
-extern s32 __return_sites[], __return_sites_end[];
-extern s32 __cfi_sites[], __cfi_sites_end[];
-extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
-extern s32 __smp_locks[], __smp_locks_end[];
-void text_poke_early(void *addr, const void *opcode, size_t len);
-
 /*
 * Matches NOP and NOPL, not any of the other possible NOPs.
 */

@@ -369,7 +340,7 @@ static void __apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen,
 }
 }

-void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len)
+void text_poke_apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len)
 {
 __apply_relocation(buf, instr, instrlen, repl, repl_len);
 optimize_nops(instr, buf, instrlen);

@@ -525,7 +496,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
 insn_buff[insn_buff_sz] = 0x90;

-apply_relocation(insn_buff, instr, a->instrlen, replacement, a->replacementlen);
+text_poke_apply_relocation(insn_buff, instr, a->instrlen, replacement, a->replacementlen);

 DUMP_BYTES(ALT, instr, a->instrlen, "%px: old_insn: ", instr);
 DUMP_BYTES(ALT, replacement, a->replacementlen, "%px: rpl_insn: ", replacement);

@@ -2010,7 +1981,7 @@ __visible noinline void __init __alt_reloc_selftest(void *arg)
 static noinline void __init alt_reloc_selftest(void)
 {
 /*
-* Tests apply_relocation().
+* Tests text_poke_apply_relocation().
 *
 * This has a relative immediate (CALL) in a place other than the first
 * instruction and additionally on x86_64 we get a RIP-relative LEA:

@@ -2140,76 +2111,8 @@ void __init_or_module text_poke_early(void *addr, const void *opcode,
 }
 }

-typedef struct {
-struct mm_struct *mm;
-} temp_mm_state_t;
-
-/*
-* Using a temporary mm allows to set temporary mappings that are not accessible
-* by other CPUs. Such mappings are needed to perform sensitive memory writes
-* that override the kernel memory protections (e.g., W^X), without exposing the
-* temporary page-table mappings that are required for these write operations to
-* other CPUs. Using a temporary mm also allows to avoid TLB shootdowns when the
-* mapping is torn down.
-*
-* Context: The temporary mm needs to be used exclusively by a single core. To
-* harden security IRQs must be disabled while the temporary mm is
-* loaded, thereby preventing interrupt handler bugs from overriding
-* the kernel memory protection.
-*/
-static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
-{
-temp_mm_state_t temp_state;
-
-lockdep_assert_irqs_disabled();
-
-/*
-* Make sure not to be in TLB lazy mode, as otherwise we'll end up
-* with a stale address space WITHOUT being in lazy mode after
-* restoring the previous mm.
-*/
-if (this_cpu_read(cpu_tlbstate_shared.is_lazy))
-leave_mm();
-
-temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
-switch_mm_irqs_off(NULL, mm, current);
-
-/*
-* If breakpoints are enabled, disable them while the temporary mm is
-* used. Userspace might set up watchpoints on addresses that are used
-* in the temporary mm, which would lead to wrong signals being sent or
-* crashes.
-*
-* Note that breakpoints are not disabled selectively, which also causes
-* kernel breakpoints (e.g., perf's) to be disabled. This might be
-* undesirable, but still seems reasonable as the code that runs in the
-* temporary mm should be short.
-*/
-if (hw_breakpoint_active())
-hw_breakpoint_disable();
-
-return temp_state;
-}
-
-__ro_after_init struct mm_struct *poking_mm;
-__ro_after_init unsigned long poking_addr;
-
-static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
-{
-lockdep_assert_irqs_disabled();
-
-switch_mm_irqs_off(NULL, prev_state.mm, current);
-
-/* Clear the cpumask, to indicate no TLB flushing is needed anywhere */
-cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(poking_mm));
-
-/*
-* Restore the breakpoints if they were disabled before the temporary mm
-* was loaded.
-*/
-if (hw_breakpoint_active())
-hw_breakpoint_restore();
-}
+__ro_after_init struct mm_struct *text_poke_mm;
+__ro_after_init unsigned long text_poke_mm_addr;

 static void text_poke_memcpy(void *dst, const void *src, size_t len)
 {

@@ -2229,7 +2132,7 @@ static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t l
 {
 bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
 struct page *pages[2] = {NULL};
-temp_mm_state_t prev;
+struct mm_struct *prev_mm;
 unsigned long flags;
 pte_t pte, *ptep;
 spinlock_t *ptl;

@@ -2266,7 +2169,7 @@ static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t l
 /*
 * The lock is not really needed, but this allows to avoid open-coding.
 */
-ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+ptep = get_locked_pte(text_poke_mm, text_poke_mm_addr, &ptl);

 /*
 * This must not fail; preallocated in poking_init().

@@ -2276,21 +2179,21 @@ static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t l
 local_irq_save(flags);

 pte = mk_pte(pages[0], pgprot);
-set_pte_at(poking_mm, poking_addr, ptep, pte);
+set_pte_at(text_poke_mm, text_poke_mm_addr, ptep, pte);

 if (cross_page_boundary) {
 pte = mk_pte(pages[1], pgprot);
-set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
+set_pte_at(text_poke_mm, text_poke_mm_addr + PAGE_SIZE, ptep + 1, pte);
 }

 /*
 * Loading the temporary mm behaves as a compiler barrier, which
 * guarantees that the PTE will be set at the time memcpy() is done.
 */
-prev = use_temporary_mm(poking_mm);
+prev_mm = use_temporary_mm(text_poke_mm);

 kasan_disable_current();
-func((u8 *)poking_addr + offset_in_page(addr), src, len);
+func((u8 *)text_poke_mm_addr + offset_in_page(addr), src, len);
 kasan_enable_current();

 /*

@@ -2299,22 +2202,22 @@ static void *__text_poke(text_poke_f func, void *addr, const void *src, size_t l
 */
 barrier();

-pte_clear(poking_mm, poking_addr, ptep);
+pte_clear(text_poke_mm, text_poke_mm_addr, ptep);
 if (cross_page_boundary)
-pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);
+pte_clear(text_poke_mm, text_poke_mm_addr + PAGE_SIZE, ptep + 1);

 /*
 * Loading the previous page-table hierarchy requires a serializing
 * instruction that already allows the core to see the updated version.
 * Xen-PV is assumed to serialize execution in a similar manner.
 */
-unuse_temporary_mm(prev);
+unuse_temporary_mm(prev_mm);

 /*
 * Flushing the TLB might involve IPIs, which would require enabled
 * IRQs, but not if the mm is not used, as it is in this point.
 */
-flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
+flush_tlb_mm_range(text_poke_mm, text_poke_mm_addr, text_poke_mm_addr +
 (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
 PAGE_SHIFT, false);

@@ -2450,7 +2353,7 @@ static void do_sync_core(void *info)
 sync_core();
 }

-void text_poke_sync(void)
+void smp_text_poke_sync_each_cpu(void)
 {
 on_each_cpu(do_sync_core, NULL, 1);
 }

@@ -2460,64 +2363,66 @@ void text_poke_sync(void)
 * this thing. When len == 6 everything is prefixed with 0x0f and we map
 * opcode to Jcc.d8, using len to distinguish.
 */
-struct text_poke_loc {
+struct smp_text_poke_loc {
 /* addr := _stext + rel_addr */
 s32 rel_addr;
 s32 disp;
 u8 len;
 u8 opcode;
-const u8 text[POKE_MAX_OPCODE_SIZE];
-/* see text_poke_bp_batch() */
+const u8 text[TEXT_POKE_MAX_OPCODE_SIZE];
+/* see smp_text_poke_batch_finish() */
 u8 old;
 };

-struct bp_patching_desc {
-struct text_poke_loc *vec;
+#define TEXT_POKE_ARRAY_MAX (PAGE_SIZE / sizeof(struct smp_text_poke_loc))
+
+static struct smp_text_poke_array {
+struct smp_text_poke_loc vec[TEXT_POKE_ARRAY_MAX];
 int nr_entries;
-atomic_t refs;
-};
+} text_poke_array;

-static struct bp_patching_desc bp_desc;
+static DEFINE_PER_CPU(atomic_t, text_poke_array_refs);

-static __always_inline
-struct bp_patching_desc *try_get_desc(void)
+/*
+* These four __always_inline annotations imply noinstr, necessary
+* due to smp_text_poke_int3_handler() being noinstr:
+*/
+
+static __always_inline bool try_get_text_poke_array(void)
 {
-struct bp_patching_desc *desc = &bp_desc;
+atomic_t *refs = this_cpu_ptr(&text_poke_array_refs);

-if (!raw_atomic_inc_not_zero(&desc->refs))
-return NULL;
+if (!raw_atomic_inc_not_zero(refs))
+return false;

-return desc;
+return true;
 }

-static __always_inline void put_desc(void)
+static __always_inline void put_text_poke_array(void)
 {
-struct bp_patching_desc *desc = &bp_desc;
+atomic_t *refs = this_cpu_ptr(&text_poke_array_refs);

 smp_mb__before_atomic();
-raw_atomic_dec(&desc->refs);
+raw_atomic_dec(refs);
 }

-static __always_inline void *text_poke_addr(struct text_poke_loc *tp)
+static __always_inline void *text_poke_addr(const struct smp_text_poke_loc *tpl)
 {
-return _stext + tp->rel_addr;
+return _stext + tpl->rel_addr;
 }

-static __always_inline int patch_cmp(const void *key, const void *elt)
+static __always_inline int patch_cmp(const void *tpl_a, const void *tpl_b)
 {
-struct text_poke_loc *tp = (struct text_poke_loc *) elt;
-
-if (key < text_poke_addr(tp))
+if (tpl_a < text_poke_addr(tpl_b))
 return -1;
-if (key > text_poke_addr(tp))
+if (tpl_a > text_poke_addr(tpl_b))
 return 1;
 return 0;
 }

-noinstr int poke_int3_handler(struct pt_regs *regs)
+noinstr int smp_text_poke_int3_handler(struct pt_regs *regs)
 {
-struct bp_patching_desc *desc;
-struct text_poke_loc *tp;
+struct smp_text_poke_loc *tpl;
 int ret = 0;
 void *ip;

@@ -2526,41 +2431,40 @@ noinstr int poke_int3_handler(struct pt_regs *regs)

 /*
 * Having observed our INT3 instruction, we now must observe
-* bp_desc with non-zero refcount:
+* text_poke_array with non-zero refcount:
 *
-* bp_desc.refs = 1 INT3
-* WMB RMB
-* write INT3 if (bp_desc.refs != 0)
+* text_poke_array_refs = 1 INT3
+* WMB RMB
+* write INT3 if (text_poke_array_refs != 0)
 */
 smp_rmb();

-desc = try_get_desc();
-if (!desc)
+if (!try_get_text_poke_array())
 return 0;

 /*
-* Discount the INT3. See text_poke_bp_batch().
+* Discount the INT3. See smp_text_poke_batch_finish().
 */
 ip = (void *) regs->ip - INT3_INSN_SIZE;

 /*
 * Skip the binary search if there is a single member in the vector.
 */
-if (unlikely(desc->nr_entries > 1)) {
-tp = __inline_bsearch(ip, desc->vec, desc->nr_entries,
-sizeof(struct text_poke_loc),
+if (unlikely(text_poke_array.nr_entries > 1)) {
+tpl = __inline_bsearch(ip, text_poke_array.vec, text_poke_array.nr_entries,
+sizeof(struct smp_text_poke_loc),
 patch_cmp);
-if (!tp)
+if (!tpl)
 goto out_put;
 } else {
-tp = desc->vec;
-if (text_poke_addr(tp) != ip)
+tpl = text_poke_array.vec;
+if (text_poke_addr(tpl) != ip)
 goto out_put;
 }

-ip += tp->len;
+ip += tpl->len;

-switch (tp->opcode) {
+switch (tpl->opcode) {
 case INT3_INSN_OPCODE:
 /*
 * Someone poked an explicit INT3, they'll want to handle it,

@@ -2573,16 +2477,16 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
 break;

 case CALL_INSN_OPCODE:
-int3_emulate_call(regs, (long)ip + tp->disp);
+int3_emulate_call(regs, (long)ip + tpl->disp);
 break;

 case JMP32_INSN_OPCODE:
 case JMP8_INSN_OPCODE:
-int3_emulate_jmp(regs, (long)ip + tp->disp);
+int3_emulate_jmp(regs, (long)ip + tpl->disp);
 break;

 case 0x70 ... 0x7f: /* Jcc */
-int3_emulate_jcc(regs, tp->opcode & 0xf, (long)ip, tp->disp);
+int3_emulate_jcc(regs, tpl->opcode & 0xf, (long)ip, tpl->disp);
 break;

 default:

@@ -2592,51 +2496,50 @@ noinstr int poke_int3_handler(struct pt_regs *regs)
 ret = 1;

 out_put:
-put_desc();
+put_text_poke_array();
 return ret;
 }

-#define TP_VEC_MAX (PAGE_SIZE / sizeof(struct text_poke_loc))
-static struct text_poke_loc tp_vec[TP_VEC_MAX];
-static int tp_vec_nr;
-
 /**
-* text_poke_bp_batch() -- update instructions on live kernel on SMP
-* @tp: vector of instructions to patch
-* @nr_entries: number of entries in the vector
+* smp_text_poke_batch_finish() -- update instructions on live kernel on SMP
 *
-* Modify multi-byte instruction by using int3 breakpoint on SMP.
-* We completely avoid stop_machine() here, and achieve the
-* synchronization using int3 breakpoint.
+* Input state:
+* text_poke_array.vec: vector of instructions to patch
+* text_poke_array.nr_entries: number of entries in the vector
+*
+* Modify multi-byte instructions by using INT3 breakpoints on SMP.
+* We completely avoid using stop_machine() here, and achieve the
+* synchronization using INT3 breakpoints and SMP cross-calls.
 *
 * The way it is done:
 * - For each entry in the vector:
-* - add a int3 trap to the address that will be patched
-* - sync cores
+* - add an INT3 trap to the address that will be patched
+* - SMP sync all CPUs
 * - For each entry in the vector:
 * - update all but the first byte of the patched range
-* - sync cores
+* - SMP sync all CPUs
 * - For each entry in the vector:
-* - replace the first byte (int3) by the first byte of
+* - replace the first byte (INT3) by the first byte of the
 * replacing opcode
-* - sync cores
+* - SMP sync all CPUs
 */
-static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
+void smp_text_poke_batch_finish(void)
 {
 unsigned char int3 = INT3_INSN_OPCODE;
 unsigned int i;
 int do_sync;

+if (!text_poke_array.nr_entries)
+return;
+
 lockdep_assert_held(&text_mutex);

-bp_desc.vec = tp;
-bp_desc.nr_entries = nr_entries;
-
 /*
-* Corresponds to the implicit memory barrier in try_get_desc() to
-* ensure reading a non-zero refcount provides up to date bp_desc data.
+* Corresponds to the implicit memory barrier in try_get_text_poke_array() to
+* ensure reading a non-zero refcount provides up to date text_poke_array data.
 */
-atomic_set_release(&bp_desc.refs, 1);
+for_each_possible_cpu(i)
+atomic_set_release(per_cpu_ptr(&text_poke_array_refs, i), 1);

 /*
 * Function tracing can enable thousands of places that need to be

@@ -2649,33 +2552,33 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
 cond_resched();

 /*
-* Corresponding read barrier in int3 notifier for making sure the
-* nr_entries and handler are correctly ordered wrt. patching.
+* Corresponding read barrier in INT3 notifier for making sure the
+* text_poke_array.nr_entries and handler are correctly ordered wrt. patching.
 */
 smp_wmb();

 /*
-* First step: add a int3 trap to the address that will be patched.
+* First step: add a INT3 trap to the address that will be patched.
 */
-for (i = 0; i < nr_entries; i++) {
-tp[i].old = *(u8 *)text_poke_addr(&tp[i]);
-text_poke(text_poke_addr(&tp[i]), &int3, INT3_INSN_SIZE);
+for (i = 0; i < text_poke_array.nr_entries; i++) {
+text_poke_array.vec[i].old = *(u8 *)text_poke_addr(&text_poke_array.vec[i]);
+text_poke(text_poke_addr(&text_poke_array.vec[i]), &int3, INT3_INSN_SIZE);
 }

-text_poke_sync();
+smp_text_poke_sync_each_cpu();

 /*
 * Second step: update all but the first byte of the patched range.
 */
-for (do_sync = 0, i = 0; i < nr_entries; i++) {
-u8 old[POKE_MAX_OPCODE_SIZE+1] = { tp[i].old, };
-u8 _new[POKE_MAX_OPCODE_SIZE+1];
-const u8 *new = tp[i].text;
-int len = tp[i].len;
+for (do_sync = 0, i = 0; i < text_poke_array.nr_entries; i++) {
+u8 old[TEXT_POKE_MAX_OPCODE_SIZE+1] = { text_poke_array.vec[i].old, };
+u8 _new[TEXT_POKE_MAX_OPCODE_SIZE+1];
+const u8 *new = text_poke_array.vec[i].text;
+int len = text_poke_array.vec[i].len;

 if (len - INT3_INSN_SIZE > 0) {
 memcpy(old + INT3_INSN_SIZE,
-text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
+text_poke_addr(&text_poke_array.vec[i]) + INT3_INSN_SIZE,
 len - INT3_INSN_SIZE);

 if (len == 6) {

@@ -2684,7 +2587,7 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
 new = _new;
 }

-text_poke(text_poke_addr(&tp[i]) + INT3_INSN_SIZE,
+text_poke(text_poke_addr(&text_poke_array.vec[i]) + INT3_INSN_SIZE,
 new + INT3_INSN_SIZE,
 len - INT3_INSN_SIZE);

@@ -2715,7 +2618,7 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
 * The old instruction is recorded so that the event can be
 * processed forwards or backwards.
 */
-perf_event_text_poke(text_poke_addr(&tp[i]), old, len, new, len);
+perf_event_text_poke(text_poke_addr(&text_poke_array.vec[i]), old, len, new, len);
 }

 if (do_sync) {

@@ -2724,63 +2627,79 @@ static void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries
 * not necessary and we'd be safe even without it. But
 * better safe than sorry (plus there's not only Intel).
 */
-text_poke_sync();
+smp_text_poke_sync_each_cpu();
 }

 /*
-* Third step: replace the first byte (int3) by the first byte of
+* Third step: replace the first byte (INT3) by the first byte of the
 * replacing opcode.
 */
-for (do_sync = 0, i = 0; i < nr_entries; i++) {
-u8 byte = tp[i].text[0];
+for (do_sync = 0, i = 0; i < text_poke_array.nr_entries; i++) {
+u8 byte = text_poke_array.vec[i].text[0];

-if (tp[i].len == 6)
+if (text_poke_array.vec[i].len == 6)
 byte = 0x0f;

 if (byte == INT3_INSN_OPCODE)
 continue;

-text_poke(text_poke_addr(&tp[i]), &byte, INT3_INSN_SIZE);
+text_poke(text_poke_addr(&text_poke_array.vec[i]), &byte, INT3_INSN_SIZE);
 do_sync++;
 }

 if (do_sync)
-text_poke_sync();
+smp_text_poke_sync_each_cpu();

 /*
 * Remove and wait for refs to be zero.
 *
 * Notably, if after step-3 above the INT3 got removed, then the
+* smp_text_poke_sync_each_cpu() will have serialized against any running INT3
 * handlers and the below spin-wait will not happen.
 *
 * IOW. unless the replacement instruction is INT3, this case goes
 * unused.
 */
-if (!atomic_dec_and_test(&bp_desc.refs))
-atomic_cond_read_acquire(&bp_desc.refs, !VAL);
+for_each_possible_cpu(i) {
+atomic_t *refs = per_cpu_ptr(&text_poke_array_refs, i);
+
+if (unlikely(!atomic_dec_and_test(refs)))
+atomic_cond_read_acquire(refs, !VAL);
+}
+
+/* They are all completed: */
+text_poke_array.nr_entries = 0;
 }

-static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
-const void *opcode, size_t len, const void *emulate)
+static void __smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, const void *emulate)
 {
+struct smp_text_poke_loc *tpl;
 struct insn insn;
 int ret, i = 0;

+tpl = &text_poke_array.vec[text_poke_array.nr_entries++];
+
 if (len == 6)
 i = 1;
-memcpy((void *)tp->text, opcode+i, len-i);
+memcpy((void *)tpl->text, opcode+i, len-i);
 if (!emulate)
 emulate = opcode;

 ret = insn_decode_kernel(&insn, emulate);
 BUG_ON(ret < 0);

-tp->rel_addr = addr - (void *)_stext;
-tp->len = len;
-tp->opcode = insn.opcode.bytes[0];
+tpl->rel_addr = addr - (void *)_stext;
+tpl->len = len;
+tpl->opcode = insn.opcode.bytes[0];

 if (is_jcc32(&insn)) {
 /*
 * Map Jcc.d32 onto Jcc.d8 and use len to distinguish.
 */
-tp->opcode = insn.opcode.bytes[1] - 0x10;
+tpl->opcode = insn.opcode.bytes[1] - 0x10;
 }

-switch (tp->opcode) {
+switch (tpl->opcode) {
 case RET_INSN_OPCODE:
 case JMP32_INSN_OPCODE:
 case JMP8_INSN_OPCODE:

@@ -2789,14 +2708,14 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
 * next instruction can be padded with INT3.
 */
 for (i = insn.length; i < len; i++)
-BUG_ON(tp->text[i] != INT3_INSN_OPCODE);
+BUG_ON(tpl->text[i] != INT3_INSN_OPCODE);
 break;

 default:
 BUG_ON(len != insn.length);
 }

-switch (tp->opcode) {
+switch (tpl->opcode) {
 case INT3_INSN_OPCODE:
 case RET_INSN_OPCODE:
 break;

@@ -2805,21 +2724,21 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
 case JMP32_INSN_OPCODE:
 case JMP8_INSN_OPCODE:
 case 0x70 ... 0x7f: /* Jcc */
-tp->disp = insn.immediate.value;
+tpl->disp = insn.immediate.value;
 break;

 default: /* assume NOP */
 switch (len) {
 case 2: /* NOP2 -- emulate as JMP8+0 */
 BUG_ON(memcmp(emulate, x86_nops[len], len));
-tp->opcode = JMP8_INSN_OPCODE;
-tp->disp = 0;
+tpl->opcode = JMP8_INSN_OPCODE;
+tpl->disp = 0;
 break;

 case 5: /* NOP5 -- emulate as JMP32+0 */
 BUG_ON(memcmp(emulate, x86_nops[len], len));
-tp->opcode = JMP32_INSN_OPCODE;
-tp->disp = 0;
+tpl->opcode = JMP32_INSN_OPCODE;
+tpl->disp = 0;
 break;

 default: /* unknown instruction */

@@ -2830,51 +2749,50 @@ static void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
 }

 /*
-* We hard rely on the tp_vec being ordered; ensure this is so by flushing
+* We hard rely on the text_poke_array.vec being ordered; ensure this is so by flushing
 * early if needed.
 */
-static bool tp_order_fail(void *addr)
+static bool text_poke_addr_ordered(void *addr)
 {
-struct text_poke_loc *tp;
+WARN_ON_ONCE(!addr);

-if (!tp_vec_nr)
+if (!text_poke_array.nr_entries)
 return true;

+/*
+* If the last current entry's address is higher than the
+* new entry's address we'd like to add, then ordering
+* is violated and we must first flush all pending patching
+* requests:
+*/
+if (text_poke_addr(text_poke_array.vec + text_poke_array.nr_entries-1) > addr)
+return false;
+
-if (!addr) /* force */
-return true;
-
-tp = &tp_vec[tp_vec_nr - 1];
-if ((unsigned long)text_poke_addr(tp) > (unsigned long)addr)
-return true;
-
-return false;
-}
-
-static void text_poke_flush(void *addr)
-{
-if (tp_vec_nr == TP_VEC_MAX || tp_order_fail(addr)) {
-text_poke_bp_batch(tp_vec, tp_vec_nr);
-tp_vec_nr = 0;
-}
-}
-
-void text_poke_finish(void)
-{
-text_poke_flush(NULL);
-}
-
-void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate)
-{
-struct text_poke_loc *tp;
-
-text_poke_flush(addr);
-
-tp = &tp_vec[tp_vec_nr++];
-text_poke_loc_init(tp, addr, opcode, len, emulate);
+return true;
 }

 /**
-* text_poke_bp() -- update instructions on live kernel on SMP
+* smp_text_poke_batch_add() -- update instruction on live kernel on SMP, batched
 * @addr: address to patch
 * @opcode: opcode of new instruction
 * @len: length to copy
 * @emulate: instruction to be emulated
 *
+* Add a new instruction to the current queue of to-be-patched instructions
+* the kernel maintains. The patching request will not be executed immediately,
+* but becomes part of an array of patching requests, optimized for batched
+* execution. All pending patching requests will be executed on the next
+* smp_text_poke_batch_finish() call.
 */
+void __ref smp_text_poke_batch_add(void *addr, const void *opcode, size_t len, const void *emulate)
+{
+if (text_poke_array.nr_entries == TEXT_POKE_ARRAY_MAX || !text_poke_addr_ordered(addr))
+smp_text_poke_batch_finish();
+__smp_text_poke_batch_add(addr, opcode, len, emulate);
+}
+
 /**
+* smp_text_poke_single() -- update instruction on live kernel on SMP immediately
 * @addr: address to patch
 * @opcode: opcode of new instruction
 * @len: length to copy

@@ -2882,12 +2800,11 @@ void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const voi
 *
 * Update a single instruction with the vector in the stack, avoiding
 * dynamically allocated memory. This function should be used when it is
-* not possible to allocate memory.
+* not possible to allocate memory for a vector. The single instruction
+* is patched in immediately.
 */
-void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
+void __ref smp_text_poke_single(void *addr, const void *opcode, size_t len, const void *emulate)
 {
-struct text_poke_loc tp;
-
-text_poke_loc_init(&tp, addr, opcode, len, emulate);
-text_poke_bp_batch(&tp, 1);
+__smp_text_poke_batch_add(addr, opcode, len, emulate);
+smp_text_poke_batch_finish();
 }

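The single bp_desc refcount is replaced above by per-CPU text_poke_array_refs. A simplified sketch of the publish/consume handshake that the WMB/RMB comment in smp_text_poke_int3_handler() describes; this is an illustration of the ordering, not the kernel code itself:

    static DEFINE_PER_CPU(atomic_t, text_poke_array_refs);

    static void publish_batch(void)                 /* smp_text_poke_batch_finish() side */
    {
            int cpu;

            for_each_possible_cpu(cpu)              /* make text_poke_array visible... */
                    atomic_set_release(per_cpu_ptr(&text_poke_array_refs, cpu), 1);
            smp_wmb();                              /* ...before any INT3 byte is written */
    }

    static bool consume_batch(void)                 /* INT3 handler side */
    {
            atomic_t *refs = this_cpu_ptr(&text_poke_array_refs);

            smp_rmb();                              /* saw INT3, now observe the array */
            if (!atomic_inc_not_zero(refs))
                    return false;                   /* no batch in flight */
            /* ...look up the address in text_poke_array.vec and emulate it... */
            atomic_dec(refs);
            return true;
    }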
@@ -185,7 +185,7 @@ static void *patch_dest(void *dest, bool direct)
 u8 *pad = dest - tsize;

 memcpy(insn_buff, skl_call_thunk_template, tsize);
-apply_relocation(insn_buff, pad, tsize, skl_call_thunk_template, tsize);
+text_poke_apply_relocation(insn_buff, pad, tsize, skl_call_thunk_template, tsize);

 /* Already patched? */
 if (!bcmp(pad, insn_buff, tsize))

@@ -294,7 +294,7 @@ static bool is_callthunk(void *addr)
 pad = (void *)(dest - tmpl_size);

 memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
-apply_relocation(insn_buff, pad, tmpl_size, skl_call_thunk_template, tmpl_size);
+text_poke_apply_relocation(insn_buff, pad, tmpl_size, skl_call_thunk_template, tmpl_size);

 return !bcmp(pad, insn_buff, tmpl_size);
 }

@@ -312,7 +312,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
 return 0;

 memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
-apply_relocation(insn_buff, ip, tmpl_size, skl_call_thunk_template, tmpl_size);
+text_poke_apply_relocation(insn_buff, ip, tmpl_size, skl_call_thunk_template, tmpl_size);

 memcpy(*pprog, insn_buff, tmpl_size);
 *pprog += tmpl_size;

@@ -55,10 +55,10 @@ void ftrace_arch_code_modify_post_process(void)
 {
 /*
 * ftrace_make_{call,nop}() may be called during
-* module load, and we need to finish the text_poke_queue()
+* module load, and we need to finish the smp_text_poke_batch_add()
 * that they do, here.
 */
-text_poke_finish();
+smp_text_poke_batch_finish();
 ftrace_poke_late = 0;
 mutex_unlock(&text_mutex);
 }

@@ -119,7 +119,7 @@ ftrace_modify_code_direct(unsigned long ip, const char *old_code,

 /* replace the text with the new text */
 if (ftrace_poke_late)
-text_poke_queue((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);
+smp_text_poke_batch_add((void *)ip, new_code, MCOUNT_INSN_SIZE, NULL);
 else
 text_poke_early((void *)ip, new_code, MCOUNT_INSN_SIZE);
 return 0;

@@ -186,11 +186,11 @@ int ftrace_update_ftrace_func(ftrace_func_t func)

 ip = (unsigned long)(&ftrace_call);
 new = ftrace_call_replace(ip, (unsigned long)func);
-text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
+smp_text_poke_single((void *)ip, new, MCOUNT_INSN_SIZE, NULL);

 ip = (unsigned long)(&ftrace_regs_call);
 new = ftrace_call_replace(ip, (unsigned long)func);
-text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
+smp_text_poke_single((void *)ip, new, MCOUNT_INSN_SIZE, NULL);

 return 0;
 }

@@ -247,10 +247,10 @@ void ftrace_replace_code(int enable)
 break;
 }

-text_poke_queue((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL);
+smp_text_poke_batch_add((void *)rec->ip, new, MCOUNT_INSN_SIZE, NULL);
 ftrace_update_record(rec, enable);
 }
-text_poke_finish();
+smp_text_poke_batch_finish();
 }

 void arch_ftrace_update_code(int command)

@@ -492,7 +492,7 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 mutex_lock(&text_mutex);
 /* Do a safe modify in case the trampoline is executing */
 new = ftrace_call_replace(ip, (unsigned long)func);
-text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
+smp_text_poke_single((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
 mutex_unlock(&text_mutex);
 }

@@ -586,7 +586,7 @@ static int ftrace_mod_jmp(unsigned long ip, void *func)
 const char *new;

 new = ftrace_jmp_replace(ip, (unsigned long)func);
-text_poke_bp((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
+smp_text_poke_single((void *)ip, new, MCOUNT_INSN_SIZE, NULL);
 return 0;
 }

@@ -102,7 +102,7 @@ __jump_label_transform(struct jump_entry *entry,
 return;
 }

-text_poke_bp((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
+smp_text_poke_single((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
 }

 static void __ref jump_label_transform(struct jump_entry *entry,

@@ -135,7 +135,7 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,

 mutex_lock(&text_mutex);
 jlp = __jump_label_patch(entry, type);
-text_poke_queue((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
+smp_text_poke_batch_add((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL);
 mutex_unlock(&text_mutex);
 return true;
 }

@@ -143,6 +143,6 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
 void arch_jump_label_transform_apply(void)
 {
 mutex_lock(&text_mutex);
-text_poke_finish();
+smp_text_poke_batch_finish();
 mutex_unlock(&text_mutex);
 }

@@ -808,7 +808,7 @@ void arch_arm_kprobe(struct kprobe *p)
 u8 int3 = INT3_INSN_OPCODE;

 text_poke(p->addr, &int3, 1);
-text_poke_sync();
+smp_text_poke_sync_each_cpu();
 perf_event_text_poke(p->addr, &p->opcode, 1, &int3, 1);
 }

@@ -818,7 +818,7 @@ void arch_disarm_kprobe(struct kprobe *p)

 perf_event_text_poke(p->addr, &int3, 1, &p->opcode, 1);
 text_poke(p->addr, &p->opcode, 1);
-text_poke_sync();
+smp_text_poke_sync_each_cpu();
 }

 void arch_remove_kprobe(struct kprobe *p)

@@ -488,7 +488,7 @@ void arch_optimize_kprobes(struct list_head *oplist)
 insn_buff[0] = JMP32_INSN_OPCODE;
 *(s32 *)(&insn_buff[1]) = rel;

-text_poke_bp(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);
+smp_text_poke_single(op->kp.addr, insn_buff, JMP32_INSN_SIZE, NULL);

 list_del_init(&op->list);
 }

@@ -513,11 +513,11 @@ void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 JMP32_INSN_SIZE - INT3_INSN_SIZE);

 text_poke(addr, new, INT3_INSN_SIZE);
-text_poke_sync();
+smp_text_poke_sync_each_cpu();
 text_poke(addr + INT3_INSN_SIZE,
 new + INT3_INSN_SIZE,
 JMP32_INSN_SIZE - INT3_INSN_SIZE);
-text_poke_sync();
+smp_text_poke_sync_each_cpu();

 perf_event_text_poke(op->kp.addr, old, JMP32_INSN_SIZE, new, JMP32_INSN_SIZE);
 }

@@ -206,7 +206,7 @@ static int write_relocate_add(Elf64_Shdr *sechdrs,
 write, apply);

 if (!early) {
-text_poke_sync();
+smp_text_poke_sync_each_cpu();
 mutex_unlock(&text_mutex);
 }

@@ -108,7 +108,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
 if (system_state == SYSTEM_BOOTING || modinit)
 return text_poke_early(insn, code, size);

-text_poke_bp(insn, code, size, emulate);
+smp_text_poke_single(insn, code, size, emulate);
 }

 static void __static_call_validate(u8 *insn, bool tail, bool tramp)

@@ -882,16 +882,16 @@ static void do_int3_user(struct pt_regs *regs)
 DEFINE_IDTENTRY_RAW(exc_int3)
 {
 /*
-* poke_int3_handler() is completely self contained code; it does (and
+* smp_text_poke_int3_handler() is completely self contained code; it does (and
 * must) *NOT* call out to anything, lest it hits upon yet another
 * INT3.
 */
-if (poke_int3_handler(regs))
+if (smp_text_poke_int3_handler(regs))
 return;

 /*
 * irqentry_enter_from_user_mode() uses static_branch_{,un}likely()
-* and therefore can trigger INT3, hence poke_int3_handler() must
+* and therefore can trigger INT3, hence smp_text_poke_int3_handler() must
 * be done before. If the entry came from kernel mode, then use
 * nmi_enter() because the INT3 could have been hit in any context
 * including NMI.

@@ -631,14 +631,21 @@ static bool get_desc(struct desc_struct *out, unsigned short sel)
 /* Bits [15:3] contain the index of the desired entry. */
 sel >>= 3;

-mutex_lock(&current->active_mm->context.lock);
-ldt = current->active_mm->context.ldt;
+/*
+* If we're not in a valid context with a real (not just lazy)
+* user mm, then don't even try.
+*/
+if (!nmi_uaccess_okay())
+return false;
+
+mutex_lock(&current->mm->context.lock);
+ldt = current->mm->context.ldt;
 if (ldt && sel < ldt->nr_entries) {
 *out = ldt->entries[sel];
 success = true;
 }

-mutex_unlock(&current->active_mm->context.lock);
+mutex_unlock(&current->mm->context.lock);

 return success;
 }

@@ -28,6 +28,7 @@
 #include <asm/text-patching.h>
 #include <asm/memtype.h>
 #include <asm/paravirt.h>
+#include <asm/mmu_context.h>

 /*
 * We need to define the tracepoints somewhere, and tlb.c

@@ -824,31 +825,33 @@ void __init poking_init(void)
 spinlock_t *ptl;
 pte_t *ptep;

-poking_mm = mm_alloc();
-BUG_ON(!poking_mm);
+text_poke_mm = mm_alloc();
+BUG_ON(!text_poke_mm);

 /* Xen PV guests need the PGD to be pinned. */
-paravirt_enter_mmap(poking_mm);
+paravirt_enter_mmap(text_poke_mm);

+set_notrack_mm(text_poke_mm);
+
 /*
 * Randomize the poking address, but make sure that the following page
 * will be mapped at the same PMD. We need 2 pages, so find space for 3,
 * and adjust the address if the PMD ends after the first one.
 */
-poking_addr = TASK_UNMAPPED_BASE;
+text_poke_mm_addr = TASK_UNMAPPED_BASE;
 if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
-poking_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
+text_poke_mm_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
 (TASK_SIZE - TASK_UNMAPPED_BASE - 3 * PAGE_SIZE);

-if (((poking_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
-poking_addr += PAGE_SIZE;
+if (((text_poke_mm_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
+text_poke_mm_addr += PAGE_SIZE;

 /*
 * We need to trigger the allocation of the page-tables that will be
 * needed for poking now. Later, poking may be performed in an atomic
 * section, which might cause allocation to fail.
 */
-ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+ptep = get_locked_pte(text_poke_mm, text_poke_mm_addr, &ptl);
 BUG_ON(!ptep);
 pte_unmap_unlock(ptep, ptl);
 }

@@ -847,7 +847,8 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
 * mm_cpumask. The TLB shootdown code can figure out from
 * cpu_tlbstate_shared.is_lazy whether or not to send an IPI.
 */
-if (IS_ENABLED(CONFIG_DEBUG_VM) && WARN_ON_ONCE(prev != &init_mm &&
+if (IS_ENABLED(CONFIG_DEBUG_VM) &&
+WARN_ON_ONCE(prev != &init_mm && !is_notrack_mm(prev) &&
 !cpumask_test_cpu(cpu, mm_cpumask(next))))
 cpumask_set_cpu(cpu, mm_cpumask(next));

@@ -906,14 +907,6 @@ void switch_mm_irqs_off(struct mm_struct *unused, struct mm_struct *next,
 this_cpu_write(cpu_tlbstate.loaded_mm, LOADED_MM_SWITCHING);
 barrier();

-/*
-* Leave this CPU in prev's mm_cpumask. Atomic writes to
-* mm_cpumask can be expensive under contention. The CPU
-* will be removed lazily at TLB flush time.
-*/
-VM_WARN_ON_ONCE(prev != &init_mm && !cpumask_test_cpu(cpu,
-mm_cpumask(prev)));
-
 /* Start receiving IPIs and then read tlb_gen (and LAM below) */
 if (next != &init_mm && !cpumask_test_cpu(cpu, mm_cpumask(next)))
 cpumask_set_cpu(cpu, mm_cpumask(next));

@@ -972,6 +965,77 @@ void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 this_cpu_write(cpu_tlbstate_shared.is_lazy, true);
 }

+/*
+* Using a temporary mm allows to set temporary mappings that are not accessible
+* by other CPUs. Such mappings are needed to perform sensitive memory writes
+* that override the kernel memory protections (e.g., W^X), without exposing the
+* temporary page-table mappings that are required for these write operations to
+* other CPUs. Using a temporary mm also allows to avoid TLB shootdowns when the
+* mapping is torn down. Temporary mms can also be used for EFI runtime service
+* calls or similar functionality.
+*
+* It is illegal to schedule while using a temporary mm -- the context switch
+* code is unaware of the temporary mm and does not know how to context switch.
+* Use a real (non-temporary) mm in a kernel thread if you need to sleep.
+*
+* Note: For sensitive memory writes, the temporary mm needs to be used
+* exclusively by a single core, and IRQs should be disabled while the
+* temporary mm is loaded, thereby preventing interrupt handler bugs from
+* overriding the kernel memory protection.
+*/
+struct mm_struct *use_temporary_mm(struct mm_struct *temp_mm)
+{
+struct mm_struct *prev_mm;
+
+lockdep_assert_preemption_disabled();
+guard(irqsave)();
+
+/*
+* Make sure not to be in TLB lazy mode, as otherwise we'll end up
+* with a stale address space WITHOUT being in lazy mode after
+* restoring the previous mm.
+*/
+if (this_cpu_read(cpu_tlbstate_shared.is_lazy))
+leave_mm();
+
+prev_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+switch_mm_irqs_off(NULL, temp_mm, current);
+
+/*
+* If breakpoints are enabled, disable them while the temporary mm is
+* used. Userspace might set up watchpoints on addresses that are used
+* in the temporary mm, which would lead to wrong signals being sent or
+* crashes.
+*
+* Note that breakpoints are not disabled selectively, which also causes
+* kernel breakpoints (e.g., perf's) to be disabled. This might be
+* undesirable, but still seems reasonable as the code that runs in the
+* temporary mm should be short.
+*/
+if (hw_breakpoint_active())
+hw_breakpoint_disable();
+
+return prev_mm;
+}
+
+void unuse_temporary_mm(struct mm_struct *prev_mm)
+{
+lockdep_assert_preemption_disabled();
+guard(irqsave)();
+
+/* Clear the cpumask, to indicate no TLB flushing is needed anywhere */
+cpumask_clear_cpu(smp_processor_id(), mm_cpumask(this_cpu_read(cpu_tlbstate.loaded_mm)));
+
+switch_mm_irqs_off(NULL, prev_mm, current);
+
+/*
+* Restore the breakpoints if they were disabled before the temporary mm
+* was loaded.
+*/
+if (hw_breakpoint_active())
+hw_breakpoint_restore();
+}
+
 /*
 * Call this when reinitializing a CPU. It fixes the following potential
 * problems:

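use_temporary_mm()/unuse_temporary_mm() now live here as proper functions and pass around a plain mm_struct pointer. A minimal caller sketch under the constraints stated in the comment above (no sleeping, IRQs off for sensitive writes; mirrors __text_poke() earlier in this diff):

    struct mm_struct *prev_mm;
    unsigned long flags;

    /* IRQs off also satisfies the preemption-disabled assertion. */
    local_irq_save(flags);

    prev_mm = use_temporary_mm(text_poke_mm);
    /* ...write through mappings that exist only in text_poke_mm... */
    unuse_temporary_mm(prev_mm);

    local_irq_restore(flags);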
@@ -629,7 +629,7 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 goto out;
 ret = 1;
 if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
-text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
+smp_text_poke_single(ip, new_insn, X86_PATCH_SIZE, NULL);
 ret = 0;
 }
 out:

@@ -89,6 +89,7 @@ int __init efi_alloc_page_tables(void)
 efi_mm.pgd = efi_pgd;
 mm_init_cpumask(&efi_mm);
 init_new_context(NULL, &efi_mm);
+set_notrack_mm(&efi_mm);

 return 0;

@@ -434,15 +435,12 @@ void __init efi_dump_pagetable(void)
 */
 static void efi_enter_mm(void)
 {
-efi_prev_mm = current->active_mm;
-current->active_mm = &efi_mm;
-switch_mm(efi_prev_mm, &efi_mm, NULL);
+efi_prev_mm = use_temporary_mm(&efi_mm);
 }

 static void efi_leave_mm(void)
 {
-current->active_mm = efi_prev_mm;
-switch_mm(&efi_mm, efi_prev_mm, NULL);
+unuse_temporary_mm(efi_prev_mm);
 }

 void arch_efi_call_virt_setup(void)