From e7256acac3b3c858d22683fa937a772bf209f4af Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 2 Dec 2024 12:56:37 +0100 Subject: [PATCH 01/48] s390/Kconfig: Select KASAN_VMALLOC if KASAN is enabled Reduce the number of to be considered config options and select KASAN_VMALLOC if KASAN is enabled. Reviewed-by: Christian Borntraeger Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/Kconfig | 1 + arch/s390/boot/vmem.c | 12 +++--------- arch/s390/configs/kasan.config | 1 - 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 0077969170e8..e0b2647e0694 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -233,6 +233,7 @@ config S390 select HAVE_VIRT_CPU_ACCOUNTING_IDLE select IOMMU_HELPER if PCI select IOMMU_SUPPORT if PCI + select KASAN_VMALLOC if KASAN select LOCK_MM_AND_FIND_VMA select MMU_GATHER_MERGE_VMAS select MMU_GATHER_NO_GATHER diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 145035f84a0e..00a3c59ac218 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -63,7 +63,6 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); unsigned long memgap_start = 0; - unsigned long untracked_end; unsigned long start, end; int i; @@ -93,15 +92,10 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW); kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW); kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW); - if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - untracked_end = VMALLOC_START; - /* shallowly populate kasan shadow for vmalloc and modules */ - kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); - } else { - untracked_end = MODULES_VADDR; - } + /* shallowly populate kasan shadow for vmalloc and modules */ + kasan_populate(VMALLOC_START, MODULES_END, POPULATE_KASAN_SHALLOW); /* populate kasan shadow for untracked memory */ - kasan_populate((unsigned long)__identity_va(ident_map_size), untracked_end, + kasan_populate((unsigned long)__identity_va(ident_map_size), VMALLOC_START, POPULATE_KASAN_ZERO_SHADOW); kasan_populate(kernel_end, _REGION1_SIZE, POPULATE_KASAN_ZERO_SHADOW); } diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config index 84c2b551e992..b4e600717411 100644 --- a/arch/s390/configs/kasan.config +++ b/arch/s390/configs/kasan.config @@ -1,4 +1,3 @@ # Help: Enable KASan for debugging CONFIG_KASAN=y CONFIG_KASAN_INLINE=y -CONFIG_KASAN_VMALLOC=y From 27939d6cde904bcea19d0cdb64a79eedf5abc7ce Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 2 Dec 2024 12:56:38 +0100 Subject: [PATCH 02/48] s390/Kconfig: Select VMAP_STACK unconditionally There is no point in supporting !VMAP_STACK kernel builds. VMAP_STACK has proven to work since many years. Also, since KASAN_VMALLOC is supported, kernels built with !VMAP_STACK are completely untested. Therefore select VMAP_STACK unconditionally and remove all config options and code required for !VMAP_STACK builds. Acked-by: Christian Borntraeger Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/Kconfig | 27 +-------------------------- arch/s390/Makefile | 9 --------- arch/s390/kernel/entry.S | 20 ++------------------ arch/s390/kernel/setup.c | 8 -------- arch/s390/kernel/vdso64/Makefile | 2 +- 5 files changed, 4 insertions(+), 62 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index e0b2647e0694..e4931a32b896 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -256,6 +256,7 @@ config S390 select USER_STACKTRACE_SUPPORT select VDSO_GETRANDOM select VIRT_CPU_ACCOUNTING + select VMAP_STACK select ZONE_DMA # Note: keep the above list sorted alphabetically @@ -689,32 +690,6 @@ config MAX_PHYSMEM_BITS Increasing the number of bits also increases the kernel image size. By default 46 bits (64TB) are supported. -config CHECK_STACK - def_bool y - depends on !VMAP_STACK - prompt "Detect kernel stack overflow" - help - This option enables the compiler option -mstack-guard and - -mstack-size if they are available. If the compiler supports them - it will emit additional code to each function prolog to trigger - an illegal operation if the kernel stack is about to overflow. - - Say N if you are unsure. - -config STACK_GUARD - int "Size of the guard area (128-1024)" - range 128 1024 - depends on CHECK_STACK - default "256" - help - This allows you to specify the size of the guard area at the lower - end of the kernel stack. If the kernel stack points into the guard - area on function entry an illegal operation is triggered. The size - needs to be a power of 2. Please keep in mind that the size of an - interrupt frame is 184 bytes for 31 bit and 328 bytes on 64 bit. - The minimum size for the stack guard should be 256 for 31 bit and - 512 for 64 bit. - endmenu menu "I/O subsystem" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 7fd57398221e..3f25498dac65 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -72,15 +72,6 @@ cflags-$(CONFIG_FRAME_POINTER) += -fno-optimize-sibling-calls KBUILD_AFLAGS_DECOMPRESSOR += $(aflags-y) KBUILD_CFLAGS_DECOMPRESSOR += $(cflags-y) -ifneq ($(call cc-option,-mstack-size=8192 -mstack-guard=128),) - CC_FLAGS_CHECK_STACK := -mstack-size=$(STACK_SIZE) - ifeq ($(call cc-option,-mstack-size=8192),) - CC_FLAGS_CHECK_STACK += -mstack-guard=$(CONFIG_STACK_GUARD) - endif - export CC_FLAGS_CHECK_STACK - cflags-$(CONFIG_CHECK_STACK) += $(CC_FLAGS_CHECK_STACK) -endif - ifdef CONFIG_EXPOLINE ifdef CONFIG_EXPOLINE_EXTERN CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 960c08700cf6..4cc3408c4dac 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -52,16 +52,7 @@ _LPP_OFFSET = __LC_LPP ALT_FACILITY(193) .endm - .macro CHECK_STACK savearea, lowcore -#ifdef CONFIG_CHECK_STACK - tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD - la %r14,\savearea(\lowcore) - jz stack_overflow -#endif - .endm - .macro CHECK_VMAP_STACK savearea, lowcore, oklabel -#ifdef CONFIG_VMAP_STACK lgr %r14,%r15 nill %r14,0x10000 - THREAD_SIZE oill %r14,STACK_INIT_OFFSET @@ -77,9 +68,6 @@ _LPP_OFFSET = __LC_LPP je \oklabel la %r14,\savearea(\lowcore) j stack_overflow -#else - j \oklabel -#endif .endm /* @@ -326,8 +314,7 @@ SYM_CODE_START(pgm_check_handler) jnz 2f # -> enabled, can't be a double fault tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -2: CHECK_STACK __LC_SAVE_AREA,%r13 - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) +2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) # CHECK_VMAP_STACK branches to stack_overflow or 4f CHECK_VMAP_STACK __LC_SAVE_AREA,%r13,4f 3: lg %r15,__LC_KERNEL_STACK(%r13) @@ -394,8 +381,7 @@ SYM_CODE_START(\name) BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif -0: CHECK_STACK __LC_SAVE_AREA,%r13 - aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) +0: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f 1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) lg %r15,__LC_KERNEL_STACK(%r13) @@ -603,7 +589,6 @@ SYM_CODE_END(early_pgm_check_handler) .section .kprobes.text, "ax" -#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK) /* * The synchronous or the asynchronous stack overflowed. We are dead. * No need to properly save the registers, we are going to panic anyway. @@ -621,7 +606,6 @@ SYM_CODE_START(stack_overflow) lgr %r2,%r11 # pass pointer to pt_regs jg kernel_stack_overflow SYM_CODE_END(stack_overflow) -#endif .section .data, "aw" .balign 4 diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index a3fea683b227..514c149a4636 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -359,25 +359,17 @@ void *restart_stack; unsigned long stack_alloc(void) { -#ifdef CONFIG_VMAP_STACK void *ret; ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, NUMA_NO_NODE, __builtin_return_address(0)); kmemleak_not_leak(ret); return (unsigned long)ret; -#else - return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); -#endif } void stack_free(unsigned long stack) { -#ifdef CONFIG_VMAP_STACK vfree((void *) stack); -#else - free_pages(stack, THREAD_SIZE_ORDER); -#endif } static unsigned long __init stack_alloc_early(void) diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 37bb4b761229..ad206f2068d8 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -5,7 +5,7 @@ include $(srctree)/lib/vdso/Makefile obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o -VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK) +VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE) CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE) CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE) From 7ad0075005088d931a39321dcbb5cb4df8801a37 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 2 Dec 2024 12:56:39 +0100 Subject: [PATCH 03/48] s390/setup: Cleanup stack_alloc() and stack_free() Some small cleanups to stack_alloc() and stack_free(): - Rename ret to stack to reflect what the variable is used for - Whitespace removal Reviewed-by: Vasily Gorbik Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/kernel/setup.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 514c149a4636..bd62683c5585 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -359,17 +359,17 @@ void *restart_stack; unsigned long stack_alloc(void) { - void *ret; + void *stack; - ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, - NUMA_NO_NODE, __builtin_return_address(0)); - kmemleak_not_leak(ret); - return (unsigned long)ret; + stack = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP, + NUMA_NO_NODE, __builtin_return_address(0)); + kmemleak_not_leak(stack); + return (unsigned long)stack; } void stack_free(unsigned long stack) { - vfree((void *) stack); + vfree((void *)stack); } static unsigned long __init stack_alloc_early(void) From d809df72b5a583f9fa6e0a722e4e7cb8b28b19fc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 4 Dec 2024 12:30:58 +0100 Subject: [PATCH 04/48] s390/atomic: Implement arch_atomic_inc() / arch_atomic_dec() Implement arch_atomic_inc() / arch_atomic_dec() functions which result in a single instruction if compiled for z196 or newer architectures. Reduces the kernel image size by ~6K (defconfig): bloat-o-meter: add/remove: 0/0 grow/shrink: 12/1005 up/down: 106/-6404 (-6298) Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/atomic.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 6723fca64018..c1a4a06e8340 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -45,6 +45,18 @@ static __always_inline void arch_atomic_add(int i, atomic_t *v) } #define arch_atomic_add arch_atomic_add +static __always_inline void arch_atomic_inc(atomic_t *v) +{ + __atomic_add_const(1, &v->counter); +} +#define arch_atomic_inc arch_atomic_inc + +static __always_inline void arch_atomic_dec(atomic_t *v) +{ + __atomic_add_const(-1, &v->counter); +} +#define arch_atomic_dec arch_atomic_dec + #define arch_atomic_sub(_i, _v) arch_atomic_add(-(int)(_i), _v) #define arch_atomic_sub_return(_i, _v) arch_atomic_add_return(-(int)(_i), _v) #define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v) @@ -122,6 +134,18 @@ static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) } #define arch_atomic64_add arch_atomic64_add +static __always_inline void arch_atomic64_inc(atomic64_t *v) +{ + __atomic64_add_const(1, (long *)&v->counter); +} +#define arch_atomic64_inc arch_atomic64_inc + +static __always_inline void arch_atomic64_dec(atomic64_t *v) +{ + __atomic64_add_const(-1, (long *)&v->counter); +} +#define arch_atomic64_dec arch_atomic64_dec + static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) { return arch_xchg(&v->counter, new); From 9b90c5705786cf0c39356a94b19d842c954fdf16 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 4 Dec 2024 12:30:59 +0100 Subject: [PATCH 05/48] s390/atomic: Consistent layering between atomic.h and atomic_ops.h With commit c8a91c285d8c ("s390/atomic: move remaining inline assemblies to atomic_ops.h") all remaining atomic inline assemblies have been moved to atomic_ops.h. However the result is inconsistent: the functions in atomic_ops.h are supposed to be used with integral types like int and long pointers, while the functions in atomic.h work with atomic types. This layering got violated with the named commit. Therefore adjust this now, and also use consistent variable names in atomic_ops.h. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/atomic.h | 8 +++--- arch/s390/include/asm/atomic_ops.h | 44 +++++++++++++++--------------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index c1a4a06e8340..92d6f33b6046 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -17,13 +17,13 @@ static __always_inline int arch_atomic_read(const atomic_t *v) { - return __atomic_read(v); + return __atomic_read(&v->counter); } #define arch_atomic_read arch_atomic_read static __always_inline void arch_atomic_set(atomic_t *v, int i) { - __atomic_set(v, i); + __atomic_set(&v->counter, i); } #define arch_atomic_set arch_atomic_set @@ -106,13 +106,13 @@ static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int n static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { - return __atomic64_read(v); + return __atomic64_read((long *)&v->counter); } #define arch_atomic64_read arch_atomic64_read static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { - __atomic64_set(v, i); + __atomic64_set((long *)&v->counter, i); } #define arch_atomic64_set arch_atomic64_set diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 1d6b2056fad8..90573508d045 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -11,49 +11,49 @@ #include #include -static __always_inline int __atomic_read(const atomic_t *v) +static __always_inline int __atomic_read(const int *ptr) { - int c; + int val; asm volatile( - " l %[c],%[counter]\n" - : [c] "=d" (c) : [counter] "R" (v->counter)); - return c; + " l %[val],%[ptr]\n" + : [val] "=d" (val) : [ptr] "R" (*ptr)); + return val; } -static __always_inline void __atomic_set(atomic_t *v, int i) +static __always_inline void __atomic_set(int *ptr, int val) { - if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) { + if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) { asm volatile( - " mvhi %[counter], %[i]\n" - : [counter] "=Q" (v->counter) : [i] "K" (i)); + " mvhi %[ptr],%[val]\n" + : [ptr] "=Q" (*ptr) : [val] "K" (val)); } else { asm volatile( - " st %[i],%[counter]\n" - : [counter] "=R" (v->counter) : [i] "d" (i)); + " st %[val],%[ptr]\n" + : [ptr] "=R" (*ptr) : [val] "d" (val)); } } -static __always_inline s64 __atomic64_read(const atomic64_t *v) +static __always_inline long __atomic64_read(const long *ptr) { - s64 c; + long val; asm volatile( - " lg %[c],%[counter]\n" - : [c] "=d" (c) : [counter] "RT" (v->counter)); - return c; + " lg %[val],%[ptr]\n" + : [val] "=d" (val) : [ptr] "RT" (*ptr)); + return val; } -static __always_inline void __atomic64_set(atomic64_t *v, s64 i) +static __always_inline void __atomic64_set(long *ptr, long val) { - if (__builtin_constant_p(i) && i >= S16_MIN && i <= S16_MAX) { + if (__builtin_constant_p(val) && val >= S16_MIN && val <= S16_MAX) { asm volatile( - " mvghi %[counter], %[i]\n" - : [counter] "=Q" (v->counter) : [i] "K" (i)); + " mvghi %[ptr],%[val]\n" + : [ptr] "=Q" (*ptr) : [val] "K" (val)); } else { asm volatile( - " stg %[i],%[counter]\n" - : [counter] "=RT" (v->counter) : [i] "d" (i)); + " stg %[val],%[ptr]\n" + : [ptr] "=RT" (*ptr) : [val] "d" (val)); } } From a7af4fb85e208f31cc767245516460b1af287b36 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 4 Dec 2024 12:31:00 +0100 Subject: [PATCH 06/48] s390/preempt: Add comments The s390 preempt_count implementation is more or less a copy of the x86 implementation using different instructions. For clarification how this works also add all comments from x86 with some minor modifications. Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/preempt.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 2c29bdf12127..3e3f21385db9 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -12,8 +12,17 @@ /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 + +/* + * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such + * that a decrement hitting 0 means we can and should reschedule. + */ #define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) +/* + * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users + * that think a non-zero value indicates we cannot preempt. + */ static __always_inline int preempt_count(void) { return READ_ONCE(get_lowcore()->preempt_count) & ~PREEMPT_NEED_RESCHED; @@ -29,6 +38,15 @@ static __always_inline void preempt_count_set(int pc) } while (!arch_try_cmpxchg(&get_lowcore()->preempt_count, &old, new)); } +/* + * We fold the NEED_RESCHED bit into the preempt count such that + * preempt_enable() can decrement and test for needing to reschedule with a + * short instruction sequence. + * + * We invert the actual bit, so that when the decrement hits 0 we know we both + * need to resched (the bit is cleared) and can resched (no preempt count). + */ + static __always_inline void set_preempt_need_resched(void) { __atomic_and(~PREEMPT_NEED_RESCHED, &get_lowcore()->preempt_count); @@ -64,11 +82,19 @@ static __always_inline void __preempt_count_sub(int val) __preempt_count_add(-val); } +/* + * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule + * a decrement which hits zero means we have no preempt_count and should + * reschedule. + */ static __always_inline bool __preempt_count_dec_and_test(void) { return __atomic_add(-1, &get_lowcore()->preempt_count) == 1; } +/* + * Returns true when we need to resched and can (barring IRQ state). + */ static __always_inline bool should_resched(int preempt_offset) { return unlikely(READ_ONCE(get_lowcore()->preempt_count) == From 5eee66c5bf6034c84a67fdbf224110f5288d9195 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 4 Dec 2024 12:31:01 +0100 Subject: [PATCH 07/48] s390/preempt: Remove special pre MARCH_HAS_Z196_FEATURES implementation Remove the preempt count implementation for pre MARCH_HAS_Z196_FEATURES builds. If the kernel is compiled with PREEMPT=n, which is the default for all distributions, this has close to zero impact in the generated code. Therefore remove the alternative implementation to keep things simple. Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/preempt.h | 52 --------------------------------- 1 file changed, 52 deletions(-) diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 3e3f21385db9..e100dd32f766 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -8,8 +8,6 @@ #include #include -#ifdef MARCH_HAS_Z196_FEATURES - /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 @@ -101,56 +99,6 @@ static __always_inline bool should_resched(int preempt_offset) preempt_offset); } -#else /* MARCH_HAS_Z196_FEATURES */ - -#define PREEMPT_ENABLED (0) - -static __always_inline int preempt_count(void) -{ - return READ_ONCE(get_lowcore()->preempt_count); -} - -static __always_inline void preempt_count_set(int pc) -{ - get_lowcore()->preempt_count = pc; -} - -static __always_inline void set_preempt_need_resched(void) -{ -} - -static __always_inline void clear_preempt_need_resched(void) -{ -} - -static __always_inline bool test_preempt_need_resched(void) -{ - return false; -} - -static __always_inline void __preempt_count_add(int val) -{ - get_lowcore()->preempt_count += val; -} - -static __always_inline void __preempt_count_sub(int val) -{ - get_lowcore()->preempt_count -= val; -} - -static __always_inline bool __preempt_count_dec_and_test(void) -{ - return !--get_lowcore()->preempt_count && tif_need_resched(); -} - -static __always_inline bool should_resched(int preempt_offset) -{ - return unlikely(preempt_count() == preempt_offset && - tif_need_resched()); -} - -#endif /* MARCH_HAS_Z196_FEATURES */ - #define init_task_preempt_count(p) do { } while (0) /* Deferred to CPU bringup time */ #define init_idle_preempt_count(p, cpu) do { } while (0) From 2ca248f52afa4f55fd3fc2e7c5e123385b192f14 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 4 Dec 2024 12:31:02 +0100 Subject: [PATCH 08/48] s390/preempt: Adjust coding style Just remove a line break which reduces readability. Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/preempt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index e100dd32f766..a55e1767b4d4 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -95,8 +95,7 @@ static __always_inline bool __preempt_count_dec_and_test(void) */ static __always_inline bool should_resched(int preempt_offset) { - return unlikely(READ_ONCE(get_lowcore()->preempt_count) == - preempt_offset); + return unlikely(READ_ONCE(get_lowcore()->preempt_count) == preempt_offset); } #define init_task_preempt_count(p) do { } while (0) From 7c7f32c9ee8ecd626dffc28be601b37c111ebd60 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 11 Dec 2024 12:58:03 +0100 Subject: [PATCH 09/48] s390: Remove superfluous new lines from inline assemblies GCC uses the number of lines of an inline assembly to calculate its length (number of instructions). This has an impact on GCCs inlining decisions. Therefore remove superfluous new lines from a couple of inline assemblies, so that their real size is reflected. Also use an "asm inline" statement for the fpu_lfpc_safe() inline assembly to enforce that GCC assumes the minimum size for this inline assembly, since it contains various statements which make it appear much larger than the resulting code is. Suggested-by: Juergen Christ Reviewed-by: Juergen Christ Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/atomic_ops.h | 4 ++-- arch/s390/include/asm/checksum.h | 2 +- arch/s390/include/asm/fpu-insn.h | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 90573508d045..021503d81ed1 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -73,7 +73,7 @@ static __always_inline op_type op_name(op_type val, op_type *ptr) \ } \ #define __ATOMIC_OPS(op_name, op_type, op_string) \ - __ATOMIC_OP(op_name, op_type, op_string, "\n") \ + __ATOMIC_OP(op_name, op_type, op_string, "") \ __ATOMIC_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") __ATOMIC_OPS(__atomic_add, int, "laa") @@ -99,7 +99,7 @@ static __always_inline void op_name(op_type val, op_type *ptr) \ } #define __ATOMIC_CONST_OPS(op_name, op_type, op_string) \ - __ATOMIC_CONST_OP(op_name, op_type, op_string, "\n") \ + __ATOMIC_CONST_OP(op_name, op_type, op_string, "") \ __ATOMIC_CONST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") __ATOMIC_CONST_OPS(__atomic_add_const, int, "asi") diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 46f5c9660616..d86dea5900e7 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -25,7 +25,7 @@ static inline __wsum cksm(const void *buff, int len, __wsum sum) instrument_read(buff, len); kmsan_check_memory(buff, len); - asm volatile("\n" + asm volatile( "0: cksm %[sum],%[rp]\n" " jo 0b\n" : [sum] "+&d" (sum), [rp] "+&d" (rp.pair) : : "cc", "memory"); diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h index c1e2e521d9af..de510c9f6efa 100644 --- a/arch/s390/include/asm/fpu-insn.h +++ b/arch/s390/include/asm/fpu-insn.h @@ -103,7 +103,7 @@ static inline void fpu_lfpc_safe(unsigned int *fpc) u32 tmp; instrument_read(fpc, sizeof(*fpc)); - asm volatile("\n" + asm_inline volatile( "0: lfpc %[fpc]\n" "1: nopr %%r7\n" ".pushsection .fixup, \"ax\"\n" @@ -188,7 +188,7 @@ static __always_inline void fpu_vgfmg(u8 v1, u8 v2, u8 v3) static __always_inline void fpu_vl(u8 v1, const void *vxr) { instrument_read(vxr, sizeof(__vector128)); - asm volatile("\n" + asm volatile( " la 1,%[vxr]\n" " VL %[v1],0,,1\n" : @@ -246,7 +246,7 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_read(vxr, size); - asm volatile("\n" + asm volatile( " la 1,%[vxr]\n" " VLL %[v1],%[index],0,1\n" : @@ -284,7 +284,7 @@ static __always_inline void fpu_vll(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_read(_v, size); \ - asm volatile("\n" \ + asm volatile( \ " la 1,%[vxrs]\n" \ " VLM %[v1],%[v3],0,1\n" \ : \ @@ -367,7 +367,7 @@ static __always_inline void fpu_vsrlb(u8 v1, u8 v2, u8 v3) static __always_inline void fpu_vst(u8 v1, const void *vxr) { instrument_write(vxr, sizeof(__vector128)); - asm volatile("\n" + asm volatile( " la 1,%[vxr]\n" " VST %[v1],0,,1\n" : [vxr] "=R" (*(__vector128 *)vxr) @@ -396,7 +396,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) size = min(index + 1, sizeof(__vector128)); instrument_write(vxr, size); - asm volatile("\n" + asm volatile( " la 1,%[vxr]\n" " VSTL %[v1],%[index],0,1\n" : [vxr] "=R" (*(u8 *)vxr) @@ -430,7 +430,7 @@ static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr) } *_v = (void *)(_vxrs); \ \ instrument_write(_v, size); \ - asm volatile("\n" \ + asm volatile( \ " la 1,%[vxrs]\n" \ " VSTM %[v1],%[v3],0,1\n" \ : [vxrs] "=R" (*_v) \ From a53f5d247e24f4d3fb1218f23abdb5096488f4cb Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 11 Dec 2024 12:58:04 +0100 Subject: [PATCH 10/48] s390/atomic: Provide arch_atomic_*_and_test() implementations Provide arch_atomic_*_and_test() implementations which make use of flag output constraints, and allow the compiler to generate slightly better code. Reviewed-by: Juergen Christ Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/atomic.h | 36 +++++++++++++++ arch/s390/include/asm/atomic_ops.h | 73 ++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 92d6f33b6046..b36dd6a1d652 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -57,6 +57,24 @@ static __always_inline void arch_atomic_dec(atomic_t *v) } #define arch_atomic_dec arch_atomic_dec +static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) +{ + return __atomic_add_and_test_barrier(-i, &v->counter); +} +#define arch_atomic_sub_and_test arch_atomic_sub_and_test + +static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) +{ + return __atomic_add_const_and_test_barrier(-1, &v->counter); +} +#define arch_atomic_dec_and_test arch_atomic_dec_and_test + +static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) +{ + return __atomic_add_const_and_test_barrier(1, &v->counter); +} +#define arch_atomic_inc_and_test arch_atomic_inc_and_test + #define arch_atomic_sub(_i, _v) arch_atomic_add(-(int)(_i), _v) #define arch_atomic_sub_return(_i, _v) arch_atomic_add_return(-(int)(_i), _v) #define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v) @@ -146,6 +164,24 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v) } #define arch_atomic64_dec arch_atomic64_dec +static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v) +{ + return __atomic64_add_and_test_barrier(-i, (long *)&v->counter); +} +#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test + +static __always_inline bool arch_atomic64_dec_and_test(atomic64_t *v) +{ + return __atomic64_add_const_and_test_barrier(-1, (long *)&v->counter); +} +#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test + +static __always_inline bool arch_atomic64_inc_and_test(atomic64_t *v) +{ + return __atomic64_add_const_and_test_barrier(1, (long *)&v->counter); +} +#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test + static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new) { return arch_xchg(&v->counter, new); diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 021503d81ed1..585678bbcd7a 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -10,6 +10,7 @@ #include #include +#include static __always_inline int __atomic_read(const int *ptr) { @@ -169,4 +170,76 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr") #endif /* MARCH_HAS_Z196_FEATURES */ +#if defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) + +#define __ATOMIC_TEST_OP(op_name, op_type, op_string, op_barrier) \ +static __always_inline bool op_name(op_type val, op_type *ptr) \ +{ \ + op_type tmp; \ + int cc; \ + \ + asm volatile( \ + op_string " %[tmp],%[val],%[ptr]\n" \ + op_barrier \ + : "=@cc" (cc), [tmp] "=d" (tmp), [ptr] "+QS" (*ptr) \ + : [val] "d" (val) \ + : "memory"); \ + return (cc == 0) || (cc == 2); \ +} \ + +#define __ATOMIC_TEST_OPS(op_name, op_type, op_string) \ + __ATOMIC_TEST_OP(op_name, op_type, op_string, "") \ + __ATOMIC_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") + +__ATOMIC_TEST_OPS(__atomic_add_and_test, int, "laal") +__ATOMIC_TEST_OPS(__atomic64_add_and_test, long, "laalg") + +#undef __ATOMIC_TEST_OPS +#undef __ATOMIC_TEST_OP + +#define __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, op_barrier) \ +static __always_inline bool op_name(op_type val, op_type *ptr) \ +{ \ + int cc; \ + \ + asm volatile( \ + op_string " %[ptr],%[val]\n" \ + op_barrier \ + : "=@cc" (cc), [ptr] "+QS" (*ptr) \ + : [val] "i" (val) \ + : "memory"); \ + return (cc == 0) || (cc == 2); \ +} + +#define __ATOMIC_CONST_TEST_OPS(op_name, op_type, op_string) \ + __ATOMIC_CONST_TEST_OP(op_name, op_type, op_string, "") \ + __ATOMIC_CONST_TEST_OP(op_name##_barrier, op_type, op_string, "bcr 14,0\n") + +__ATOMIC_CONST_TEST_OPS(__atomic_add_const_and_test, int, "alsi") +__ATOMIC_CONST_TEST_OPS(__atomic64_add_const_and_test, long, "algsi") + +#undef __ATOMIC_CONST_TEST_OPS +#undef __ATOMIC_CONST_TEST_OP + +#else /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */ + +#define __ATOMIC_TEST_OP(op_name, op_func, op_type) \ +static __always_inline bool op_name(op_type val, op_type *ptr) \ +{ \ + return op_func(val, ptr) == -val; \ +} + +__ATOMIC_TEST_OP(__atomic_add_and_test, __atomic_add, int) +__ATOMIC_TEST_OP(__atomic_add_and_test_barrier, __atomic_add_barrier, int) +__ATOMIC_TEST_OP(__atomic_add_const_and_test, __atomic_add, int) +__ATOMIC_TEST_OP(__atomic_add_const_and_test_barrier, __atomic_add_barrier, int) +__ATOMIC_TEST_OP(__atomic64_add_and_test, __atomic64_add, long) +__ATOMIC_TEST_OP(__atomic64_add_and_test_barrier, __atomic64_add_barrier, long) +__ATOMIC_TEST_OP(__atomic64_add_const_and_test, __atomic64_add, long) +__ATOMIC_TEST_OP(__atomic64_add_const_and_test_barrier, __atomic64_add_barrier, long) + +#undef __ATOMIC_TEST_OP + +#endif /* defined(MARCH_HAS_Z196_FEATURES) && defined(__HAVE_ASM_FLAG_OUTPUTS__) */ + #endif /* __ARCH_S390_ATOMIC_OPS__ */ From 5c9e37e5233ec3a6b4f2d160c697b8b28d13f134 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 11 Dec 2024 12:58:05 +0100 Subject: [PATCH 11/48] s390/preempt: Optimize __preempt_count_dec_and_test() Use __atomic_add_const_and_test() within __preempt_count_dec_and_test(). With this it is possible to decrease preempt_count by one and test if need_resched is set with one instruction, if the compiler has support for flag output operand constraints. Reviewed-by: Juergen Christ Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/preempt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index a55e1767b4d4..6ccd033acfe5 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -87,7 +87,7 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - return __atomic_add(-1, &get_lowcore()->preempt_count) == 1; + return __atomic_add_const_and_test(-1, &get_lowcore()->preempt_count); } /* From 912a0d35232686673a92a3728606f618da64291e Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 10 Dec 2024 12:35:40 +0100 Subject: [PATCH 12/48] s390: Remove __bootdata annotations from declarations For consistency, remove the `__bootdata` and `__bootdata_preserved` section annotations from variable declarations in header files. Section annotations should be applied to definitions, not declarations. This change moves the annotations to the variable definitions in the corresponding source files. Acked-by: Heiko Carstens Acked-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/abs_lowcore.h | 3 +-- arch/s390/include/asm/page-states.h | 3 +-- arch/s390/include/asm/pgtable.h | 15 +++++++-------- arch/s390/kernel/abs_lowcore.c | 1 + arch/s390/kernel/os_info.c | 1 + arch/s390/kernel/setup.c | 12 ++++++------ arch/s390/kernel/vmcore_info.c | 3 ++- arch/s390/mm/maccess.c | 1 + 8 files changed, 20 insertions(+), 19 deletions(-) diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h index d20df8c923fc..17afb40f77a4 100644 --- a/arch/s390/include/asm/abs_lowcore.h +++ b/arch/s390/include/asm/abs_lowcore.h @@ -2,7 +2,6 @@ #ifndef _ASM_S390_ABS_LOWCORE_H #define _ASM_S390_ABS_LOWCORE_H -#include #include #define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore)) @@ -25,7 +24,7 @@ static inline void put_abs_lowcore(struct lowcore *lc) put_cpu(); } -extern int __bootdata_preserved(relocate_lowcore); +extern int relocate_lowcore; static inline int have_relocated_lowcore(void) { diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h index 08fcbd628120..794fdb21500a 100644 --- a/arch/s390/include/asm/page-states.h +++ b/arch/s390/include/asm/page-states.h @@ -7,7 +7,6 @@ #ifndef PAGE_STATES_H #define PAGE_STATES_H -#include #include #define ESSA_GET_STATE 0 @@ -21,7 +20,7 @@ #define ESSA_MAX ESSA_SET_STABLE_NODAT -extern int __bootdata_preserved(cmma_flag); +extern int cmma_flag; static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd) { diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 48268095b0a3..3b21aecf69ac 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -35,7 +34,7 @@ enum { PG_DIRECT_MAP_MAX }; -extern atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); +extern atomic_long_t direct_pages_count[PG_DIRECT_MAP_MAX]; static inline void update_page_count(int level, long count) { @@ -85,14 +84,14 @@ extern unsigned long zero_page_mask; * happen without trampolines and in addition the placement within a * 2GB frame is branch prediction unit friendly. */ -extern unsigned long __bootdata_preserved(VMALLOC_START); -extern unsigned long __bootdata_preserved(VMALLOC_END); +extern unsigned long VMALLOC_START; +extern unsigned long VMALLOC_END; #define VMALLOC_DEFAULT_SIZE ((512UL << 30) - MODULES_LEN) -extern struct page *__bootdata_preserved(vmemmap); -extern unsigned long __bootdata_preserved(vmemmap_size); +extern struct page *vmemmap; +extern unsigned long vmemmap_size; -extern unsigned long __bootdata_preserved(MODULES_VADDR); -extern unsigned long __bootdata_preserved(MODULES_END); +extern unsigned long MODULES_VADDR; +extern unsigned long MODULES_END; #define MODULES_VADDR MODULES_VADDR #define MODULES_END MODULES_END #define MODULES_LEN (1UL << 31) diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c index 09cd24cbe74e..88f0b91d7a73 100644 --- a/arch/s390/kernel/abs_lowcore.c +++ b/arch/s390/kernel/abs_lowcore.c @@ -2,6 +2,7 @@ #include #include +#include unsigned long __bootdata_preserved(__abs_lowcore); int __bootdata_preserved(relocate_lowcore); diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index 29080d6d5d8d..c2a468986212 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -18,6 +18,7 @@ #include #include #include +#include #include /* diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index bd62683c5585..95324aaa17e2 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -157,18 +157,18 @@ u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); struct oldmem_data __bootdata_preserved(oldmem_data); -unsigned long VMALLOC_START; +unsigned long __bootdata_preserved(VMALLOC_START); EXPORT_SYMBOL(VMALLOC_START); -unsigned long VMALLOC_END; +unsigned long __bootdata_preserved(VMALLOC_END); EXPORT_SYMBOL(VMALLOC_END); -struct page *vmemmap; +struct page *__bootdata_preserved(vmemmap); EXPORT_SYMBOL(vmemmap); -unsigned long vmemmap_size; +unsigned long __bootdata_preserved(vmemmap_size); -unsigned long MODULES_VADDR; -unsigned long MODULES_END; +unsigned long __bootdata_preserved(MODULES_VADDR); +unsigned long __bootdata_preserved(MODULES_END); /* An array with a pointer to the lowcore of every CPU. */ struct lowcore *lowcore_ptr[NR_CPUS]; diff --git a/arch/s390/kernel/vmcore_info.c b/arch/s390/kernel/vmcore_info.c index 23f7d7619a99..cc8933e04ff7 100644 --- a/arch/s390/kernel/vmcore_info.c +++ b/arch/s390/kernel/vmcore_info.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only #include -#include #include +#include +#include #include void arch_crash_save_vmcoreinfo(void) diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 28a18c42ba99..44426e0f2944 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include From 01dc3a0d5cd67de28bcd48c2be196b26dd2dde9f Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 10 Dec 2024 12:35:46 +0100 Subject: [PATCH 13/48] s390/abs_lowcore: Include linux/smp.h for get_cpu() and put_cpu() Add missing include of in abs_lowcore.h to provide declarations for get_cpu() and put_cpu() used in the code. Reviewed-by: Heiko Carstens Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/abs_lowcore.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h index 17afb40f77a4..004d17ea05cf 100644 --- a/arch/s390/include/asm/abs_lowcore.h +++ b/arch/s390/include/asm/abs_lowcore.h @@ -2,6 +2,7 @@ #ifndef _ASM_S390_ABS_LOWCORE_H #define _ASM_S390_ABS_LOWCORE_H +#include #include #define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore)) From 8fb72561602322bcd226460fc16f74f0bc003988 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Tue, 10 Dec 2024 12:35:49 +0100 Subject: [PATCH 14/48] s390: Add KERNEL_IMAGE_BASE to kasan.config Although Kconfig specifies: config KERNEL_IMAGE_BASE hex "Kernel image base address" range 0x100000 0x1FFFFFE0000000 if !KASAN range 0x100000 0x1BFFFFE0000000 if KASAN default 0x3FFE0000000 if !KASAN default 0x7FFFE0000000 if KASAN Running make defconfig or make debug_defconfig followed by make kasan.config results in a suboptimal CONFIG_KERNEL_IMAGE_BASE=0x3FFE0000000. Add CONFIG_KERNEL_IMAGE_BASE=0x7FFFE0000000 to kasan.config to address that. Acked-by: Heiko Carstens Reviewed-by: Alexander Gordeev Signed-off-by: Vasily Gorbik Signed-off-by: Alexander Gordeev --- arch/s390/configs/kasan.config | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/configs/kasan.config b/arch/s390/configs/kasan.config index b4e600717411..cefbe2ba1228 100644 --- a/arch/s390/configs/kasan.config +++ b/arch/s390/configs/kasan.config @@ -1,3 +1,4 @@ # Help: Enable KASan for debugging CONFIG_KASAN=y CONFIG_KASAN_INLINE=y +CONFIG_KERNEL_IMAGE_BASE=0x7FFFE0000000 From 4670f7bc17dfce649a1877c72643c133ef9055ab Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 6 Dec 2024 14:52:35 +0100 Subject: [PATCH 15/48] s390/mm/hugetlbfs: Remove huge_pte_none() / huge_pte_none_mostly() Slightly cleanup arch/s390/include/asm/hugetlb.h: - Remove huge_pte_none() / huge_pte_none_mostly() which are identical to the generic variants - Coding style adjustments Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/hugetlb.h | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index a40664b236e9..7c52acaf9f82 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -20,12 +20,13 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, unsigned long sz); void __set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte); + pte_t *ptep, pte_t pte); + #define __HAVE_ARCH_HUGE_PTEP_GET -extern pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + #define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR -extern pte_t huge_ptep_get_and_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep); +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); static inline void arch_clear_hugetlb_flags(struct folio *folio) { @@ -56,6 +57,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, pte_t pte, int dirty) { int changed = !pte_same(huge_ptep_get(vma->vm_mm, addr, ptep), pte); + if (changed) { huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); __set_huge_pte_at(vma->vm_mm, addr, ptep, pte); @@ -68,21 +70,10 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t pte = huge_ptep_get_and_clear(mm, addr, ptep); + __set_huge_pte_at(mm, addr, ptep, pte_wrprotect(pte)); } -#define __HAVE_ARCH_HUGE_PTE_NONE -static inline int huge_pte_none(pte_t pte) -{ - return pte_none(pte); -} - -#define __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY -static inline int huge_pte_none_mostly(pte_t pte) -{ - return huge_pte_none(pte) || is_pte_marker(pte); -} - #define __HAVE_ARCH_HUGE_PTE_MKUFFD_WP static inline pte_t huge_pte_mkuffd_wp(pte_t pte) { From 4ec6054e7321dc24ebccaa08b3af0d590f5666e6 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 13 Dec 2024 14:47:28 +0100 Subject: [PATCH 16/48] s390/pci: Report PCI error recovery results via SCLP Add a mechanism with which the status of PCI error recovery runs is reported to the platform. Together with the status supply additional information that may aid in problem determination. Reviewed-by: Halil Pasic Signed-off-by: Niklas Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/sclp.h | 33 +++++++++++ arch/s390/pci/Makefile | 2 +- arch/s390/pci/pci_event.c | 21 +++++-- arch/s390/pci/pci_report.c | 111 +++++++++++++++++++++++++++++++++++ arch/s390/pci/pci_report.h | 16 +++++ drivers/s390/char/sclp.h | 14 ----- drivers/s390/char/sclp_pci.c | 19 ------ 7 files changed, 178 insertions(+), 38 deletions(-) create mode 100644 arch/s390/pci/pci_report.c create mode 100644 arch/s390/pci/pci_report.h diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index eb00fa1771da..3267631b5adc 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -16,6 +16,11 @@ /* 24 + 16 * SCLP_MAX_CORES */ #define EXT_SCCB_READ_CPU (3 * PAGE_SIZE) +#define SCLP_ERRNOTIFY_AQ_RESET 0 +#define SCLP_ERRNOTIFY_AQ_REPAIR 1 +#define SCLP_ERRNOTIFY_AQ_INFO_LOG 2 +#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA 3 + #ifndef __ASSEMBLY__ #include #include @@ -111,6 +116,34 @@ struct sclp_info { }; extern struct sclp_info sclp; +struct sccb_header { + u16 length; + u8 function_code; + u8 control_mask[3]; + u16 response_code; +} __packed; + +struct evbuf_header { + u16 length; + u8 type; + u8 flags; + u16 _reserved; +} __packed; + +struct err_notify_evbuf { + struct evbuf_header header; + u8 action; + u8 atype; + u32 fh; + u32 fid; + u8 data[]; +} __packed; + +struct err_notify_sccb { + struct sccb_header header; + struct err_notify_evbuf evbuf; +} __packed; + struct zpci_report_error_header { u8 version; /* Interface version byte */ u8 action; /* Action qualifier byte diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 2c21f0394c9a..df73c5182990 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -5,6 +5,6 @@ obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ - pci_bus.o pci_kvm_hook.o + pci_bus.o pci_kvm_hook.o pci_report.o obj-$(CONFIG_PCI_IOV) += pci_iov.o obj-$(CONFIG_SYSFS) += pci_sysfs.o diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 7f7b732b3f3e..7bd7721c1239 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -16,6 +16,7 @@ #include #include "pci_bus.h" +#include "pci_report.h" /* Content Code Description for PCI Function Error */ struct zpci_ccdf_err { @@ -169,6 +170,8 @@ static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev, static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) { pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT; + struct zpci_dev *zdev = to_zpci(pdev); + char *status_str = "success"; struct pci_driver *driver; /* @@ -186,29 +189,37 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) if (is_passed_through(pdev)) { pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n", pci_name(pdev)); + status_str = "failed (pass-through)"; goto out_unlock; } driver = to_pci_driver(pdev->dev.driver); if (!is_driver_supported(driver)) { - if (!driver) + if (!driver) { pr_info("%s: Cannot be recovered because no driver is bound to the device\n", pci_name(pdev)); - else + status_str = "failed (no driver)"; + } else { pr_info("%s: The %s driver bound to the device does not support error recovery\n", pci_name(pdev), driver->name); + status_str = "failed (no driver support)"; + } goto out_unlock; } ers_res = zpci_event_notify_error_detected(pdev, driver); - if (ers_result_indicates_abort(ers_res)) + if (ers_result_indicates_abort(ers_res)) { + status_str = "failed (abort on detection)"; goto out_unlock; + } if (ers_res == PCI_ERS_RESULT_CAN_RECOVER) { ers_res = zpci_event_do_error_state_clear(pdev, driver); - if (ers_result_indicates_abort(ers_res)) + if (ers_result_indicates_abort(ers_res)) { + status_str = "failed (abort on MMIO enable)"; goto out_unlock; + } } if (ers_res == PCI_ERS_RESULT_NEED_RESET) @@ -217,6 +228,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) if (ers_res != PCI_ERS_RESULT_RECOVERED) { pr_err("%s: Automatic recovery failed; operator intervention is required\n", pci_name(pdev)); + status_str = "failed (driver can't recover)"; goto out_unlock; } @@ -225,6 +237,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) driver->err_handler->resume(pdev); out_unlock: pci_dev_unlock(pdev); + zpci_report_status(zdev, "recovery", status_str); return ers_res; } diff --git a/arch/s390/pci/pci_report.c b/arch/s390/pci/pci_report.c new file mode 100644 index 000000000000..2754c9c161f5 --- /dev/null +++ b/arch/s390/pci/pci_report.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2024 + * + * Author(s): + * Niklas Schnelle + * + */ + +#define KMSG_COMPONENT "zpci" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include + +#include + +#include "pci_report.h" + +#define ZPCI_ERR_LOG_ID_KERNEL_REPORT 0x4714 + +struct zpci_report_error_data { + u64 timestamp; + u64 err_log_id; + char log_data[]; +} __packed; + +#define ZPCI_REPORT_SIZE (PAGE_SIZE - sizeof(struct err_notify_sccb)) +#define ZPCI_REPORT_DATA_SIZE (ZPCI_REPORT_SIZE - sizeof(struct zpci_report_error_data)) + +struct zpci_report_error { + struct zpci_report_error_header header; + struct zpci_report_error_data data; +} __packed; + +static const char *zpci_state_str(pci_channel_state_t state) +{ + switch (state) { + case pci_channel_io_normal: + return "normal"; + case pci_channel_io_frozen: + return "frozen"; + case pci_channel_io_perm_failure: + return "permanent-failure"; + default: + return "invalid"; + }; +} + +/** + * zpci_report_status - Report the status of operations on a PCI device + * @zdev: The PCI device for which to report status + * @operation: A string representing the operation reported + * @status: A string representing the status of the operation + * + * This function creates a human readable report about an operation such as + * PCI device recovery and forwards this to the platform using the SCLP Write + * Event Data mechanism. Besides the operation and status strings the report + * also contains additional information about the device deemed useful for + * debug such as the currently bound device driver, if any, and error state. + * + * Return: 0 on success an error code < 0 otherwise. + */ +int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status) +{ + struct zpci_report_error *report; + struct pci_driver *driver = NULL; + struct pci_dev *pdev = NULL; + char *buf, *end; + int ret; + + if (!zdev || !zdev->zbus) + return -ENODEV; + + /* Protected virtualization hosts get nothing from us */ + if (prot_virt_guest) + return -ENODATA; + + report = (void *)get_zeroed_page(GFP_KERNEL); + if (!report) + return -ENOMEM; + if (zdev->zbus->bus) + pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); + if (pdev) + driver = to_pci_driver(pdev->dev.driver); + + buf = report->data.log_data; + end = report->data.log_data + ZPCI_REPORT_DATA_SIZE; + buf += scnprintf(buf, end - buf, "report: %s\n", operation); + buf += scnprintf(buf, end - buf, "status: %s\n", status); + buf += scnprintf(buf, end - buf, "state: %s\n", + (pdev) ? zpci_state_str(pdev->error_state) : "n/a"); + buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a"); + + report->header.version = 1; + report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG; + report->header.length = buf - (char *)&report->data; + report->data.timestamp = ktime_get_clocktai_seconds(); + report->data.err_log_id = ZPCI_ERR_LOG_ID_KERNEL_REPORT; + + ret = sclp_pci_report(&report->header, zdev->fh, zdev->fid); + if (ret) + pr_err("Reporting PCI status failed with code %d\n", ret); + else + pr_info("Reported PCI device status\n"); + + free_page((unsigned long)report); + + return ret; +} diff --git a/arch/s390/pci/pci_report.h b/arch/s390/pci/pci_report.h new file mode 100644 index 000000000000..e08003d51a97 --- /dev/null +++ b/arch/s390/pci/pci_report.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 2024 + * + * Author(s): + * Niklas Schnelle + * + */ +#ifndef __S390_PCI_REPORT_H +#define __S390_PCI_REPORT_H + +struct zpci_dev; + +int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char *status); + +#endif /* __S390_PCI_REPORT_H */ diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 6c91e422927f..73731fa2594e 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -85,13 +85,6 @@ typedef unsigned int sclp_cmdw_t; typedef u64 sccb_mask_t; -struct sccb_header { - u16 length; - u8 function_code; - u8 control_mask[3]; - u16 response_code; -} __attribute__((packed)); - struct init_sccb { struct sccb_header header; u16 _reserved; @@ -238,13 +231,6 @@ struct gds_vector { u16 gds_id; } __attribute__((packed)); -struct evbuf_header { - u16 length; - u8 type; - u8 flags; - u16 _reserved; -} __attribute__((packed)); - struct sclp_req { struct list_head list; /* list_head for request queueing. */ sclp_cmdw_t command; /* sclp command to execute */ diff --git a/drivers/s390/char/sclp_pci.c b/drivers/s390/char/sclp_pci.c index c3466a8c56bb..56400886f7fc 100644 --- a/drivers/s390/char/sclp_pci.c +++ b/drivers/s390/char/sclp_pci.c @@ -24,30 +24,11 @@ #define SCLP_ATYPE_PCI 2 -#define SCLP_ERRNOTIFY_AQ_RESET 0 -#define SCLP_ERRNOTIFY_AQ_REPAIR 1 -#define SCLP_ERRNOTIFY_AQ_INFO_LOG 2 -#define SCLP_ERRNOTIFY_AQ_OPTICS_DATA 3 - static DEFINE_MUTEX(sclp_pci_mutex); static struct sclp_register sclp_pci_event = { .send_mask = EVTYP_ERRNOTIFY_MASK, }; -struct err_notify_evbuf { - struct evbuf_header header; - u8 action; - u8 atype; - u32 fh; - u32 fid; - u8 data[]; -} __packed; - -struct err_notify_sccb { - struct sccb_header header; - struct err_notify_evbuf evbuf; -} __packed; - struct pci_cfg_sccb { struct sccb_header header; u8 atype; /* adapter type */ From 7832b3047d10e2c1f9cfed49de818a38aea251f6 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 13 Dec 2024 14:47:29 +0100 Subject: [PATCH 17/48] s390/debug: Simplify and document debug_next_entry() logic Contrary to convention debug_next_entry() returns a falsy 0 value if there are more entries and a truthy 1 value when there are no more entries. As there is only one caller just reverse this logic to be less surprising and document the behavior in a kdoc comment. Also replace the goto with an early return. In the future this allows using it in a do {} while (debug_next_entry(...)) loop. Reviewed-by: Halil Pasic Signed-off-by: Niklas Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/kernel/debug.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index de19fd8a6a95..e3598dbccd24 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -422,11 +422,17 @@ out: return len; } -/* - * debug_next_entry: - * - goto next entry in p_info +/** + * debug_next_entry - Go to the next entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the next entry. If no further entry + * exists the current position is set to one after the end the return value + * indicates that no further entries exist. + * + * Return: True if there are more following entries, false otherwise */ -static inline int debug_next_entry(file_private_info_t *p_info) +static inline bool debug_next_entry(file_private_info_t *p_info) { debug_info_t *id; @@ -434,10 +440,10 @@ static inline int debug_next_entry(file_private_info_t *p_info) if (p_info->act_entry == DEBUG_PROLOG_ENTRY) { p_info->act_entry = 0; p_info->act_page = 0; - goto out; + return true; } if (!id->areas) - return 1; + return false; p_info->act_entry += id->entry_size; /* switch to next page, if we reached the end of the page */ if (p_info->act_entry > (PAGE_SIZE - id->entry_size)) { @@ -450,10 +456,9 @@ static inline int debug_next_entry(file_private_info_t *p_info) p_info->act_page = 0; } if (p_info->act_area >= id->nr_areas) - return 1; + return false; } -out: - return 0; + return true; } /* @@ -495,7 +500,7 @@ static ssize_t debug_output(struct file *file, /* file descriptor */ } if (copy_size == formatted_line_residue) { entry_offset = 0; - if (debug_next_entry(p_info)) + if (!debug_next_entry(p_info)) goto out; } } From 460c52a57f83f0cb510ba04ac8263e1ee95b2d66 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 13 Dec 2024 14:47:30 +0100 Subject: [PATCH 18/48] s390/debug: Split private data alloc/free out of file operations Split the allocation respectively freeing of file_private_info_t out of open() respectively close(). This will be used in a follow on change to access to debug views without going through the s390dbf filesystem. Reviewed-by: Halil Pasic Signed-off-by: Niklas Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/kernel/debug.c | 78 ++++++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index e3598dbccd24..463c9a19a3b5 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -535,6 +535,42 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, return rc; /* number of input characters */ } +static file_private_info_t *debug_file_private_alloc(debug_info_t *debug_info, + struct debug_view *view) +{ + debug_info_t *debug_info_snapshot; + file_private_info_t *p_info; + + /* + * Make snapshot of current debug areas to get it consistent. + * To copy all the areas is only needed, if we have a view which + * formats the debug areas. + */ + if (!view->format_proc && !view->header_proc) + debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); + else + debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); + + if (!debug_info_snapshot) + return NULL; + p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + if (!p_info) { + debug_info_free(debug_info_snapshot); + return NULL; + } + p_info->offset = 0; + p_info->debug_info_snap = debug_info_snapshot; + p_info->debug_info_org = debug_info; + p_info->view = view; + p_info->act_area = 0; + p_info->act_page = 0; + p_info->act_entry = DEBUG_PROLOG_ENTRY; + p_info->act_entry_offset = 0; + debug_info_get(debug_info); + + return p_info; +} + /* * debug_open: * - called for user open() @@ -543,7 +579,7 @@ static ssize_t debug_input(struct file *file, const char __user *user_buf, */ static int debug_open(struct inode *inode, struct file *file) { - debug_info_t *debug_info, *debug_info_snapshot; + debug_info_t *debug_info; file_private_info_t *p_info; int i, rc = 0; @@ -561,42 +597,26 @@ static int debug_open(struct inode *inode, struct file *file) goto out; found: - - /* Make snapshot of current debug areas to get it consistent. */ - /* To copy all the areas is only needed, if we have a view which */ - /* formats the debug areas. */ - - if (!debug_info->views[i]->format_proc && !debug_info->views[i]->header_proc) - debug_info_snapshot = debug_info_copy(debug_info, NO_AREAS); - else - debug_info_snapshot = debug_info_copy(debug_info, ALL_AREAS); - - if (!debug_info_snapshot) { - rc = -ENOMEM; - goto out; - } - p_info = kmalloc(sizeof(file_private_info_t), GFP_KERNEL); + p_info = debug_file_private_alloc(debug_info, debug_info->views[i]); if (!p_info) { - debug_info_free(debug_info_snapshot); rc = -ENOMEM; goto out; } - p_info->offset = 0; - p_info->debug_info_snap = debug_info_snapshot; - p_info->debug_info_org = debug_info; - p_info->view = debug_info->views[i]; - p_info->act_area = 0; - p_info->act_page = 0; - p_info->act_entry = DEBUG_PROLOG_ENTRY; - p_info->act_entry_offset = 0; file->private_data = p_info; - debug_info_get(debug_info); nonseekable_open(inode, file); out: mutex_unlock(&debug_mutex); return rc; } +static void debug_file_private_free(file_private_info_t *p_info) +{ + if (p_info->debug_info_snap) + debug_info_free(p_info->debug_info_snap); + debug_info_put(p_info->debug_info_org); + kfree(p_info); +} + /* * debug_close: * - called for user close() @@ -607,10 +627,8 @@ static int debug_close(struct inode *inode, struct file *file) file_private_info_t *p_info; p_info = (file_private_info_t *) file->private_data; - if (p_info->debug_info_snap) - debug_info_free(p_info->debug_info_snap); - debug_info_put(p_info->debug_info_org); - kfree(file->private_data); + debug_file_private_free(p_info); + file->private_data = NULL; return 0; /* success */ } From 5f952dae48d034b0736593ba98b5aef84038522b Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 13 Dec 2024 14:47:31 +0100 Subject: [PATCH 19/48] s390/debug: Add debug_dump() to write debug view to a string buffer The debug_dump() function allows to get the content of a debug log and view pair in a string buffer. One future application of this is to provide debug logs to the platform to be collected with hardware error logs during recovery. Reviewed-by: Halil Pasic Co-developed-by: Halil Pasic Signed-off-by: Halil Pasic Signed-off-by: Niklas Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/debug.h | 3 +++ arch/s390/kernel/debug.c | 47 +++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index a7f7bdc9e19c..ec30f6f421a7 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -114,6 +114,9 @@ debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas, int buf_size, umode_t mode, uid_t uid, gid_t gid); +ssize_t debug_dump(debug_info_t *id, struct debug_view *view, + char *buf, size_t buf_size); + void debug_unregister(debug_info_t *id); void debug_set_level(debug_info_t *id, int new_level); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 463c9a19a3b5..2040b96d4cb3 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -632,6 +632,53 @@ static int debug_close(struct inode *inode, struct file *file) return 0; /* success */ } +/** + * debug_dump - Get a textual representation of debug info, or as much as fits + * @id: Debug information to use + * @view: View with which to dump the debug information + * @buf: Buffer the textual debug data representation is written to + * @buf_size: Size of the buffer, including the trailing '\0' byte + * + * This function may be used whenever a textual representation of the debug + * information is required without using an s390dbf file. + * + * Note: It is the callers responsibility to supply a view that is compatible + * with the debug information data. + * + * Return: On success returns the number of bytes written to the buffer not + * including the trailing '\0' byte. If bug_size == 0 the function returns 0. + * On failure an error code less than 0 is returned. + */ +ssize_t debug_dump(debug_info_t *id, struct debug_view *view, + char *buf, size_t buf_size) +{ + file_private_info_t *p_info; + size_t size, offset = 0; + + /* Need space for '\0' byte */ + if (buf_size < 1) + return 0; + buf_size--; + + p_info = debug_file_private_alloc(id, view); + if (!p_info) + return -ENOMEM; + + /* There is always at least the DEBUG_PROLOG_ENTRY */ + do { + size = debug_format_entry(p_info); + size = min(size, buf_size - offset); + memcpy(buf + offset, p_info->temp_buf, size); + offset += size; + if (offset >= buf_size) + break; + } while (debug_next_entry(p_info)); + debug_file_private_free(p_info); + buf[offset] = '\0'; + + return offset; +} + /* Create debugfs entries and add to internal list. */ static void _debug_register(debug_info_t *id) { From dc18c81a57e75c2abfd826164600b5b4f96f5fd9 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 13 Dec 2024 14:47:32 +0100 Subject: [PATCH 20/48] s390/debug: Add a reverse mode for debug_dump() In this mode debug_dump() writes the debug log starting at the newest entry followed by earlier entries. To this end add a debug_prev_entry() helper analogous to debug_next_entry() a helper to get the latest entry which is one before the active entry and a helper to iterate either forward or backward. Reviewed-by: Halil Pasic Co-developed-by: Halil Pasic Signed-off-by: Halil Pasic Signed-off-by: Niklas Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/debug.h | 2 +- arch/s390/kernel/debug.c | 87 ++++++++++++++++++++++++++++++++++- 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index ec30f6f421a7..aa0995780c10 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -115,7 +115,7 @@ debug_info_t *debug_register_mode(const char *name, int pages, int nr_areas, gid_t gid); ssize_t debug_dump(debug_info_t *id, struct debug_view *view, - char *buf, size_t buf_size); + char *buf, size_t buf_size, bool reverse); void debug_unregister(debug_info_t *id); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 2040b96d4cb3..61c393e806b2 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -354,7 +355,10 @@ static debug_info_t *debug_info_copy(debug_info_t *in, int mode) for (i = 0; i < in->nr_areas; i++) { for (j = 0; j < in->pages_per_area; j++) memcpy(rc->areas[i][j], in->areas[i][j], PAGE_SIZE); + rc->active_pages[i] = in->active_pages[i]; + rc->active_entries[i] = in->active_entries[i]; } + rc->active_area = in->active_area; out: spin_unlock_irqrestore(&in->lock, flags); return rc; @@ -461,6 +465,84 @@ static inline bool debug_next_entry(file_private_info_t *p_info) return true; } +/** + * debug_to_act_entry - Go to the currently active entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the currently active + * entry of @p_info->debug_info_snap + */ +static void debug_to_act_entry(file_private_info_t *p_info) +{ + debug_info_t *snap_id; + + snap_id = p_info->debug_info_snap; + p_info->act_area = snap_id->active_area; + p_info->act_page = snap_id->active_pages[snap_id->active_area]; + p_info->act_entry = snap_id->active_entries[snap_id->active_area]; +} + +/** + * debug_prev_entry - Go to the previous entry + * @p_info: Private info that is manipulated + * + * Sets the current position in @p_info to the previous entry. If no previous entry + * exists the current position is set left as DEBUG_PROLOG_ENTRY and the return value + * indicates that no previous entries exist. + * + * Return: True if there are more previous entries, false otherwise + */ + +static inline bool debug_prev_entry(file_private_info_t *p_info) +{ + debug_info_t *id; + + id = p_info->debug_info_snap; + if (p_info->act_entry == DEBUG_PROLOG_ENTRY) + debug_to_act_entry(p_info); + if (!id->areas) + return false; + p_info->act_entry -= id->entry_size; + /* switch to prev page, if we reached the beginning of the page */ + if (p_info->act_entry < 0) { + /* end of previous page */ + p_info->act_entry = rounddown(PAGE_SIZE, id->entry_size) - id->entry_size; + p_info->act_page--; + if (p_info->act_page < 0) { + /* previous area */ + p_info->act_area--; + p_info->act_page = id->pages_per_area - 1; + } + if (p_info->act_area < 0) + p_info->act_area = (id->nr_areas - 1) % id->nr_areas; + } + /* check full circle */ + if (id->active_area == p_info->act_area && + id->active_pages[id->active_area] == p_info->act_page && + id->active_entries[id->active_area] == p_info->act_entry) + return false; + return true; +} + +/** + * debug_move_entry - Go to next entry in either the forward or backward direction + * @p_info: Private info that is manipulated + * @reverse: If true go to the next entry in reverse i.e. previous + * + * Sets the current position in @p_info to the next (@reverse == false) or + * previous (@reverse == true) entry. + * + * Return: True if there are further entries in that direction, + * false otherwise. + */ +static bool debug_move_entry(file_private_info_t *p_info, bool reverse) +{ + if (reverse) + return debug_prev_entry(p_info); + else + return debug_next_entry(p_info); +} + /* * debug_output: * - called for user read() @@ -638,6 +720,7 @@ static int debug_close(struct inode *inode, struct file *file) * @view: View with which to dump the debug information * @buf: Buffer the textual debug data representation is written to * @buf_size: Size of the buffer, including the trailing '\0' byte + * @reverse: Go backwards from the last written entry * * This function may be used whenever a textual representation of the debug * information is required without using an s390dbf file. @@ -650,7 +733,7 @@ static int debug_close(struct inode *inode, struct file *file) * On failure an error code less than 0 is returned. */ ssize_t debug_dump(debug_info_t *id, struct debug_view *view, - char *buf, size_t buf_size) + char *buf, size_t buf_size, bool reverse) { file_private_info_t *p_info; size_t size, offset = 0; @@ -672,7 +755,7 @@ ssize_t debug_dump(debug_info_t *id, struct debug_view *view, offset += size; if (offset >= buf_size) break; - } while (debug_next_entry(p_info)); + } while (debug_move_entry(p_info, reverse)); debug_file_private_free(p_info); buf[offset] = '\0'; From 4c41a48f5f3ecd3b963cd3820d2ea41d9a8d6516 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Fri, 13 Dec 2024 14:47:33 +0100 Subject: [PATCH 21/48] s390/pci: Add pci_msg debug view to PCI report Using the newly introduced debug_dump() mechanism add formatted content of pci_debug_msg_id to the PCI report. The formatting is based on the existing sprintf format but removes caller pointer and area index and adds an column header. This will allow the platform to collect this log data together with hardware errors. This sets the reverse flag such that the newest log entries get added to the PCI report even if not all debug log entries fit. Reviewed-by: Halil Pasic Co-developed-by: Halil Pasic Signed-off-by: Halil Pasic Signed-off-by: Niklas Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/debug.h | 4 +++ arch/s390/kernel/debug.c | 8 +++--- arch/s390/pci/pci_report.c | 47 +++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index aa0995780c10..6375276d94ea 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -85,6 +85,10 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, int area, debug_entry_t *entry, char *out_buf, size_t out_buf_size); +#define DEBUG_SPRINTF_MAX_ARGS 10 +int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, + const char *inbuf); struct debug_view { char name[DEBUG_MAX_NAME_LEN]; debug_prolog_proc_t *prolog_proc; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 61c393e806b2..ba6b7329a10e 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -95,9 +95,6 @@ static int debug_input_flush_fn(debug_info_t *id, struct debug_view *view, static int debug_hex_ascii_format_fn(debug_info_t *id, struct debug_view *view, char *out_buf, size_t out_buf_size, const char *in_buf); -static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, size_t out_buf_size, - const char *inbuf); static void debug_areas_swap(debug_info_t *a, debug_info_t *b); static void debug_events_append(debug_info_t *dest, debug_info_t *src); @@ -1685,8 +1682,8 @@ EXPORT_SYMBOL(debug_dflt_header_fn); #define DEBUG_SPRINTF_MAX_ARGS 10 -static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, - char *out_buf, size_t out_buf_size, const char *inbuf) +int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size, const char *inbuf) { debug_sprintf_entry_t *curr_event = (debug_sprintf_entry_t *)inbuf; int num_longs, num_used_args = 0, i, rc = 0; @@ -1723,6 +1720,7 @@ static int debug_sprintf_format_fn(debug_info_t *id, struct debug_view *view, out: return rc; } +EXPORT_SYMBOL(debug_sprintf_format_fn); /* * debug_init: diff --git a/arch/s390/pci/pci_report.c b/arch/s390/pci/pci_report.c index 2754c9c161f5..1b494e5ecc4d 100644 --- a/arch/s390/pci/pci_report.c +++ b/arch/s390/pci/pci_report.c @@ -15,6 +15,8 @@ #include #include +#include +#include #include "pci_report.h" @@ -48,6 +50,44 @@ static const char *zpci_state_str(pci_channel_state_t state) }; } +static int debug_log_header_fn(debug_info_t *id, struct debug_view *view, + int area, debug_entry_t *entry, char *out_buf, + size_t out_buf_size) +{ + unsigned long sec, usec; + unsigned int level; + char *except_str; + int rc = 0; + + level = entry->level; + sec = entry->clock; + usec = do_div(sec, USEC_PER_SEC); + + if (entry->exception) + except_str = "*"; + else + except_str = "-"; + rc += scnprintf(out_buf, out_buf_size, "%011ld:%06lu %1u %1s %04u ", + sec, usec, level, except_str, + entry->cpu); + return rc; +} + +static int debug_prolog_header(debug_info_t *id, struct debug_view *view, + char *out_buf, size_t out_buf_size) +{ + return scnprintf(out_buf, out_buf_size, "sec:usec level except cpu msg\n"); +} + +static struct debug_view debug_log_view = { + "pci_msg_log", + &debug_prolog_header, + &debug_log_header_fn, + &debug_sprintf_format_fn, + NULL, + NULL +}; + /** * zpci_report_status - Report the status of operations on a PCI device * @zdev: The PCI device for which to report status @@ -59,6 +99,8 @@ static const char *zpci_state_str(pci_channel_state_t state) * Event Data mechanism. Besides the operation and status strings the report * also contains additional information about the device deemed useful for * debug such as the currently bound device driver, if any, and error state. + * Additionally a string representation of pci_debug_msg_id, or as much as fits, + * is also included. * * Return: 0 on success an error code < 0 otherwise. */ @@ -92,6 +134,11 @@ int zpci_report_status(struct zpci_dev *zdev, const char *operation, const char buf += scnprintf(buf, end - buf, "state: %s\n", (pdev) ? zpci_state_str(pdev->error_state) : "n/a"); buf += scnprintf(buf, end - buf, "driver: %s\n", (driver) ? driver->name : "n/a"); + ret = debug_dump(pci_debug_msg_id, &debug_log_view, buf, end - buf, true); + if (ret < 0) + pr_err("Reading PCI debug messages failed with code %d\n", ret); + else + buf += ret; report->header.version = 1; report->header.action = SCLP_ERRNOTIFY_AQ_INFO_LOG; From 62b87e0c9a2cb35a74b47766743135e175f488b4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 9 Dec 2024 10:45:16 +0100 Subject: [PATCH 22/48] s390/mm: Remove incorrect comment Remove an outdated comment that is also located at a random place. The generic statement that read permissions imply execute permissions is wrong since the instruction execution-protection facility is available. Reviewed-by: Gerald Schaefer Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/pgtable.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 3b21aecf69ac..235360314f67 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -461,13 +461,6 @@ static inline int is_module_addr(void *addr) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_YOUNG | _PAGE_DIRTY) -/* - * On s390 the page table entry has an invalid bit and a read-only bit. - * Read permission implies execute permission and write permission - * implies read permission. - */ - /*xwr*/ - /* * Segment entry (large page) protection definitions. */ From db449b147cef0a210c61dc8840424456fbe45cc8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 9 Dec 2024 10:45:17 +0100 Subject: [PATCH 23/48] s390/mm: Remove unused PAGE_KERNEL_EXEC and friends Remove unused PAGE_KERNEL_EXEC, SEGMENT_KERNEL_EXEC, and REGION3_KERNEL_EXEC. Reviewed-by: Gerald Schaefer Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/pgtable.h | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 235360314f67..889974fc9810 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -458,8 +458,6 @@ static inline int is_module_addr(void *addr) _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC) #define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ _PAGE_PROTECT | _PAGE_NOEXEC) -#define PAGE_KERNEL_EXEC __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ - _PAGE_YOUNG | _PAGE_DIRTY) /* * Segment entry (large page) protection definitions. @@ -494,12 +492,6 @@ static inline int is_module_addr(void *addr) _SEGMENT_ENTRY_YOUNG | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_KERNEL_EXEC __pgprot(_SEGMENT_ENTRY | \ - _SEGMENT_ENTRY_LARGE | \ - _SEGMENT_ENTRY_READ | \ - _SEGMENT_ENTRY_WRITE | \ - _SEGMENT_ENTRY_YOUNG | \ - _SEGMENT_ENTRY_DIRTY) /* * Region3 entry (large page) protection definitions. @@ -520,13 +512,6 @@ static inline int is_module_addr(void *addr) _REGION3_ENTRY_YOUNG | \ _REGION_ENTRY_PROTECT | \ _REGION_ENTRY_NOEXEC) -#define REGION3_KERNEL_EXEC __pgprot(_REGION_ENTRY_TYPE_R3 | \ - _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_WRITE | \ - _REGION3_ENTRY_YOUNG | \ - _REGION3_ENTRY_DIRTY) static inline bool mm_p4d_folded(struct mm_struct *mm) { From f8107a8be0b2e92798ded4aff45e3a94b763ce61 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 9 Dec 2024 10:45:18 +0100 Subject: [PATCH 24/48] s390/mm: Simplify noexec page protection handling By default page protection definitions like PAGE_RX have the _PAGE_NOEXEC bit set. For older machines without the instruction execution protection facility this bit is not allowed to be used in page table entries, and therefore must be removed. This is done at a couple of page table walkers, but also at some but not all page table modification functions like ptep_modify_prot_commit(). Avoid all of this and change the page, segment and region3 protection definitions so that the noexec bit is masked out automatically if the instruction execution-protection facility is not available. This is similar to what also various other architectures do which had to solve the same problem. Reviewed-by: Gerald Schaefer Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/boot/boot.h | 1 - arch/s390/boot/startup.c | 13 ++++- arch/s390/boot/vmem.c | 8 --- arch/s390/include/asm/pgtable.h | 93 +++++++++++++++++++++------------ arch/s390/kernel/setup.c | 1 + arch/s390/mm/init.c | 9 ++++ arch/s390/mm/mmap.c | 42 ++++++++------- arch/s390/mm/pageattr.c | 6 --- arch/s390/mm/pgtable.c | 2 - arch/s390/mm/vmem.c | 8 --- 10 files changed, 106 insertions(+), 77 deletions(-) diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 7521a9d75fa2..56244fe78182 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -13,7 +13,6 @@ struct machine_info { unsigned char has_edat1 : 1; unsigned char has_edat2 : 1; - unsigned char has_nx : 1; }; struct vmlinux_info { diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index abe6e6c0ab98..e00aed22d0d4 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -30,6 +30,9 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata_preserved(max_mappable); +unsigned long __bootdata_preserved(page_noexec_mask); +unsigned long __bootdata_preserved(segment_noexec_mask); +unsigned long __bootdata_preserved(region_noexec_mask); int __bootdata_preserved(relocate_lowcore); u64 __bootdata_preserved(stfle_fac_list[16]); @@ -51,8 +54,14 @@ static void detect_facilities(void) } if (test_facility(78)) machine.has_edat2 = 1; - if (test_facility(130)) - machine.has_nx = 1; + page_noexec_mask = -1UL; + segment_noexec_mask = -1UL; + region_noexec_mask = -1UL; + if (!test_facility(130)) { + page_noexec_mask &= ~_PAGE_NOEXEC; + segment_noexec_mask &= ~_SEGMENT_ENTRY_NOEXEC; + region_noexec_mask &= ~_REGION_ENTRY_NOEXEC; + } } static int cmma_test_essa(void) diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 00a3c59ac218..a0c97cde5146 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -67,8 +67,6 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern int i; pte_z = __pte(__pa(kasan_early_shadow_page) | pgprot_val(PAGE_KERNEL_RO)); - if (!machine.has_nx) - pte_z = clear_pte_bit(pte_z, __pgprot(_PAGE_NOEXEC)); crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z)); crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); @@ -294,8 +292,6 @@ static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long e continue; entry = __pte(_pa(addr, PAGE_SIZE, mode)); entry = set_pte_bit(entry, PAGE_KERNEL); - if (!machine.has_nx) - entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC)); set_pte(pte, entry); pages++; } @@ -320,8 +316,6 @@ static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long e if (can_large_pmd(pmd, addr, next, mode)) { entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode)); entry = set_pmd_bit(entry, SEGMENT_KERNEL); - if (!machine.has_nx) - entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmd, entry); pages++; continue; @@ -353,8 +347,6 @@ static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long e if (can_large_pud(pud, addr, next, mode)) { entry = __pud(_pa(addr, _REGION3_SIZE, mode)); entry = set_pud_bit(entry, REGION3_KERNEL); - if (!machine.has_nx) - entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC)); set_pud(pud, entry); pages++; continue; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 889974fc9810..a3b51056a177 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -124,6 +124,8 @@ static inline int is_module_addr(void *addr) #define KASLR_LEN 0UL #endif +void setup_protection_map(void); + /* * A 64 bit pagetable entry of S390 has following format: * | PFRA |0IPC| OS | @@ -442,76 +444,107 @@ static inline int is_module_addr(void *addr) /* * Page protection definitions. */ -#define PAGE_NONE __pgprot(_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | \ +#define __PAGE_NONE (_PAGE_PRESENT | _PAGE_INVALID | _PAGE_PROTECT) +#define __PAGE_RO (_PAGE_PRESENT | _PAGE_READ | \ _PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RX __pgprot(_PAGE_PRESENT | _PAGE_READ | \ +#define __PAGE_RX (_PAGE_PRESENT | _PAGE_READ | \ _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RW __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_RW (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_NOEXEC | _PAGE_INVALID | _PAGE_PROTECT) -#define PAGE_RWX __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_RWX (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_INVALID | _PAGE_PROTECT) - -#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_SHARED (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC) -#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ +#define __PAGE_KERNEL (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \ _PAGE_YOUNG | _PAGE_DIRTY | _PAGE_NOEXEC) -#define PAGE_KERNEL_RO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ +#define __PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_YOUNG | \ _PAGE_PROTECT | _PAGE_NOEXEC) +extern unsigned long page_noexec_mask; + +#define __pgprot_page_mask(x) __pgprot((x) & page_noexec_mask) + +#define PAGE_NONE __pgprot_page_mask(__PAGE_NONE) +#define PAGE_RO __pgprot_page_mask(__PAGE_RO) +#define PAGE_RX __pgprot_page_mask(__PAGE_RX) +#define PAGE_RW __pgprot_page_mask(__PAGE_RW) +#define PAGE_RWX __pgprot_page_mask(__PAGE_RWX) +#define PAGE_SHARED __pgprot_page_mask(__PAGE_SHARED) +#define PAGE_KERNEL __pgprot_page_mask(__PAGE_KERNEL) +#define PAGE_KERNEL_RO __pgprot_page_mask(__PAGE_KERNEL_RO) + /* * Segment entry (large page) protection definitions. */ -#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_NONE (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_INVALID | \ _SEGMENT_ENTRY_PROTECT) -#define SEGMENT_RO __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RO (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RX __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RX (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_READ) -#define SEGMENT_RW __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RW (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_RWX __pgprot(_SEGMENT_ENTRY_PRESENT | \ +#define __SEGMENT_RWX (_SEGMENT_ENTRY_PRESENT | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE) -#define SEGMENT_KERNEL __pgprot(_SEGMENT_ENTRY | \ +#define __SEGMENT_KERNEL (_SEGMENT_ENTRY | \ _SEGMENT_ENTRY_LARGE | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_WRITE | \ _SEGMENT_ENTRY_YOUNG | \ _SEGMENT_ENTRY_DIRTY | \ _SEGMENT_ENTRY_NOEXEC) -#define SEGMENT_KERNEL_RO __pgprot(_SEGMENT_ENTRY | \ +#define __SEGMENT_KERNEL_RO (_SEGMENT_ENTRY | \ _SEGMENT_ENTRY_LARGE | \ _SEGMENT_ENTRY_READ | \ _SEGMENT_ENTRY_YOUNG | \ _SEGMENT_ENTRY_PROTECT | \ _SEGMENT_ENTRY_NOEXEC) +extern unsigned long segment_noexec_mask; + +#define __pgprot_segment_mask(x) __pgprot((x) & segment_noexec_mask) + +#define SEGMENT_NONE __pgprot_segment_mask(__SEGMENT_NONE) +#define SEGMENT_RO __pgprot_segment_mask(__SEGMENT_RO) +#define SEGMENT_RX __pgprot_segment_mask(__SEGMENT_RX) +#define SEGMENT_RW __pgprot_segment_mask(__SEGMENT_RW) +#define SEGMENT_RWX __pgprot_segment_mask(__SEGMENT_RWX) +#define SEGMENT_KERNEL __pgprot_segment_mask(__SEGMENT_KERNEL) +#define SEGMENT_KERNEL_RO __pgprot_segment_mask(__SEGMENT_KERNEL_RO) + /* * Region3 entry (large page) protection definitions. */ -#define REGION3_KERNEL __pgprot(_REGION_ENTRY_TYPE_R3 | \ +#define __REGION3_KERNEL (_REGION_ENTRY_TYPE_R3 | \ _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_WRITE | \ - _REGION3_ENTRY_YOUNG | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_WRITE | \ + _REGION3_ENTRY_YOUNG | \ _REGION3_ENTRY_DIRTY | \ _REGION_ENTRY_NOEXEC) -#define REGION3_KERNEL_RO __pgprot(_REGION_ENTRY_TYPE_R3 | \ - _REGION3_ENTRY_PRESENT | \ - _REGION3_ENTRY_LARGE | \ - _REGION3_ENTRY_READ | \ - _REGION3_ENTRY_YOUNG | \ - _REGION_ENTRY_PROTECT | \ - _REGION_ENTRY_NOEXEC) +#define __REGION3_KERNEL_RO (_REGION_ENTRY_TYPE_R3 | \ + _REGION3_ENTRY_PRESENT | \ + _REGION3_ENTRY_LARGE | \ + _REGION3_ENTRY_READ | \ + _REGION3_ENTRY_YOUNG | \ + _REGION_ENTRY_PROTECT | \ + _REGION_ENTRY_NOEXEC) + +extern unsigned long region_noexec_mask; + +#define __pgprot_region_mask(x) __pgprot((x) & region_noexec_mask) + +#define REGION3_KERNEL __pgprot_region_mask(__REGION3_KERNEL) +#define REGION3_KERNEL_RO __pgprot_region_mask(__REGION3_KERNEL_RO) static inline bool mm_p4d_folded(struct mm_struct *mm) { @@ -1412,8 +1445,6 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) pte_t __pte; __pte = __pte(physpage | pgprot_val(pgprot)); - if (!MACHINE_HAS_NX) - __pte = clear_pte_bit(__pte, __pgprot(_PAGE_NOEXEC)); return pte_mkyoung(__pte); } @@ -1781,8 +1812,6 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) { - if (!MACHINE_HAS_NX) - entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC)); set_pmd(pmdp, entry); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 95324aaa17e2..0ce550faf073 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -971,6 +971,7 @@ void __init setup_arch(char **cmdline_p) if (test_facility(193)) static_branch_enable(&cpu_has_bear); + setup_protection_map(); /* * Create kernel page tables. */ diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 7a96623a9d2e..f2298f7a3f21 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -56,6 +56,15 @@ pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir"); struct ctlreg __bootdata_preserved(s390_invalid_asce); +unsigned long __bootdata_preserved(page_noexec_mask); +EXPORT_SYMBOL(page_noexec_mask); + +unsigned long __bootdata_preserved(segment_noexec_mask); +EXPORT_SYMBOL(segment_noexec_mask); + +unsigned long __bootdata_preserved(region_noexec_mask); +EXPORT_SYMBOL(region_noexec_mask); + unsigned long empty_zero_page, zero_page_mask; EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(zero_page_mask); diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 33f3504be90b..76f376876e0d 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -196,22 +196,28 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) } } -static const pgprot_t protection_map[16] = { - [VM_NONE] = PAGE_NONE, - [VM_READ] = PAGE_RO, - [VM_WRITE] = PAGE_RO, - [VM_WRITE | VM_READ] = PAGE_RO, - [VM_EXEC] = PAGE_RX, - [VM_EXEC | VM_READ] = PAGE_RX, - [VM_EXEC | VM_WRITE] = PAGE_RX, - [VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX, - [VM_SHARED] = PAGE_NONE, - [VM_SHARED | VM_READ] = PAGE_RO, - [VM_SHARED | VM_WRITE] = PAGE_RW, - [VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW, - [VM_SHARED | VM_EXEC] = PAGE_RX, - [VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX, - [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX, - [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX -}; +static pgprot_t protection_map[16] __ro_after_init; + +void __init setup_protection_map(void) +{ + pgprot_t *pm = protection_map; + + pm[VM_NONE] = PAGE_NONE; + pm[VM_READ] = PAGE_RO; + pm[VM_WRITE] = PAGE_RO; + pm[VM_WRITE | VM_READ] = PAGE_RO; + pm[VM_EXEC] = PAGE_RX; + pm[VM_EXEC | VM_READ] = PAGE_RX; + pm[VM_EXEC | VM_WRITE] = PAGE_RX; + pm[VM_EXEC | VM_WRITE | VM_READ] = PAGE_RX; + pm[VM_SHARED] = PAGE_NONE; + pm[VM_SHARED | VM_READ] = PAGE_RO; + pm[VM_SHARED | VM_WRITE] = PAGE_RW; + pm[VM_SHARED | VM_WRITE | VM_READ] = PAGE_RW; + pm[VM_SHARED | VM_EXEC] = PAGE_RX; + pm[VM_SHARED | VM_EXEC | VM_READ] = PAGE_RX; + pm[VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_RWX; + pm[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_RWX; +} + DECLARE_VM_GET_PAGE_PROT diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 8f56a21a077f..eae97fb61712 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -109,8 +109,6 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, } else if (flags & SET_MEMORY_DEF) { new = __pte(pte_val(new) & PAGE_MASK); new = set_pte_bit(new, PAGE_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pte_bit(new, __pgprot(_PAGE_NOEXEC)); } pgt_set((unsigned long *)ptep, pte_val(new), addr, CRDTE_DTT_PAGE); ptep++; @@ -167,8 +165,6 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, } else if (flags & SET_MEMORY_DEF) { new = __pmd(pmd_val(new) & PMD_MASK); new = set_pmd_bit(new, SEGMENT_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); } pgt_set((unsigned long *)pmdp, pmd_val(new), addr, CRDTE_DTT_SEGMENT); } @@ -256,8 +252,6 @@ static void modify_pud_page(pud_t *pudp, unsigned long addr, } else if (flags & SET_MEMORY_DEF) { new = __pud(pud_val(new) & PUD_MASK); new = set_pud_bit(new, REGION3_KERNEL); - if (!MACHINE_HAS_NX) - new = clear_pud_bit(new, __pgprot(_REGION_ENTRY_NOEXEC)); } pgt_set((unsigned long *)pudp, pud_val(new), addr, CRDTE_DTT_REGION3); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index cea5dba80468..f05e62e037c2 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -360,8 +360,6 @@ void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pgste_t pgste; struct mm_struct *mm = vma->vm_mm; - if (!MACHINE_HAS_NX) - pte = clear_pte_bit(pte, __pgprot(_PAGE_NOEXEC)); if (mm_has_pgste(mm)) { pgste = pgste_get(ptep); pgste_set_key(ptep, pgste, pte, mm); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 665b8228afeb..7c684c54e721 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -171,9 +171,6 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr, pte_t *pte; prot = pgprot_val(PAGE_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_PAGE_NOEXEC; - pte = pte_offset_kernel(pmd, addr); for (; addr < end; addr += PAGE_SIZE, pte++) { if (!add) { @@ -230,9 +227,6 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr, pte_t *pte; prot = pgprot_val(SEGMENT_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_SEGMENT_ENTRY_NOEXEC; - pmd = pmd_offset(pud, addr); for (; addr < end; addr = next, pmd++) { next = pmd_addr_end(addr, end); @@ -324,8 +318,6 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end, pmd_t *pmd; prot = pgprot_val(REGION3_KERNEL); - if (!MACHINE_HAS_NX) - prot &= ~_REGION_ENTRY_NOEXEC; pud = pud_offset(p4d, addr); for (; addr < end; addr = next, pud++) { next = pud_addr_end(addr, end); From 807e39ed4da2cd7e6fe2c38a8d41d36f4c59e861 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Thu, 12 Dec 2024 10:55:03 +0100 Subject: [PATCH 25/48] s390/lib: Use exrl instead of ex in string functions exrl is present in all machines currently supported in the linux kernel, therefore prefer it over ex. This saves one instruction and doesn't need an additional register to hold the address of the target instruction. Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/lib/mem.S | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 08f60a42b9a6..d026debf250c 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -34,8 +34,7 @@ SYM_FUNC_START(__memmove) la %r3,256(%r3) brctg %r0,.Lmemmove_forward_loop .Lmemmove_forward_remainder: - larl %r5,.Lmemmove_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemmove_mvc .Lmemmove_exit: BR_EX %r14 .Lmemmove_reverse: @@ -83,8 +82,7 @@ SYM_FUNC_START(__memset) la %r1,256(%r1) brctg %r3,.Lmemset_clear_loop .Lmemset_clear_remainder: - larl %r3,.Lmemset_xc - ex %r4,0(%r3) + exrl %r4,.Lmemset_xc .Lmemset_exit: BR_EX %r14 .Lmemset_fill: @@ -102,8 +100,7 @@ SYM_FUNC_START(__memset) brctg %r5,.Lmemset_fill_loop .Lmemset_fill_remainder: stc %r3,0(%r1) - larl %r5,.Lmemset_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemset_mvc BR_EX %r14 .Lmemset_fill_exit: stc %r3,0(%r1) @@ -132,8 +129,7 @@ SYM_FUNC_START(__memcpy) lgr %r1,%r2 jnz .Lmemcpy_loop .Lmemcpy_remainder: - larl %r5,.Lmemcpy_mvc - ex %r4,0(%r5) + exrl %r4,.Lmemcpy_mvc .Lmemcpy_exit: BR_EX %r14 .Lmemcpy_loop: @@ -175,8 +171,7 @@ SYM_FUNC_START(__memset\bits) brctg %r5,.L__memset_loop\bits .L__memset_remainder\bits: \insn %r3,0(%r1) - larl %r5,.L__memset_mvc\bits - ex %r4,0(%r5) + exrl %r4,.L__memset_mvc\bits BR_EX %r14 .L__memset_store\bits: \insn %r3,0(%r2) From 2478d43ed621c4d15549142c0cfe6fa4a05e5f10 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Thu, 12 Dec 2024 17:17:18 +0100 Subject: [PATCH 26/48] s390/diag: Create misc device /dev/diag Create a misc device /dev/diag to fetch diagnose specific information from the kernel and provide it to userspace. Reviewed-by: Heiko Carstens Signed-off-by: Sumanth Korikkar Signed-off-by: Alexander Gordeev --- arch/s390/kernel/diag/Makefile | 1 + arch/s390/kernel/diag/diag_misc.c | 45 +++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 arch/s390/kernel/diag/Makefile create mode 100644 arch/s390/kernel/diag/diag_misc.c diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile new file mode 100644 index 000000000000..15bfa866c44d --- /dev/null +++ b/arch/s390/kernel/diag/Makefile @@ -0,0 +1 @@ +obj-y := diag_misc.o diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c new file mode 100644 index 000000000000..cd5b78880d7d --- /dev/null +++ b/arch/s390/kernel/diag/diag_misc.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Provide diagnose information via misc device /dev/diag. + * + * Copyright IBM Corp. 2024 + */ + +#include +#include +#include +#include +#include +#include + +static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + long rc; + + switch (cmd) { + default: + rc = -ENOIOCTLCMD; + break; + } + return rc; +} + +static const struct file_operations fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = diag_ioctl, +}; + +static struct miscdevice diagdev = { + .name = "diag", + .minor = MISC_DYNAMIC_MINOR, + .fops = &fops, + .mode = 0444, +}; + +static int diag_init(void) +{ + return misc_register(&diagdev); +} + +device_initcall(diag_init); From 90e6f191e1ee094d13faae5fc29af7b5baf9d1b8 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Thu, 12 Dec 2024 17:17:19 +0100 Subject: [PATCH 27/48] s390/diag324: Retrieve power readings via diag 0x324 Retrieve electrical power readings for resources in a computing environment via diag 0x324. diag 0x324 stores the power readings in the power information block (pib). Provide power readings from pib via diag324 ioctl interface. diag324 ioctl provides new pib to the user only if the threshold time has passed since the last call. Otherwise, cache data is returned. When there are no active readers, cleanup of pib buffer is performed. Reviewed-by: Heiko Carstens Signed-off-by: Sumanth Korikkar Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/diag.h | 1 + arch/s390/include/asm/sclp.h | 1 + arch/s390/include/uapi/asm/diag.h | 24 ++++ arch/s390/kernel/Makefile | 1 + arch/s390/kernel/diag.c | 1 + arch/s390/kernel/diag/Makefile | 2 +- arch/s390/kernel/diag/diag324.c | 224 +++++++++++++++++++++++++++++ arch/s390/kernel/diag/diag_ioctl.h | 10 ++ arch/s390/kernel/diag/diag_misc.c | 9 ++ drivers/s390/char/sclp.h | 4 +- drivers/s390/char/sclp_early.c | 2 + 11 files changed, 277 insertions(+), 2 deletions(-) create mode 100644 arch/s390/include/uapi/asm/diag.h create mode 100644 arch/s390/kernel/diag/diag324.c create mode 100644 arch/s390/kernel/diag/diag_ioctl.h diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index e1316e181230..c1050f4a7107 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -38,6 +38,7 @@ enum diag_stat_enum { DIAG_STAT_X308, DIAG_STAT_X318, DIAG_STAT_X320, + DIAG_STAT_X324, DIAG_STAT_X49C, DIAG_STAT_X500, NR_DIAG_STAT diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index eb00fa1771da..de369a062545 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -89,6 +89,7 @@ struct sclp_info { unsigned char has_gisaf : 1; unsigned char has_diag318 : 1; unsigned char has_diag320 : 1; + unsigned char has_diag324 : 1; unsigned char has_sipl : 1; unsigned char has_sipl_eckd : 1; unsigned char has_dirq : 1; diff --git a/arch/s390/include/uapi/asm/diag.h b/arch/s390/include/uapi/asm/diag.h new file mode 100644 index 000000000000..361c9ba9d4f5 --- /dev/null +++ b/arch/s390/include/uapi/asm/diag.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Diag ioctls and its associated structures definitions. + * + * Copyright IBM Corp. 2024 + */ + +#ifndef __S390_UAPI_ASM_DIAG_H +#define __S390_UAPI_ASM_DIAG_H + +#include + +#define DIAG_MAGIC_STR 'D' + +struct diag324_pib { + __u64 address; + __u64 sequence; +}; + +/* Diag ioctl definitions */ +#define DIAG324_GET_PIBBUF _IOWR(DIAG_MAGIC_STR, 0x77, struct diag324_pib) +#define DIAG324_GET_PIBLEN _IOR(DIAG_MAGIC_STR, 0x78, size_t) + +#endif /* __S390_UAPI_ASM_DIAG_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 48caae8c7e10..887450df44bd 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -44,6 +44,7 @@ obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o wti.o +obj-y += diag/ extra-y += vmlinux.lds diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index cdd6e31344fa..d786b9e58a82 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -53,6 +53,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" }, [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" }, [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" }, + [DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" }, [DIAG_STAT_X49C] = { .code = 0x49c, .name = "Warning-Track Interruption" }, [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, }; diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile index 15bfa866c44d..6083c1fd2f4b 100644 --- a/arch/s390/kernel/diag/Makefile +++ b/arch/s390/kernel/diag/Makefile @@ -1 +1 @@ -obj-y := diag_misc.o +obj-y := diag_misc.o diag324.o diff --git a/arch/s390/kernel/diag/diag324.c b/arch/s390/kernel/diag/diag324.c new file mode 100644 index 000000000000..7fa4c0b7eb6c --- /dev/null +++ b/arch/s390/kernel/diag/diag324.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request power readings for resources in a computing environment via + * diag 0x324. diag 0x324 stores the power readings in the power information + * block (pib). + * + * Copyright IBM Corp. 2024 + */ + +#define pr_fmt(fmt) "diag324: " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "diag_ioctl.h" + +enum subcode { + DIAG324_SUBC_0 = 0, + DIAG324_SUBC_1 = 1, + DIAG324_SUBC_2 = 2, +}; + +enum retcode { + DIAG324_RET_SUCCESS = 0x0001, + DIAG324_RET_SUBC_NOTAVAIL = 0x0103, + DIAG324_RET_INSUFFICIENT_SIZE = 0x0104, + DIAG324_RET_READING_UNAVAILABLE = 0x0105, +}; + +union diag324_response { + u64 response; + struct { + u64 installed : 32; + u64 : 16; + u64 rc : 16; + } sc0; + struct { + u64 format : 16; + u64 : 16; + u64 pib_len : 16; + u64 rc : 16; + } sc1; + struct { + u64 : 48; + u64 rc : 16; + } sc2; +}; + +union diag324_request { + u64 request; + struct { + u64 : 32; + u64 allocated : 16; + u64 : 12; + u64 sc : 4; + } sc2; +}; + +struct pib { + u32 : 8; + u32 num : 8; + u32 len : 16; + u32 : 24; + u32 hlen : 8; + u64 : 64; + u64 intv; + u8 r[]; +} __packed; + +struct pibdata { + struct pib *pib; + ktime_t expire; + u64 sequence; + size_t len; + int rc; +}; + +static DEFINE_MUTEX(pibmutex); +static struct pibdata pibdata; + +#define PIBWORK_DELAY (5 * NSEC_PER_SEC) + +static void pibwork_handler(struct work_struct *work); +static DECLARE_DELAYED_WORK(pibwork, pibwork_handler); + +static unsigned long diag324(unsigned long subcode, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr }; + + diag_stat_inc(DIAG_STAT_X324); + asm volatile("diag %[rp],%[subcode],0x324\n" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static void pibwork_handler(struct work_struct *work) +{ + struct pibdata *data = &pibdata; + ktime_t timedout; + + mutex_lock(&pibmutex); + timedout = ktime_add_ns(data->expire, PIBWORK_DELAY); + if (ktime_before(ktime_get(), timedout)) { + mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + goto out; + } + vfree(data->pib); + data->pib = NULL; +out: + mutex_unlock(&pibmutex); +} + +static void pib_update(struct pibdata *data) +{ + union diag324_request req = { .sc2.sc = DIAG324_SUBC_2, .sc2.allocated = data->len }; + union diag324_response res; + int rc; + + memset(data->pib, 0, data->len); + res.response = diag324(req.request, data->pib); + switch (res.sc2.rc) { + case DIAG324_RET_SUCCESS: + rc = 0; + break; + case DIAG324_RET_SUBC_NOTAVAIL: + rc = -ENOENT; + break; + case DIAG324_RET_INSUFFICIENT_SIZE: + rc = -EMSGSIZE; + break; + case DIAG324_RET_READING_UNAVAILABLE: + rc = -EBUSY; + break; + default: + rc = -EINVAL; + } + data->rc = rc; +} + +long diag324_pibbuf(unsigned long arg) +{ + struct diag324_pib __user *udata = (struct diag324_pib __user *)arg; + struct pibdata *data = &pibdata; + static bool first = true; + u64 address; + int rc; + + if (!data->len) + return -EOPNOTSUPP; + if (get_user(address, &udata->address)) + return -EFAULT; + mutex_lock(&pibmutex); + rc = -ENOMEM; + if (!data->pib) + data->pib = vmalloc(data->len); + if (!data->pib) + goto out; + if (first || ktime_after(ktime_get(), data->expire)) { + pib_update(data); + data->sequence++; + data->expire = ktime_add_ns(ktime_get(), tod_to_ns(data->pib->intv)); + mod_delayed_work(system_wq, &pibwork, nsecs_to_jiffies(PIBWORK_DELAY)); + first = false; + } + rc = data->rc; + if (rc != 0 && rc != -EBUSY) + goto out; + rc = copy_to_user((void __user *)address, data->pib, data->pib->len); + rc |= put_user(data->sequence, &udata->sequence); + if (rc) + rc = -EFAULT; +out: + mutex_unlock(&pibmutex); + return rc; +} + +long diag324_piblen(unsigned long arg) +{ + struct pibdata *data = &pibdata; + + if (!data->len) + return -EOPNOTSUPP; + if (put_user(data->len, (size_t __user *)arg)) + return -EFAULT; + return 0; +} + +static int __init diag324_init(void) +{ + union diag324_response res; + unsigned long installed; + + if (!sclp.has_diag324) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_0, NULL); + if (res.sc0.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + installed = res.response; + if (!test_bit_inv(DIAG324_SUBC_1, &installed)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG324_SUBC_2, &installed)) + return -EOPNOTSUPP; + res.response = diag324(DIAG324_SUBC_1, NULL); + if (res.sc1.rc != DIAG324_RET_SUCCESS) + return -EOPNOTSUPP; + pibdata.len = res.sc1.pib_len; + return 0; +} +device_initcall(diag324_init); diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h new file mode 100644 index 000000000000..de5365b23e11 --- /dev/null +++ b/arch/s390/kernel/diag/diag_ioctl.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _DIAG_IOCTL_H +#define _DIAG_IOCTL_H + +#include + +long diag324_pibbuf(unsigned long arg); +long diag324_piblen(unsigned long arg); + +#endif /* _DIAG_IOCTL_H */ diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c index cd5b78880d7d..1f4f5c757329 100644 --- a/arch/s390/kernel/diag/diag_misc.c +++ b/arch/s390/kernel/diag/diag_misc.c @@ -12,11 +12,20 @@ #include #include +#include +#include "diag_ioctl.h" + static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { long rc; switch (cmd) { + case DIAG324_GET_PIBLEN: + rc = diag324_piblen(arg); + break; + case DIAG324_GET_PIBBUF: + rc = diag324_pibbuf(arg); + break; default: rc = -ENOIOCTLCMD; break; diff --git a/drivers/s390/char/sclp.h b/drivers/s390/char/sclp.h index 6c91e422927f..847674ca94c3 100644 --- a/drivers/s390/char/sclp.h +++ b/drivers/s390/char/sclp.h @@ -196,7 +196,9 @@ struct read_info_sccb { u8 byte_134; /* 134 */ u8 cpudirq; /* 135 */ u16 cbl; /* 136-137 */ - u8 _pad_138[EXT_SCCB_READ_SCP - 138]; + u8 byte_138; /* 138 */ + u8 byte_139; /* 139 */ + u8 _pad_140[EXT_SCCB_READ_SCP - 140]; } __packed __aligned(PAGE_SIZE); struct read_storage_sccb { diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 29156455970e..b7a55ea6c77a 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -64,6 +64,8 @@ static void __init sclp_early_facilities_detect(void) sclp.has_sipl = !!(sccb->cbl & 0x4000); sclp.has_sipl_eckd = !!(sccb->cbl & 0x2000); } + if (sccb->cpuoff > 139) + sclp.has_diag324 = !!(sccb->byte_139 & 0x80); sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; sclp.rzm = sccb->rnsize ? sccb->rnsize : sccb->rnsize2; sclp.rzm <<= 20; From 388cf16d90f6cce74ce531f306e2c77d6d66f2f7 Mon Sep 17 00:00:00 2001 From: Sumanth Korikkar Date: Thu, 12 Dec 2024 17:17:20 +0100 Subject: [PATCH 28/48] s390/diag: Move diag.c to diag specific folder Move implementation of s390 diagnose code to diag specific folder. Reviewed-by: Heiko Carstens Signed-off-by: Sumanth Korikkar Signed-off-by: Alexander Gordeev --- arch/s390/kernel/Makefile | 2 +- arch/s390/kernel/diag/Makefile | 2 +- arch/s390/kernel/{ => diag}/diag.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename arch/s390/kernel/{ => diag}/diag.c (99%) diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 887450df44bd..db5f3a3faefb 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -38,7 +38,7 @@ CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls obj-y := head64.o traps.o time.o process.o early.o setup.o idle.o vtime.o obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o +obj-y += debug.o irq.o ipl.o dis.o vdso.o cpufeature.o obj-y += sysinfo.o lgr.o os_info.o ctlreg.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o kdebugfs.o alternative.o diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile index 6083c1fd2f4b..44d3e0471151 100644 --- a/arch/s390/kernel/diag/Makefile +++ b/arch/s390/kernel/diag/Makefile @@ -1 +1 @@ -obj-y := diag_misc.o diag324.o +obj-y := diag_misc.o diag324.o diag.o diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag/diag.c similarity index 99% rename from arch/s390/kernel/diag.c rename to arch/s390/kernel/diag/diag.c index d786b9e58a82..91161f09299f 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag/diag.c @@ -17,7 +17,7 @@ #include #include #include -#include "entry.h" +#include "../entry.h" struct diag_stat { unsigned int counter[NR_DIAG_STAT]; From bc3d4402a09cf072a56945e615fc587f8c1bec04 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Thu, 23 Mar 2023 17:40:41 +0100 Subject: [PATCH 29/48] s390/qdio: Rename feature flag aif_osa to aif_qdio This feature is not only utilized by OSA, but by QDIO in general. Clear up possible confusions. Signed-off-by: Benjamin Block Reviewed-by: Steffen Maier Acked-by: Alexandra Winter Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/css_chars.h | 2 +- drivers/s390/cio/qdio.h | 2 +- drivers/s390/cio/qdio_setup.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h index 638137d46c85..a03f64033760 100644 --- a/arch/s390/include/asm/css_chars.h +++ b/arch/s390/include/asm/css_chars.h @@ -25,7 +25,7 @@ struct css_general_char { u64 : 2; u64 : 3; - u64 aif_osa : 1; /* bit 67 */ + u64 aif_qdio : 1;/* bit 67 */ u64 : 12; u64 eadm_rf : 1; /* bit 80 */ u64 : 1; diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 641f0dbb65a9..5c09f2ef874e 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -266,7 +266,7 @@ struct qdio_irq { #define is_thinint_irq(irq) \ (irq->qib.qfmt == QDIO_IQDIO_QFMT || \ - css_general_characteristics.aif_osa) + css_general_characteristics.aif_qdio) #define qperf(__qdev, __attr) ((__qdev)->perf_stat.(__attr)) diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 99c0fd23022d..1f532b112194 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -423,7 +423,7 @@ int __init qdio_setup_init(void) /* Check for OSA/FCP thin interrupts (bit 67). */ DBF_EVENT("thinint:%1d", - (css_general_characteristics.aif_osa) ? 1 : 0); + (css_general_characteristics.aif_qdio) ? 1 : 0); /* Check for QEBSM support in general (bit 58). */ DBF_EVENT("cssQEBSM:%1d", css_general_characteristics.qebsm); From efd34db6e6813c6e573f34353ce4ad5e81f56dbd Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Tue, 4 Apr 2023 20:33:59 +0200 Subject: [PATCH 30/48] s390/cio: Use array indices instead of pointer arithmetic ccw_device_get_ciw() already uses array indices to iterate over the vector of CIWs, but then switches to pointer arithmetic when returning the one it found. Change this to make it more consistent. Signed-off-by: Benjamin Block Reviewed-by: Vineeth Vijayan Signed-off-by: Alexander Gordeev --- drivers/s390/cio/device_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index acd6790dba4d..61c07b4a0fe8 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -445,7 +445,7 @@ struct ciw *ccw_device_get_ciw(struct ccw_device *cdev, __u32 ct) return NULL; for (ciw_cnt = 0; ciw_cnt < MAX_CIWS; ciw_cnt++) if (cdev->private->dma_area->senseid.ciw[ciw_cnt].ct == ct) - return cdev->private->dma_area->senseid.ciw + ciw_cnt; + return &cdev->private->dma_area->senseid.ciw[ciw_cnt]; return NULL; } From 30e037ad7eb44bcf56fb1a845cc718d50c363310 Mon Sep 17 00:00:00 2001 From: Benjamin Block Date: Wed, 5 Apr 2023 16:28:34 +0200 Subject: [PATCH 31/48] s390/qdio: Move memory alloc/pointer arithmetic for slib and sl into one place Instead of distributing the memory allocation and pointer arithmetic to place slib and sl on the page that is allocated for them over multiple functions and comments, move both into the same context directly next to each other, so that the knowledge of how this is done is immediately visible. The actual layout in memory doesn't change with this, just the structure of the code to achieve it. Signed-off-by: Benjamin Block Reviewed-by: Steffen Maier Reviewed-by: Alexandra Winter Signed-off-by: Alexander Gordeev --- drivers/s390/cio/qdio.h | 7 +++---- drivers/s390/cio/qdio_setup.c | 19 +++++++++++++------ 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h index 5c09f2ef874e..4bd4c00c9c0c 100644 --- a/drivers/s390/cio/qdio.h +++ b/drivers/s390/cio/qdio.h @@ -210,11 +210,10 @@ struct qdio_q { qdio_handler_t (*handler); struct qdio_irq *irq_ptr; + + /* memory page (PAGE_SIZE) used to place slib and sl on */ + void *sl_page; struct sl *sl; - /* - * A page is allocated under this pointer and used for slib and sl. - * slib is 2048 bytes big and sl points to offset PAGE_SIZE / 2. - */ struct slib *slib; } __attribute__ ((aligned(256))); diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 1f532b112194..ea09aadaae4e 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -83,7 +83,7 @@ static void __qdio_free_queues(struct qdio_q **queues, unsigned int count) for (i = 0; i < count; i++) { q = queues[i]; - free_page((unsigned long) q->slib); + free_page((unsigned long)q->sl_page); kmem_cache_free(qdio_q_cache, q); } } @@ -109,12 +109,16 @@ static int __qdio_allocate_qs(struct qdio_q **irq_ptr_qs, int nr_queues) return -ENOMEM; } - q->slib = (struct slib *) __get_free_page(GFP_KERNEL); - if (!q->slib) { + q->sl_page = (void *)__get_free_page(GFP_KERNEL); + if (!q->sl_page) { kmem_cache_free(qdio_q_cache, q); __qdio_free_queues(irq_ptr_qs, i); return -ENOMEM; } + q->slib = q->sl_page; + /* As per architecture: SLIB is 2K bytes long, and SL 1K. */ + q->sl = (struct sl *)(q->slib + 1); + irq_ptr_qs[i] = q; } return 0; @@ -142,11 +146,15 @@ int qdio_allocate_qs(struct qdio_irq *irq_ptr, int nr_input_qs, int nr_output_qs static void setup_queues_misc(struct qdio_q *q, struct qdio_irq *irq_ptr, qdio_handler_t *handler, int i) { - struct slib *slib = q->slib; + struct slib *const slib = q->slib; + void *const sl_page = q->sl_page; + struct sl *const sl = q->sl; /* queue must be cleared for qdio_establish */ memset(q, 0, sizeof(*q)); - memset(slib, 0, PAGE_SIZE); + memset(sl_page, 0, PAGE_SIZE); + q->sl_page = sl_page; + q->sl = sl; q->slib = slib; q->irq_ptr = irq_ptr; q->mask = 1 << (31 - i); @@ -161,7 +169,6 @@ static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr, int j; DBF_HEX(&q, sizeof(void *)); - q->sl = (struct sl *)((char *)q->slib + PAGE_SIZE / 2); /* fill in sbal */ for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; j++) From 221ce94b1ac56bacde9c55409080838c1b575d3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 11 Dec 2024 18:54:39 +0100 Subject: [PATCH 32/48] s390/crypto/cpacf: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Tested-by: Finn Callies Reviewed-by: Holger Dengler Link: https://lore.kernel.org/r/20241211-sysfs-const-bin_attr-s390-v1-1-be01f66bfcf7@weissschuh.net Signed-off-by: Alexander Gordeev --- arch/s390/kernel/cpacf.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/s390/kernel/cpacf.c b/arch/s390/kernel/cpacf.c index c8575dbc890d..4b9b34f95d72 100644 --- a/arch/s390/kernel/cpacf.c +++ b/arch/s390/kernel/cpacf.c @@ -14,7 +14,7 @@ #define CPACF_QUERY(name, instruction) \ static ssize_t name##_query_raw_read(struct file *fp, \ struct kobject *kobj, \ - struct bin_attribute *attr, \ + const struct bin_attribute *attr, \ char *buf, loff_t offs, \ size_t count) \ { \ @@ -24,7 +24,7 @@ static ssize_t name##_query_raw_read(struct file *fp, \ return -EOPNOTSUPP; \ return memory_read_from_buffer(buf, count, &offs, &mask, sizeof(mask)); \ } \ -static BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t)) +static const BIN_ATTR_RO(name##_query_raw, sizeof(cpacf_mask_t)) CPACF_QUERY(km, KM); CPACF_QUERY(kmc, KMC); @@ -40,20 +40,20 @@ CPACF_QUERY(prno, PRNO); CPACF_QUERY(kma, KMA); CPACF_QUERY(kdsa, KDSA); -#define CPACF_QAI(name, instruction) \ -static ssize_t name##_query_auth_info_raw_read( \ - struct file *fp, struct kobject *kobj, \ - struct bin_attribute *attr, char *buf, loff_t offs, \ - size_t count) \ -{ \ - cpacf_qai_t qai; \ - \ - if (!cpacf_qai(CPACF_##instruction, &qai)) \ - return -EOPNOTSUPP; \ - return memory_read_from_buffer(buf, count, &offs, &qai, \ - sizeof(qai)); \ -} \ -static BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t)) +#define CPACF_QAI(name, instruction) \ +static ssize_t name##_query_auth_info_raw_read( \ + struct file *fp, struct kobject *kobj, \ + const struct bin_attribute *attr, char *buf, loff_t offs, \ + size_t count) \ +{ \ + cpacf_qai_t qai; \ + \ + if (!cpacf_qai(CPACF_##instruction, &qai)) \ + return -EOPNOTSUPP; \ + return memory_read_from_buffer(buf, count, &offs, &qai, \ + sizeof(qai)); \ +} \ +static const BIN_ATTR_RO(name##_query_auth_info_raw, sizeof(cpacf_qai_t)) CPACF_QAI(km, KM); CPACF_QAI(kmc, KMC); @@ -69,7 +69,7 @@ CPACF_QAI(prno, PRNO); CPACF_QAI(kma, KMA); CPACF_QAI(kdsa, KDSA); -static struct bin_attribute *cpacf_attrs[] = { +static const struct bin_attribute *const cpacf_attrs[] = { &bin_attr_km_query_raw, &bin_attr_kmc_query_raw, &bin_attr_kimd_query_raw, @@ -101,7 +101,7 @@ static struct bin_attribute *cpacf_attrs[] = { static const struct attribute_group cpacf_attr_grp = { .name = "cpacf", - .bin_attrs = cpacf_attrs, + .bin_attrs_new = cpacf_attrs, }; static int __init cpacf_init(void) From 77977da798c0f97de649c5430bf721a02cb45fcc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 11 Dec 2024 18:54:40 +0100 Subject: [PATCH 33/48] s390/ipl: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Acked-by: Alexander Gordeev Link: https://lore.kernel.org/r/20241211-sysfs-const-bin_attr-s390-v1-2-be01f66bfcf7@weissschuh.net Signed-off-by: Alexander Gordeev --- arch/s390/kernel/ipl.c | 142 ++++++++++++++++++++--------------------- 1 file changed, 71 insertions(+), 71 deletions(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index edbb52ce3f1e..5291e6dd347d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -280,58 +280,58 @@ static struct kobj_attribute sys_##_prefix##_##_name##_attr = \ sys_##_prefix##_##_name##_show, \ sys_##_prefix##_##_name##_store) -#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ -static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \ - struct kobject *kobj, \ - struct bin_attribute *attr, \ - char *buf, loff_t off, \ - size_t count) \ -{ \ - size_t size = _ipl_block.scp_data_len; \ - void *scp_data = _ipl_block.scp_data; \ - \ - return memory_read_from_buffer(buf, count, &off, \ - scp_data, size); \ +#define IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ +static ssize_t sys_##_prefix##_scp_data_show(struct file *filp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t off, \ + size_t count) \ +{ \ + size_t size = _ipl_block.scp_data_len; \ + void *scp_data = _ipl_block.scp_data; \ + \ + return memory_read_from_buffer(buf, count, &off, \ + scp_data, size); \ } #define IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\ -static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \ - struct kobject *kobj, \ - struct bin_attribute *attr, \ - char *buf, loff_t off, \ - size_t count) \ -{ \ - size_t scpdata_len = count; \ - size_t padding; \ - \ - if (off) \ - return -EINVAL; \ - \ - memcpy(_ipl_block.scp_data, buf, count); \ - if (scpdata_len % 8) { \ - padding = 8 - (scpdata_len % 8); \ - memset(_ipl_block.scp_data + scpdata_len, \ - 0, padding); \ - scpdata_len += padding; \ - } \ - \ - _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \ - _ipl_block.len = _ipl_bp0_len + scpdata_len; \ - _ipl_block.scp_data_len = scpdata_len; \ - \ - return count; \ +static ssize_t sys_##_prefix##_scp_data_store(struct file *filp, \ + struct kobject *kobj, \ + const struct bin_attribute *attr, \ + char *buf, loff_t off, \ + size_t count) \ +{ \ + size_t scpdata_len = count; \ + size_t padding; \ + \ + if (off) \ + return -EINVAL; \ + \ + memcpy(_ipl_block.scp_data, buf, count); \ + if (scpdata_len % 8) { \ + padding = 8 - (scpdata_len % 8); \ + memset(_ipl_block.scp_data + scpdata_len, \ + 0, padding); \ + scpdata_len += padding; \ + } \ + \ + _ipl_block_hdr.len = _ipl_bp_len + scpdata_len; \ + _ipl_block.len = _ipl_bp0_len + scpdata_len; \ + _ipl_block.scp_data_len = scpdata_len; \ + \ + return count; \ } #define DEFINE_IPL_ATTR_SCP_DATA_RO(_prefix, _ipl_block, _size) \ IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ -static struct bin_attribute sys_##_prefix##_scp_data_attr = \ +static const struct bin_attribute sys_##_prefix##_scp_data_attr = \ __BIN_ATTR(scp_data, 0444, sys_##_prefix##_scp_data_show, \ NULL, _size) #define DEFINE_IPL_ATTR_SCP_DATA_RW(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len, _size)\ IPL_ATTR_SCP_DATA_SHOW_FN(_prefix, _ipl_block) \ IPL_ATTR_SCP_DATA_STORE_FN(_prefix, _ipl_block_hdr, _ipl_block, _ipl_bp_len, _ipl_bp0_len)\ -static struct bin_attribute sys_##_prefix##_scp_data_attr = \ +static const struct bin_attribute sys_##_prefix##_scp_data_attr = \ __BIN_ATTR(scp_data, 0644, sys_##_prefix##_scp_data_show, \ sys_##_prefix##_scp_data_store, _size) @@ -434,19 +434,19 @@ static struct kobj_attribute sys_ipl_device_attr = __ATTR(device, 0444, sys_ipl_device_show, NULL); static ssize_t sys_ipl_parameter_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { return memory_read_from_buffer(buf, count, &off, &ipl_block, ipl_block.hdr.len); } -static struct bin_attribute sys_ipl_parameter_attr = +static const struct bin_attribute sys_ipl_parameter_attr = __BIN_ATTR(binary_parameter, 0444, sys_ipl_parameter_read, NULL, PAGE_SIZE); DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_fcp, ipl_block.fcp, PAGE_SIZE); -static struct bin_attribute *ipl_fcp_bin_attrs[] = { +static const struct bin_attribute *const ipl_fcp_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_fcp_scp_data_attr, NULL, @@ -454,7 +454,7 @@ static struct bin_attribute *ipl_fcp_bin_attrs[] = { DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_nvme, ipl_block.nvme, PAGE_SIZE); -static struct bin_attribute *ipl_nvme_bin_attrs[] = { +static const struct bin_attribute *const ipl_nvme_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_nvme_scp_data_attr, NULL, @@ -462,7 +462,7 @@ static struct bin_attribute *ipl_nvme_bin_attrs[] = { DEFINE_IPL_ATTR_SCP_DATA_RO(ipl_eckd, ipl_block.eckd, PAGE_SIZE); -static struct bin_attribute *ipl_eckd_bin_attrs[] = { +static const struct bin_attribute *const ipl_eckd_bin_attrs[] = { &sys_ipl_parameter_attr, &sys_ipl_eckd_scp_data_attr, NULL, @@ -593,9 +593,9 @@ static struct attribute *ipl_fcp_attrs[] = { NULL, }; -static struct attribute_group ipl_fcp_attr_group = { +static const struct attribute_group ipl_fcp_attr_group = { .attrs = ipl_fcp_attrs, - .bin_attrs = ipl_fcp_bin_attrs, + .bin_attrs_new = ipl_fcp_bin_attrs, }; static struct attribute *ipl_nvme_attrs[] = { @@ -607,9 +607,9 @@ static struct attribute *ipl_nvme_attrs[] = { NULL, }; -static struct attribute_group ipl_nvme_attr_group = { +static const struct attribute_group ipl_nvme_attr_group = { .attrs = ipl_nvme_attrs, - .bin_attrs = ipl_nvme_bin_attrs, + .bin_attrs_new = ipl_nvme_bin_attrs, }; static struct attribute *ipl_eckd_attrs[] = { @@ -620,9 +620,9 @@ static struct attribute *ipl_eckd_attrs[] = { NULL, }; -static struct attribute_group ipl_eckd_attr_group = { +static const struct attribute_group ipl_eckd_attr_group = { .attrs = ipl_eckd_attrs, - .bin_attrs = ipl_eckd_bin_attrs, + .bin_attrs_new = ipl_eckd_bin_attrs, }; /* CCW ipl device attributes */ @@ -640,11 +640,11 @@ static struct attribute *ipl_ccw_attrs_lpar[] = { NULL, }; -static struct attribute_group ipl_ccw_attr_group_vm = { +static const struct attribute_group ipl_ccw_attr_group_vm = { .attrs = ipl_ccw_attrs_vm, }; -static struct attribute_group ipl_ccw_attr_group_lpar = { +static const struct attribute_group ipl_ccw_attr_group_lpar = { .attrs = ipl_ccw_attrs_lpar }; @@ -655,7 +655,7 @@ static struct attribute *ipl_common_attrs[] = { NULL, }; -static struct attribute_group ipl_common_attr_group = { +static const struct attribute_group ipl_common_attr_group = { .attrs = ipl_common_attrs, }; @@ -808,7 +808,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_fcp, reipl_block_fcp->hdr, IPL_BP_FCP_LEN, IPL_BP0_FCP_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_fcp_bin_attrs[] = { +static const struct bin_attribute *const reipl_fcp_bin_attrs[] = { &sys_reipl_fcp_scp_data_attr, NULL, }; @@ -917,9 +917,9 @@ static struct attribute *reipl_fcp_attrs[] = { NULL, }; -static struct attribute_group reipl_fcp_attr_group = { +static const struct attribute_group reipl_fcp_attr_group = { .attrs = reipl_fcp_attrs, - .bin_attrs = reipl_fcp_bin_attrs, + .bin_attrs_new = reipl_fcp_bin_attrs, }; static struct kobj_attribute sys_reipl_fcp_clear_attr = @@ -932,7 +932,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_nvme, reipl_block_nvme->hdr, IPL_BP_NVME_LEN, IPL_BP0_NVME_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_nvme_bin_attrs[] = { +static const struct bin_attribute *const reipl_nvme_bin_attrs[] = { &sys_reipl_nvme_scp_data_attr, NULL, }; @@ -955,9 +955,9 @@ static struct attribute *reipl_nvme_attrs[] = { NULL, }; -static struct attribute_group reipl_nvme_attr_group = { +static const struct attribute_group reipl_nvme_attr_group = { .attrs = reipl_nvme_attrs, - .bin_attrs = reipl_nvme_bin_attrs + .bin_attrs_new = reipl_nvme_bin_attrs }; static ssize_t reipl_nvme_clear_show(struct kobject *kobj, @@ -1031,7 +1031,7 @@ DEFINE_IPL_ATTR_SCP_DATA_RW(reipl_eckd, reipl_block_eckd->hdr, IPL_BP_ECKD_LEN, IPL_BP0_ECKD_LEN, DIAG308_SCPDATA_SIZE); -static struct bin_attribute *reipl_eckd_bin_attrs[] = { +static const struct bin_attribute *const reipl_eckd_bin_attrs[] = { &sys_reipl_eckd_scp_data_attr, NULL, }; @@ -1048,9 +1048,9 @@ static struct attribute *reipl_eckd_attrs[] = { NULL, }; -static struct attribute_group reipl_eckd_attr_group = { +static const struct attribute_group reipl_eckd_attr_group = { .attrs = reipl_eckd_attrs, - .bin_attrs = reipl_eckd_bin_attrs + .bin_attrs_new = reipl_eckd_bin_attrs }; static ssize_t reipl_eckd_clear_show(struct kobject *kobj, @@ -1587,15 +1587,15 @@ static struct attribute *dump_fcp_attrs[] = { NULL, }; -static struct bin_attribute *dump_fcp_bin_attrs[] = { +static const struct bin_attribute *const dump_fcp_bin_attrs[] = { &sys_dump_fcp_scp_data_attr, NULL, }; -static struct attribute_group dump_fcp_attr_group = { +static const struct attribute_group dump_fcp_attr_group = { .name = IPL_FCP_STR, .attrs = dump_fcp_attrs, - .bin_attrs = dump_fcp_bin_attrs, + .bin_attrs_new = dump_fcp_bin_attrs, }; /* NVME dump device attributes */ @@ -1621,15 +1621,15 @@ static struct attribute *dump_nvme_attrs[] = { NULL, }; -static struct bin_attribute *dump_nvme_bin_attrs[] = { +static const struct bin_attribute *const dump_nvme_bin_attrs[] = { &sys_dump_nvme_scp_data_attr, NULL, }; -static struct attribute_group dump_nvme_attr_group = { +static const struct attribute_group dump_nvme_attr_group = { .name = IPL_NVME_STR, .attrs = dump_nvme_attrs, - .bin_attrs = dump_nvme_bin_attrs, + .bin_attrs_new = dump_nvme_bin_attrs, }; /* ECKD dump device attributes */ @@ -1655,15 +1655,15 @@ static struct attribute *dump_eckd_attrs[] = { NULL, }; -static struct bin_attribute *dump_eckd_bin_attrs[] = { +static const struct bin_attribute *const dump_eckd_bin_attrs[] = { &sys_dump_eckd_scp_data_attr, NULL, }; -static struct attribute_group dump_eckd_attr_group = { +static const struct attribute_group dump_eckd_attr_group = { .name = IPL_ECKD_STR, .attrs = dump_eckd_attrs, - .bin_attrs = dump_eckd_bin_attrs, + .bin_attrs_new = dump_eckd_bin_attrs, }; /* CCW dump device attributes */ From ef37c669b71e67bfe948e5f33f7ea040df08eddd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 11 Dec 2024 18:54:41 +0100 Subject: [PATCH 34/48] s390/pci: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Acked-by: Alexander Gordeev Link: https://lore.kernel.org/r/20241211-sysfs-const-bin_attr-s390-v1-3-be01f66bfcf7@weissschuh.net Signed-off-by: Alexander Gordeev --- arch/s390/pci/pci_sysfs.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index 5f46ad58dcd1..2de1ea6c3a8c 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -135,7 +135,7 @@ out: static DEVICE_ATTR_WO(recover); static ssize_t util_string_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -145,10 +145,10 @@ static ssize_t util_string_read(struct file *filp, struct kobject *kobj, return memory_read_from_buffer(buf, count, &off, zdev->util_str, sizeof(zdev->util_str)); } -static BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN); +static const BIN_ATTR_RO(util_string, CLP_UTIL_STR_LEN); static ssize_t report_error_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { struct zpci_report_error_header *report = (void *) buf; @@ -164,7 +164,7 @@ static ssize_t report_error_write(struct file *filp, struct kobject *kobj, return ret ? ret : count; } -static BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE); +static const BIN_ATTR(report_error, S_IWUSR, NULL, report_error_write, PAGE_SIZE); static ssize_t uid_is_unique_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -203,7 +203,7 @@ const struct attribute_group zpci_ident_attr_group = { .is_visible = zpci_index_is_visible, }; -static struct bin_attribute *zpci_bin_attrs[] = { +static const struct bin_attribute *const zpci_bin_attrs[] = { &bin_attr_util_string, &bin_attr_report_error, NULL, @@ -227,7 +227,7 @@ static struct attribute *zpci_dev_attrs[] = { const struct attribute_group zpci_attr_group = { .attrs = zpci_dev_attrs, - .bin_attrs = zpci_bin_attrs, + .bin_attrs_new = zpci_bin_attrs, }; static struct attribute *pfip_attrs[] = { From 81ad38a66bdfcf51153632deaad305adce887fca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 11 Dec 2024 18:54:42 +0100 Subject: [PATCH 35/48] s390/sclp: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Acked-by: Alexander Gordeev Link: https://lore.kernel.org/r/20241211-sysfs-const-bin_attr-s390-v1-4-be01f66bfcf7@weissschuh.net Signed-off-by: Alexander Gordeev --- drivers/s390/char/sclp_config.c | 4 ++-- drivers/s390/char/sclp_sd.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/s390/char/sclp_config.c b/drivers/s390/char/sclp_config.c index f56ea9b60e08..ae5d28987177 100644 --- a/drivers/s390/char/sclp_config.c +++ b/drivers/s390/char/sclp_config.c @@ -128,7 +128,7 @@ out: } static ssize_t sysfs_ofb_data_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { int rc; @@ -142,7 +142,7 @@ static const struct bin_attribute ofb_bin_attr = { .name = "event_data", .mode = S_IWUSR, }, - .write = sysfs_ofb_data_write, + .write_new = sysfs_ofb_data_write, }; #endif diff --git a/drivers/s390/char/sclp_sd.c b/drivers/s390/char/sclp_sd.c index c2dc9aadb7d2..8524c14affed 100644 --- a/drivers/s390/char/sclp_sd.c +++ b/drivers/s390/char/sclp_sd.c @@ -476,7 +476,7 @@ static struct kobj_type sclp_sd_file_ktype = { * on EOF. */ static ssize_t data_read(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, char *buffer, + const struct bin_attribute *attr, char *buffer, loff_t off, size_t size) { struct sclp_sd_file *sd_file = to_sd_file(kobj); @@ -539,7 +539,7 @@ static __init struct sclp_sd_file *sclp_sd_file_create(const char *name, u8 di) sysfs_bin_attr_init(&sd_file->data_attr); sd_file->data_attr.attr.name = "data"; sd_file->data_attr.attr.mode = 0444; - sd_file->data_attr.read = data_read; + sd_file->data_attr.read_new = data_read; rc = sysfs_create_bin_file(&sd_file->kobj, &sd_file->data_attr); if (rc) { From d1aa46c83bc4cd08a519819233f432a0fce5119b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 11 Dec 2024 18:54:43 +0100 Subject: [PATCH 36/48] s390/pkey: Constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Tested-by: Holger Dengler Reviewed-by: Holger Dengler Link: https://lore.kernel.org/r/20241211-sysfs-const-bin_attr-s390-v1-5-be01f66bfcf7@weissschuh.net Signed-off-by: Alexander Gordeev --- drivers/s390/crypto/pkey_sysfs.c | 126 +++++++++++++++---------------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/drivers/s390/crypto/pkey_sysfs.c b/drivers/s390/crypto/pkey_sysfs.c index a4eb45803f5e..57edc97bafd2 100644 --- a/drivers/s390/crypto/pkey_sysfs.c +++ b/drivers/s390/crypto/pkey_sysfs.c @@ -184,7 +184,7 @@ static ssize_t pkey_protkey_hmac_attr_read(u32 keytype, char *buf, static ssize_t protkey_aes_128_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -194,7 +194,7 @@ static ssize_t protkey_aes_128_read(struct file *filp, static ssize_t protkey_aes_192_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -204,7 +204,7 @@ static ssize_t protkey_aes_192_read(struct file *filp, static ssize_t protkey_aes_256_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -214,7 +214,7 @@ static ssize_t protkey_aes_256_read(struct file *filp, static ssize_t protkey_aes_128_xts_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -224,7 +224,7 @@ static ssize_t protkey_aes_128_xts_read(struct file *filp, static ssize_t protkey_aes_256_xts_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -234,7 +234,7 @@ static ssize_t protkey_aes_256_xts_read(struct file *filp, static ssize_t protkey_aes_xts_128_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -244,7 +244,7 @@ static ssize_t protkey_aes_xts_128_read(struct file *filp, static ssize_t protkey_aes_xts_256_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -254,7 +254,7 @@ static ssize_t protkey_aes_xts_256_read(struct file *filp, static ssize_t protkey_hmac_512_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -264,7 +264,7 @@ static ssize_t protkey_hmac_512_read(struct file *filp, static ssize_t protkey_hmac_1024_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -272,17 +272,17 @@ static ssize_t protkey_hmac_1024_read(struct file *filp, buf, off, count); } -static BIN_ATTR_RO(protkey_aes_128, sizeof(struct protaeskeytoken)); -static BIN_ATTR_RO(protkey_aes_192, sizeof(struct protaeskeytoken)); -static BIN_ATTR_RO(protkey_aes_256, sizeof(struct protaeskeytoken)); -static BIN_ATTR_RO(protkey_aes_128_xts, 2 * sizeof(struct protaeskeytoken)); -static BIN_ATTR_RO(protkey_aes_256_xts, 2 * sizeof(struct protaeskeytoken)); -static BIN_ATTR_RO(protkey_aes_xts_128, sizeof(struct protkeytoken) + 64); -static BIN_ATTR_RO(protkey_aes_xts_256, sizeof(struct protkeytoken) + 96); -static BIN_ATTR_RO(protkey_hmac_512, sizeof(struct protkeytoken) + 96); -static BIN_ATTR_RO(protkey_hmac_1024, sizeof(struct protkeytoken) + 160); +static const BIN_ATTR_RO(protkey_aes_128, sizeof(struct protaeskeytoken)); +static const BIN_ATTR_RO(protkey_aes_192, sizeof(struct protaeskeytoken)); +static const BIN_ATTR_RO(protkey_aes_256, sizeof(struct protaeskeytoken)); +static const BIN_ATTR_RO(protkey_aes_128_xts, 2 * sizeof(struct protaeskeytoken)); +static const BIN_ATTR_RO(protkey_aes_256_xts, 2 * sizeof(struct protaeskeytoken)); +static const BIN_ATTR_RO(protkey_aes_xts_128, sizeof(struct protkeytoken) + 64); +static const BIN_ATTR_RO(protkey_aes_xts_256, sizeof(struct protkeytoken) + 96); +static const BIN_ATTR_RO(protkey_hmac_512, sizeof(struct protkeytoken) + 96); +static const BIN_ATTR_RO(protkey_hmac_1024, sizeof(struct protkeytoken) + 160); -static struct bin_attribute *protkey_attrs[] = { +static const struct bin_attribute *const protkey_attrs[] = { &bin_attr_protkey_aes_128, &bin_attr_protkey_aes_192, &bin_attr_protkey_aes_256, @@ -295,9 +295,9 @@ static struct bin_attribute *protkey_attrs[] = { NULL }; -static struct attribute_group protkey_attr_group = { - .name = "protkey", - .bin_attrs = protkey_attrs, +static const struct attribute_group protkey_attr_group = { + .name = "protkey", + .bin_attrs_new = protkey_attrs, }; /* @@ -341,7 +341,7 @@ static ssize_t pkey_ccadata_aes_attr_read(u32 keytype, bool is_xts, char *buf, static ssize_t ccadata_aes_128_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -351,7 +351,7 @@ static ssize_t ccadata_aes_128_read(struct file *filp, static ssize_t ccadata_aes_192_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -361,7 +361,7 @@ static ssize_t ccadata_aes_192_read(struct file *filp, static ssize_t ccadata_aes_256_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -371,7 +371,7 @@ static ssize_t ccadata_aes_256_read(struct file *filp, static ssize_t ccadata_aes_128_xts_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -381,7 +381,7 @@ static ssize_t ccadata_aes_128_xts_read(struct file *filp, static ssize_t ccadata_aes_256_xts_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -389,13 +389,13 @@ static ssize_t ccadata_aes_256_xts_read(struct file *filp, off, count); } -static BIN_ATTR_RO(ccadata_aes_128, sizeof(struct secaeskeytoken)); -static BIN_ATTR_RO(ccadata_aes_192, sizeof(struct secaeskeytoken)); -static BIN_ATTR_RO(ccadata_aes_256, sizeof(struct secaeskeytoken)); -static BIN_ATTR_RO(ccadata_aes_128_xts, 2 * sizeof(struct secaeskeytoken)); -static BIN_ATTR_RO(ccadata_aes_256_xts, 2 * sizeof(struct secaeskeytoken)); +static const BIN_ATTR_RO(ccadata_aes_128, sizeof(struct secaeskeytoken)); +static const BIN_ATTR_RO(ccadata_aes_192, sizeof(struct secaeskeytoken)); +static const BIN_ATTR_RO(ccadata_aes_256, sizeof(struct secaeskeytoken)); +static const BIN_ATTR_RO(ccadata_aes_128_xts, 2 * sizeof(struct secaeskeytoken)); +static const BIN_ATTR_RO(ccadata_aes_256_xts, 2 * sizeof(struct secaeskeytoken)); -static struct bin_attribute *ccadata_attrs[] = { +static const struct bin_attribute *const ccadata_attrs[] = { &bin_attr_ccadata_aes_128, &bin_attr_ccadata_aes_192, &bin_attr_ccadata_aes_256, @@ -404,9 +404,9 @@ static struct bin_attribute *ccadata_attrs[] = { NULL }; -static struct attribute_group ccadata_attr_group = { - .name = "ccadata", - .bin_attrs = ccadata_attrs, +static const struct attribute_group ccadata_attr_group = { + .name = "ccadata", + .bin_attrs_new = ccadata_attrs, }; #define CCACIPHERTOKENSIZE (sizeof(struct cipherkeytoken) + 80) @@ -455,7 +455,7 @@ static ssize_t pkey_ccacipher_aes_attr_read(enum pkey_key_size keybits, static ssize_t ccacipher_aes_128_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -465,7 +465,7 @@ static ssize_t ccacipher_aes_128_read(struct file *filp, static ssize_t ccacipher_aes_192_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -475,7 +475,7 @@ static ssize_t ccacipher_aes_192_read(struct file *filp, static ssize_t ccacipher_aes_256_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -485,7 +485,7 @@ static ssize_t ccacipher_aes_256_read(struct file *filp, static ssize_t ccacipher_aes_128_xts_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -495,7 +495,7 @@ static ssize_t ccacipher_aes_128_xts_read(struct file *filp, static ssize_t ccacipher_aes_256_xts_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -503,13 +503,13 @@ static ssize_t ccacipher_aes_256_xts_read(struct file *filp, off, count); } -static BIN_ATTR_RO(ccacipher_aes_128, CCACIPHERTOKENSIZE); -static BIN_ATTR_RO(ccacipher_aes_192, CCACIPHERTOKENSIZE); -static BIN_ATTR_RO(ccacipher_aes_256, CCACIPHERTOKENSIZE); -static BIN_ATTR_RO(ccacipher_aes_128_xts, 2 * CCACIPHERTOKENSIZE); -static BIN_ATTR_RO(ccacipher_aes_256_xts, 2 * CCACIPHERTOKENSIZE); +static const BIN_ATTR_RO(ccacipher_aes_128, CCACIPHERTOKENSIZE); +static const BIN_ATTR_RO(ccacipher_aes_192, CCACIPHERTOKENSIZE); +static const BIN_ATTR_RO(ccacipher_aes_256, CCACIPHERTOKENSIZE); +static const BIN_ATTR_RO(ccacipher_aes_128_xts, 2 * CCACIPHERTOKENSIZE); +static const BIN_ATTR_RO(ccacipher_aes_256_xts, 2 * CCACIPHERTOKENSIZE); -static struct bin_attribute *ccacipher_attrs[] = { +static const struct bin_attribute *const ccacipher_attrs[] = { &bin_attr_ccacipher_aes_128, &bin_attr_ccacipher_aes_192, &bin_attr_ccacipher_aes_256, @@ -518,9 +518,9 @@ static struct bin_attribute *ccacipher_attrs[] = { NULL }; -static struct attribute_group ccacipher_attr_group = { - .name = "ccacipher", - .bin_attrs = ccacipher_attrs, +static const struct attribute_group ccacipher_attr_group = { + .name = "ccacipher", + .bin_attrs_new = ccacipher_attrs, }; /* @@ -570,7 +570,7 @@ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits, static ssize_t ep11_aes_128_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -580,7 +580,7 @@ static ssize_t ep11_aes_128_read(struct file *filp, static ssize_t ep11_aes_192_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -590,7 +590,7 @@ static ssize_t ep11_aes_192_read(struct file *filp, static ssize_t ep11_aes_256_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -600,7 +600,7 @@ static ssize_t ep11_aes_256_read(struct file *filp, static ssize_t ep11_aes_128_xts_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -610,7 +610,7 @@ static ssize_t ep11_aes_128_xts_read(struct file *filp, static ssize_t ep11_aes_256_xts_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { @@ -618,13 +618,13 @@ static ssize_t ep11_aes_256_xts_read(struct file *filp, off, count); } -static BIN_ATTR_RO(ep11_aes_128, MAXEP11AESKEYBLOBSIZE); -static BIN_ATTR_RO(ep11_aes_192, MAXEP11AESKEYBLOBSIZE); -static BIN_ATTR_RO(ep11_aes_256, MAXEP11AESKEYBLOBSIZE); -static BIN_ATTR_RO(ep11_aes_128_xts, 2 * MAXEP11AESKEYBLOBSIZE); -static BIN_ATTR_RO(ep11_aes_256_xts, 2 * MAXEP11AESKEYBLOBSIZE); +static const BIN_ATTR_RO(ep11_aes_128, MAXEP11AESKEYBLOBSIZE); +static const BIN_ATTR_RO(ep11_aes_192, MAXEP11AESKEYBLOBSIZE); +static const BIN_ATTR_RO(ep11_aes_256, MAXEP11AESKEYBLOBSIZE); +static const BIN_ATTR_RO(ep11_aes_128_xts, 2 * MAXEP11AESKEYBLOBSIZE); +static const BIN_ATTR_RO(ep11_aes_256_xts, 2 * MAXEP11AESKEYBLOBSIZE); -static struct bin_attribute *ep11_attrs[] = { +static const struct bin_attribute *const ep11_attrs[] = { &bin_attr_ep11_aes_128, &bin_attr_ep11_aes_192, &bin_attr_ep11_aes_256, @@ -633,9 +633,9 @@ static struct bin_attribute *ep11_attrs[] = { NULL }; -static struct attribute_group ep11_attr_group = { +static const struct attribute_group ep11_attr_group = { .name = "ep11", - .bin_attrs = ep11_attrs, + .bin_attrs_new = ep11_attrs, }; const struct attribute_group *pkey_attr_groups[] = { From 03b3e82a78c32c7e4542c33fcd863028cddd9331 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 7 Jan 2025 08:40:06 +0100 Subject: [PATCH 37/48] s390/tlb: Add missing TLB range adjustment While converting to generic mmu_gather with commit 9de7d833e370 ("s390/tlb: Convert to generic mmu_gather") __tlb_adjust_range() is called from pte|pmd|p4d_free_tlb(), but not for pud_free_tlb(). __tlb_adjust_range() adjusts the span of TLB range to be flushed, but s390 does not make use of it. Thus, this change is only for consistency. Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/tlb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index e95b2c8081eb..ea150ea83e57 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -140,11 +140,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, { if (mm_pud_folded(tlb->mm)) return; + __tlb_adjust_range(tlb, address, PAGE_SIZE); tlb->mm->context.flush_mm = 1; tlb->freed_tables = 1; tlb->cleared_p4ds = 1; tlb_remove_ptdesc(tlb, pud); } - #endif /* _S390_TLB_H */ From 9988df07dbe1ca8282f5ae95f1b534f91bab73fc Mon Sep 17 00:00:00 2001 From: Mete Durlu Date: Wed, 8 Jan 2025 11:31:27 +0100 Subject: [PATCH 38/48] s390/topology: Improve topology detection Add early polarization detection instead of assuming horizontal polarization. Signed-off-by: Mete Durlu Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/kernel/topology.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 4f9c301a705b..39f5ed21f31a 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -556,6 +556,16 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info, } } +static int __init detect_polarization(union topology_entry *tle) +{ + struct topology_core *tl_core; + + while (tle->nl) + tle = next_tle(tle); + tl_core = (struct topology_core *)tle; + return tl_core->pp != POLARIZATION_HRZ; +} + void __init topology_init_early(void) { struct sysinfo_15_1_x *info; @@ -575,6 +585,7 @@ void __init topology_init_early(void) __func__, PAGE_SIZE, PAGE_SIZE); info = tl_info; store_topology(info); + cpu_management = detect_polarization(info->tle); pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n", info->mag[0], info->mag[1], info->mag[2], info->mag[3], info->mag[4], info->mag[5], info->mnest); From 745600ed696593436d2b69211a3d85b588ee18d8 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 8 Jan 2025 15:27:01 +0100 Subject: [PATCH 39/48] s390/lib: Use exrl instead of ex in xor functions exrl is present in all machines currently supported, therefore prefer it over ex. This saves one instruction and doesn't need an additional register to hold the address of the target instruction. Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/lib/xor.c | 61 +++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index fb924a8041dc..ce7bcf7c0032 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c @@ -15,7 +15,6 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p2) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" " jm 3f\n" " srlg 0,%0,8\n" @@ -25,12 +24,12 @@ static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, " la %1,256(%1)\n" " la %2,256(%2)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" + "1: exrl %0,2f\n" " j 3f\n" "2: xc 0(1,%1),0(%2)\n" "3:\n" : : "d" (bytes), "a" (p1), "a" (p2) - : "0", "1", "cc", "memory"); + : "0", "cc", "memory"); } static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, @@ -38,9 +37,8 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p3) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 4f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -50,14 +48,14 @@ static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, " la %2,256(%2)\n" " la %3,256(%3)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " j 4f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) - : : "0", "1", "cc", "memory"); + : : "0", "cc", "memory"); } static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, @@ -66,9 +64,8 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, const unsigned long * __restrict p4) { asm volatile( - " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 5f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -80,16 +77,16 @@ static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, " la %3,256(%3)\n" " la %4,256(%4)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " ex %0,12(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " exrl %0,4f\n" + " j 5f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - " xc 0(1,%1),0(%4)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4: xc 0(1,%1),0(%4)\n" + "5:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) - : : "0", "1", "cc", "memory"); + : : "0", "cc", "memory"); } static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, @@ -101,7 +98,7 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, asm volatile( " larl 1,2f\n" " aghi %0,-1\n" - " jm 3f\n" + " jm 6f\n" " srlg 0,%0,8\n" " ltgr 0,0\n" " jz 1f\n" @@ -115,19 +112,19 @@ static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, " la %4,256(%4)\n" " la %5,256(%5)\n" " brctg 0,0b\n" - "1: ex %0,0(1)\n" - " ex %0,6(1)\n" - " ex %0,12(1)\n" - " ex %0,18(1)\n" - " j 3f\n" + "1: exrl %0,2f\n" + " exrl %0,3f\n" + " exrl %0,4f\n" + " exrl %0,5f\n" + " j 6f\n" "2: xc 0(1,%1),0(%2)\n" - " xc 0(1,%1),0(%3)\n" - " xc 0(1,%1),0(%4)\n" - " xc 0(1,%1),0(%5)\n" - "3:\n" + "3: xc 0(1,%1),0(%3)\n" + "4: xc 0(1,%1),0(%4)\n" + "5: xc 0(1,%1),0(%5)\n" + "6:\n" : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), "+a" (p5) - : : "0", "1", "cc", "memory"); + : : "0", "cc", "memory"); } struct xor_block_template xor_block_xc = { From a88c26bb8e04ee5f2678225c0130a5fbc08eef85 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 8 Jan 2025 15:27:02 +0100 Subject: [PATCH 40/48] s390/stackleak: Use exrl instead of ex in __stackleak_poison() exrl is present in all machines currently supported, therefore prefer it over ex. This saves one instruction and doesn't need an additional register to hold the address of the target instruction. Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/processor.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 8761fd01a9f0..4f8d5592c298 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -163,8 +163,7 @@ static __always_inline void __stackleak_poison(unsigned long erase_low, " la %[addr],256(%[addr])\n" " brctg %[tmp],0b\n" "1: stg %[poison],0(%[addr])\n" - " larl %[tmp],3f\n" - " ex %[count],0(%[tmp])\n" + " exrl %[count],3f\n" " j 4f\n" "2: stg %[poison],0(%[addr])\n" " j 4f\n" From 90c5515dcb9c824db244f42a98c765bd0542f109 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 8 Jan 2025 15:27:03 +0100 Subject: [PATCH 41/48] s390/amode31: Use exrl instead of ex exrl is present in all machines currently supported, therefore prefer it over ex. This saves one instruction and doesn't need an additional register to hold the address of the target instruction. Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/kernel/text_amode31.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S index c0a70efa2426..26f2981aa09e 100644 --- a/arch/s390/kernel/text_amode31.S +++ b/arch/s390/kernel/text_amode31.S @@ -18,8 +18,7 @@ * affects a few functions that are not performance-relevant. */ .macro BR_EX_AMODE31_r14 - larl %r1,0f - ex 0,0(%r1) + exrl 0,0f j . 0: br %r14 .endm From 94446b4dcbbee79ff18173435950b2d1a7513c1b Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 8 Jan 2025 15:27:04 +0100 Subject: [PATCH 42/48] s390/ebcdic: Use exrl instead of ex exrl is present in all machines currently supported, therefore prefer it over ex. This saves one instruction and doesn't need an additional register to hold the address of the target instruction. Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/ebcdic.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h index efb50fc6866c..cfa340552328 100644 --- a/arch/s390/include/asm/ebcdic.h +++ b/arch/s390/include/asm/ebcdic.h @@ -25,15 +25,15 @@ codepage_convert(const __u8 *codepage, volatile char *addr, unsigned long nr) if (nr-- <= 0) return; asm volatile( - " bras 1,1f\n" - " tr 0(1,%0),0(%2)\n" - "0: tr 0(256,%0),0(%2)\n" + " j 2f\n" + "0: tr 0(1,%0),0(%2)\n" + "1: tr 0(256,%0),0(%2)\n" " la %0,256(%0)\n" - "1: ahi %1,-256\n" - " jnm 0b\n" - " ex %1,0(1)" + "2: ahi %1,-256\n" + " jnm 1b\n" + " exrl %1,0b" : "+&a" (addr), "+&a" (nr) - : "a" (codepage) : "cc", "memory", "1"); + : "a" (codepage) : "cc", "memory"); } #define ASCEBC(addr,nr) codepage_convert(_ascebc, addr, nr) From 4a0f62a3009be6515f5863970994a350a938ac52 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 8 Jan 2025 15:27:05 +0100 Subject: [PATCH 43/48] s390/ebcdic: Fix length check in codepage_convert() The current code compares whether the nr argument is less or equal to zero. As nr is of type unsigned long, this isn't correct. Fix this by just testing for zero. This is also reported by checkpatch: unsignedLessThanZero: Checking if unsigned expression 'nr--' is less than zero. Reported-by: Jens Remus Signed-off-by: Sven Schnelle Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/ebcdic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h index cfa340552328..7fdb6b92985a 100644 --- a/arch/s390/include/asm/ebcdic.h +++ b/arch/s390/include/asm/ebcdic.h @@ -22,7 +22,7 @@ extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */ static inline void codepage_convert(const __u8 *codepage, volatile char *addr, unsigned long nr) { - if (nr-- <= 0) + if (!nr--) return; asm volatile( " j 2f\n" From 061a5e4ac36dac35bb4dfd6a6054c5657b953881 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Wed, 8 Jan 2025 15:27:06 +0100 Subject: [PATCH 44/48] s390/ebcdic: Fix length decrement in codepage_convert() The inline assembly uses the ahi instruction to decrement and test whether more than 256 bytes are left for conversion. But the nr variable passed is of type unsigned long. Therefore use aghi. Signed-off-by: Sven Schnelle Reported-by: Jens Remus Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/ebcdic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h index 7fdb6b92985a..7164cb658435 100644 --- a/arch/s390/include/asm/ebcdic.h +++ b/arch/s390/include/asm/ebcdic.h @@ -29,7 +29,7 @@ codepage_convert(const __u8 *codepage, volatile char *addr, unsigned long nr) "0: tr 0(1,%0),0(%2)\n" "1: tr 0(256,%0),0(%2)\n" " la %0,256(%0)\n" - "2: ahi %1,-256\n" + "2: aghi %1,-256\n" " jnm 1b\n" " exrl %1,0b" : "+&a" (addr), "+&a" (nr) From 8cae8e0afb2f1f6879efa6899b6323171c3bd990 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 13 Dec 2024 13:27:33 +0100 Subject: [PATCH 45/48] s390/bitops: Switch to generic bitops The generic bitops implementation is nearly identical to the s390 implementation therefore switch to the generic variant. This results in a small kernel image size decrease. This is because for the generic variant the nr parameter for most bitops functions is of type unsigned int while the s390 variant uses unsigned long. bloat-o-meter: add/remove: 670/670 grow/shrink: 167/209 up/down: 21440/-21792 (-352) Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/bitops.h | 181 +-------------------------------- 1 file changed, 3 insertions(+), 178 deletions(-) diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 54a079cd39ed..1e790c351daa 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -36,184 +36,9 @@ #include #include #include -#include -#include - -#define __BITOPS_WORDS(bits) (((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG) - -static inline unsigned long * -__bitops_word(unsigned long nr, const volatile unsigned long *ptr) -{ - unsigned long addr; - - addr = (unsigned long)ptr + ((nr ^ (nr & (BITS_PER_LONG - 1))) >> 3); - return (unsigned long *)addr; -} - -static inline unsigned long __bitops_mask(unsigned long nr) -{ - return 1UL << (nr & (BITS_PER_LONG - 1)); -} - -static __always_inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - - __atomic64_or(mask, (long *)addr); -} - -static __always_inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - - __atomic64_and(~mask, (long *)addr); -} - -static __always_inline void arch_change_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - - __atomic64_xor(mask, (long *)addr); -} - -static inline bool arch_test_and_set_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = __atomic64_or_barrier(mask, (long *)addr); - return old & mask; -} - -static inline bool arch_test_and_clear_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = __atomic64_and_barrier(~mask, (long *)addr); - return old & mask; -} - -static inline bool arch_test_and_change_bit(unsigned long nr, - volatile unsigned long *ptr) -{ - unsigned long *addr = __bitops_word(nr, ptr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = __atomic64_xor_barrier(mask, (long *)addr); - return old & mask; -} - -static __always_inline void -arch___set_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - *p |= mask; -} - -static __always_inline void -arch___clear_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - *p &= ~mask; -} - -static __always_inline void -arch___change_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - - *p ^= mask; -} - -static __always_inline bool -arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = *p; - *p |= mask; - return old & mask; -} - -static __always_inline bool -arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = *p; - *p &= ~mask; - return old & mask; -} - -static __always_inline bool -arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) -{ - unsigned long *p = __bitops_word(nr, addr); - unsigned long mask = __bitops_mask(nr); - unsigned long old; - - old = *p; - *p ^= mask; - return old & mask; -} - -#define arch_test_bit generic_test_bit -#define arch_test_bit_acquire generic_test_bit_acquire - -static inline bool arch_test_and_set_bit_lock(unsigned long nr, - volatile unsigned long *ptr) -{ - if (arch_test_bit(nr, ptr)) - return true; - return arch_test_and_set_bit(nr, ptr); -} - -static inline void arch_clear_bit_unlock(unsigned long nr, - volatile unsigned long *ptr) -{ - smp_mb__before_atomic(); - arch_clear_bit(nr, ptr); -} - -static inline void arch___clear_bit_unlock(unsigned long nr, - volatile unsigned long *ptr) -{ - smp_mb(); - arch___clear_bit(nr, ptr); -} - -static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask, - volatile unsigned long *ptr) -{ - unsigned long old; - - old = __atomic64_xor_barrier(mask, (long *)ptr); - return old & BIT(7); -} -#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte - -#include -#include -#include +#include +#include +#include /* * Functions which use MSB0 bit numbering. From b2bc1b1a77c0ffc2f51e90b1112d7f5530e4d15c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 13 Dec 2024 13:27:34 +0100 Subject: [PATCH 46/48] s390/bitops: Provide optimized arch_test_bit() Provide an optimized arch_test_bit() implementation which makes use of flag output constraint. This generates slightly better code: bloat-o-meter: add/remove: 51/19 grow/shrink: 450/2444 up/down: 25198/-49136 (-23938) Acked-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/bitops.h | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 1e790c351daa..15aa64e3020e 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -36,8 +36,40 @@ #include #include #include +#include + +#define arch___set_bit generic___set_bit +#define arch___clear_bit generic___clear_bit +#define arch___change_bit generic___change_bit +#define arch___test_and_set_bit generic___test_and_set_bit +#define arch___test_and_clear_bit generic___test_and_clear_bit +#define arch___test_and_change_bit generic___test_and_change_bit +#define arch_test_bit_acquire generic_test_bit_acquire + +static __always_inline bool arch_test_bit(unsigned long nr, const volatile unsigned long *ptr) +{ +#ifdef __HAVE_ASM_FLAG_OUTPUTS__ + const volatile unsigned char *addr; + unsigned long mask; + int cc; + + if (__builtin_constant_p(nr)) { + addr = (const volatile unsigned char *)ptr; + addr += (nr ^ (BITS_PER_LONG - BITS_PER_BYTE)) / BITS_PER_BYTE; + mask = 1UL << (nr & (BITS_PER_BYTE - 1)); + asm volatile( + " tm %[addr],%[mask]\n" + : "=@cc" (cc) + : [addr] "R" (*addr), [mask] "I" (mask) + ); + return cc == 3; + } +#endif + return generic_test_bit(nr, ptr); +} + #include -#include +#include #include /* From 0d30871739ab433e114b0058f08b6b1c7b816f7e Mon Sep 17 00:00:00 2001 From: Mete Durlu Date: Tue, 14 Jan 2025 17:03:09 +0100 Subject: [PATCH 47/48] s390/diag: Add memory topology information via diag310 Introduce diag310 and memory topology related subcodes. Provide memory topology information obtanied from diag310 to userspace via diag ioctl. Signed-off-by: Mete Durlu Reviewed-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/diag.h | 1 + arch/s390/include/asm/sclp.h | 1 + arch/s390/include/uapi/asm/diag.h | 8 + arch/s390/kernel/diag/Makefile | 2 +- arch/s390/kernel/diag/diag.c | 1 + arch/s390/kernel/diag/diag310.c | 276 +++++++++++++++++++++++++++++ arch/s390/kernel/diag/diag_ioctl.h | 4 + arch/s390/kernel/diag/diag_misc.c | 9 + drivers/s390/char/sclp_early.c | 1 + 9 files changed, 302 insertions(+), 1 deletion(-) create mode 100644 arch/s390/kernel/diag/diag310.c diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index c1050f4a7107..5790630e31f0 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -36,6 +36,7 @@ enum diag_stat_enum { DIAG_STAT_X2FC, DIAG_STAT_X304, DIAG_STAT_X308, + DIAG_STAT_X310, DIAG_STAT_X318, DIAG_STAT_X320, DIAG_STAT_X324, diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 8ec6e436bf4c..4da3b2956285 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -92,6 +92,7 @@ struct sclp_info { unsigned char has_kss : 1; unsigned char has_diag204_bif : 1; unsigned char has_gisaf : 1; + unsigned char has_diag310 : 1; unsigned char has_diag318 : 1; unsigned char has_diag320 : 1; unsigned char has_diag324 : 1; diff --git a/arch/s390/include/uapi/asm/diag.h b/arch/s390/include/uapi/asm/diag.h index 361c9ba9d4f5..b7e6ccb4ff6e 100644 --- a/arch/s390/include/uapi/asm/diag.h +++ b/arch/s390/include/uapi/asm/diag.h @@ -17,8 +17,16 @@ struct diag324_pib { __u64 sequence; }; +struct diag310_memtop { + __u64 address; + __u64 nesting_lvl; +}; + /* Diag ioctl definitions */ #define DIAG324_GET_PIBBUF _IOWR(DIAG_MAGIC_STR, 0x77, struct diag324_pib) #define DIAG324_GET_PIBLEN _IOR(DIAG_MAGIC_STR, 0x78, size_t) +#define DIAG310_GET_STRIDE _IOR(DIAG_MAGIC_STR, 0x79, size_t) +#define DIAG310_GET_MEMTOPLEN _IOWR(DIAG_MAGIC_STR, 0x7a, size_t) +#define DIAG310_GET_MEMTOPBUF _IOWR(DIAG_MAGIC_STR, 0x7b, struct diag310_memtop) #endif /* __S390_UAPI_ASM_DIAG_H */ diff --git a/arch/s390/kernel/diag/Makefile b/arch/s390/kernel/diag/Makefile index 44d3e0471151..956aee6c4090 100644 --- a/arch/s390/kernel/diag/Makefile +++ b/arch/s390/kernel/diag/Makefile @@ -1 +1 @@ -obj-y := diag_misc.o diag324.o diag.o +obj-y := diag_misc.o diag324.o diag.o diag310.o diff --git a/arch/s390/kernel/diag/diag.c b/arch/s390/kernel/diag/diag.c index 91161f09299f..e15b8dee3228 100644 --- a/arch/s390/kernel/diag/diag.c +++ b/arch/s390/kernel/diag/diag.c @@ -51,6 +51,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X2FC] = { .code = 0x2fc, .name = "Guest Performance Data" }, [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" }, [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" }, + [DIAG_STAT_X310] = { .code = 0x310, .name = "Memory Topology Information" }, [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" }, [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" }, [DIAG_STAT_X324] = { .code = 0x324, .name = "Power Information Block" }, diff --git a/arch/s390/kernel/diag/diag310.c b/arch/s390/kernel/diag/diag310.c new file mode 100644 index 000000000000..d6a34454aa5a --- /dev/null +++ b/arch/s390/kernel/diag/diag310.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Request memory topology information via diag0x310. + * + * Copyright IBM Corp. 2025 + */ + +#include +#include +#include +#include +#include +#include +#include +#include "diag_ioctl.h" + +#define DIAG310_LEVELMIN 1 +#define DIAG310_LEVELMAX 6 + +enum diag310_sc { + DIAG310_SUBC_0 = 0, + DIAG310_SUBC_1 = 1, + DIAG310_SUBC_4 = 4, + DIAG310_SUBC_5 = 5 +}; + +enum diag310_retcode { + DIAG310_RET_SUCCESS = 0x0001, + DIAG310_RET_BUSY = 0x0101, + DIAG310_RET_OPNOTSUPP = 0x0102, + DIAG310_RET_SC4_INVAL = 0x0401, + DIAG310_RET_SC4_NODATA = 0x0402, + DIAG310_RET_SC5_INVAL = 0x0501, + DIAG310_RET_SC5_NODATA = 0x0502, + DIAG310_RET_SC5_ESIZE = 0x0503 +}; + +union diag310_response { + u64 response; + struct { + u64 result : 32; + u64 : 16; + u64 rc : 16; + }; +}; + +union diag310_req_subcode { + u64 subcode; + struct { + u64 : 48; + u64 st : 8; + u64 sc : 8; + }; +}; + +union diag310_req_size { + u64 size; + struct { + u64 page_count : 32; + u64 : 32; + }; +}; + +static inline unsigned long diag310(unsigned long subcode, unsigned long size, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr, .odd = size }; + + diag_stat_inc(DIAG_STAT_X310); + asm volatile("diag %[rp],%[subcode],0x310\n" + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "memory"); + return rp.odd; +} + +static int diag310_result_to_errno(unsigned int result) +{ + switch (result) { + case DIAG310_RET_BUSY: + return -EBUSY; + case DIAG310_RET_OPNOTSUPP: + return -EOPNOTSUPP; + default: + return -EINVAL; + } +} + +static int diag310_get_subcode_mask(unsigned long *mask) +{ + union diag310_response res; + + res.response = diag310(DIAG310_SUBC_0, 0, NULL); + if (res.rc != DIAG310_RET_SUCCESS) + return diag310_result_to_errno(res.rc); + *mask = res.response; + return 0; +} + +static int diag310_get_memtop_stride(unsigned long *stride) +{ + union diag310_response res; + + res.response = diag310(DIAG310_SUBC_1, 0, NULL); + if (res.rc != DIAG310_RET_SUCCESS) + return diag310_result_to_errno(res.rc); + *stride = res.result; + return 0; +} + +static int diag310_get_memtop_size(unsigned long *pages, unsigned long level) +{ + union diag310_req_subcode req = { .sc = DIAG310_SUBC_4, .st = level }; + union diag310_response res; + + res.response = diag310(req.subcode, 0, NULL); + switch (res.rc) { + case DIAG310_RET_SUCCESS: + *pages = res.result; + return 0; + case DIAG310_RET_SC4_NODATA: + return -ENODATA; + case DIAG310_RET_SC4_INVAL: + return -EINVAL; + default: + return diag310_result_to_errno(res.rc); + } +} + +static int diag310_store_topology_map(void *buf, unsigned long pages, unsigned long level) +{ + union diag310_req_subcode req_sc = { .sc = DIAG310_SUBC_5, .st = level }; + union diag310_req_size req_size = { .page_count = pages }; + union diag310_response res; + + res.response = diag310(req_sc.subcode, req_size.size, buf); + switch (res.rc) { + case DIAG310_RET_SUCCESS: + return 0; + case DIAG310_RET_SC5_NODATA: + return -ENODATA; + case DIAG310_RET_SC5_ESIZE: + return -EOVERFLOW; + case DIAG310_RET_SC5_INVAL: + return -EINVAL; + default: + return diag310_result_to_errno(res.rc); + } +} + +static int diag310_check_features(void) +{ + static int features_available; + unsigned long mask; + int rc; + + if (READ_ONCE(features_available)) + return 0; + if (!sclp.has_diag310) + return -EOPNOTSUPP; + rc = diag310_get_subcode_mask(&mask); + if (rc) + return rc; + if (!test_bit_inv(DIAG310_SUBC_1, &mask)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG310_SUBC_4, &mask)) + return -EOPNOTSUPP; + if (!test_bit_inv(DIAG310_SUBC_5, &mask)) + return -EOPNOTSUPP; + WRITE_ONCE(features_available, 1); + return 0; +} + +static int memtop_get_stride_len(unsigned long *res) +{ + static unsigned long memtop_stride; + unsigned long stride; + int rc; + + stride = READ_ONCE(memtop_stride); + if (!stride) { + rc = diag310_get_memtop_stride(&stride); + if (rc) + return rc; + WRITE_ONCE(memtop_stride, stride); + } + *res = stride; + return 0; +} + +static int memtop_get_page_count(unsigned long *res, unsigned long level) +{ + static unsigned long memtop_pages[DIAG310_LEVELMAX]; + unsigned long pages; + int rc; + + if (level > DIAG310_LEVELMAX || level < DIAG310_LEVELMIN) + return -EINVAL; + pages = READ_ONCE(memtop_pages[level - 1]); + if (!pages) { + rc = diag310_get_memtop_size(&pages, level); + if (rc) + return rc; + WRITE_ONCE(memtop_pages[level - 1], pages); + } + *res = pages; + return 0; +} + +long diag310_memtop_stride(unsigned long arg) +{ + size_t __user *argp = (void __user *)arg; + unsigned long stride; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + rc = memtop_get_stride_len(&stride); + if (rc) + return rc; + if (put_user(stride, argp)) + return -EFAULT; + return 0; +} + +long diag310_memtop_len(unsigned long arg) +{ + size_t __user *argp = (void __user *)arg; + unsigned long pages, level; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + if (get_user(level, argp)) + return -EFAULT; + rc = memtop_get_page_count(&pages, level); + if (rc) + return rc; + if (put_user(pages * PAGE_SIZE, argp)) + return -EFAULT; + return 0; +} + +long diag310_memtop_buf(unsigned long arg) +{ + struct diag310_memtop __user *udata = (struct diag310_memtop __user *)arg; + unsigned long level, pages, data_size; + u64 address; + void *buf; + int rc; + + rc = diag310_check_features(); + if (rc) + return rc; + if (get_user(level, &udata->nesting_lvl)) + return -EFAULT; + if (get_user(address, &udata->address)) + return -EFAULT; + rc = memtop_get_page_count(&pages, level); + if (rc) + return rc; + data_size = pages * PAGE_SIZE; + buf = __vmalloc_node(data_size, PAGE_SIZE, GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT, + NUMA_NO_NODE, __builtin_return_address(0)); + if (!buf) + return -ENOMEM; + rc = diag310_store_topology_map(buf, pages, level); + if (rc) + goto out; + if (copy_to_user((void __user *)address, buf, data_size)) + rc = -EFAULT; +out: + vfree(buf); + return rc; +} diff --git a/arch/s390/kernel/diag/diag_ioctl.h b/arch/s390/kernel/diag/diag_ioctl.h index de5365b23e11..7080be946785 100644 --- a/arch/s390/kernel/diag/diag_ioctl.h +++ b/arch/s390/kernel/diag/diag_ioctl.h @@ -7,4 +7,8 @@ long diag324_pibbuf(unsigned long arg); long diag324_piblen(unsigned long arg); +long diag310_memtop_stride(unsigned long arg); +long diag310_memtop_len(unsigned long arg); +long diag310_memtop_buf(unsigned long arg); + #endif /* _DIAG_IOCTL_H */ diff --git a/arch/s390/kernel/diag/diag_misc.c b/arch/s390/kernel/diag/diag_misc.c index 1f4f5c757329..efffe02ea02e 100644 --- a/arch/s390/kernel/diag/diag_misc.c +++ b/arch/s390/kernel/diag/diag_misc.c @@ -26,6 +26,15 @@ static long diag_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case DIAG324_GET_PIBBUF: rc = diag324_pibbuf(arg); break; + case DIAG310_GET_STRIDE: + rc = diag310_memtop_stride(arg); + break; + case DIAG310_GET_MEMTOPLEN: + rc = diag310_memtop_len(arg); + break; + case DIAG310_GET_MEMTOPBUF: + rc = diag310_memtop_buf(arg); + break; default: rc = -ENOIOCTLCMD; break; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index b7a55ea6c77a..d9d6edaf8de8 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -55,6 +55,7 @@ static void __init sclp_early_facilities_detect(void) if (sccb->fac91 & 0x40) get_lowcore()->machine_flags |= MACHINE_FLAG_TLB_GUEST; sclp.has_diag204_bif = !!(sccb->fac98 & 0x80); + sclp.has_diag310 = !!(sccb->fac91 & 0x80); if (sccb->cpuoff > 134) { sclp.has_diag318 = !!(sccb->byte_134 & 0x80); sclp.has_diag320 = !!(sccb->byte_134 & 0x04); From 26701574cee6777f867f89b4a5c667817e1ee0dd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 7 Jan 2025 11:28:58 +0100 Subject: [PATCH 48/48] s390/futex: Fix FUTEX_OP_ANDN implementation The futex operation FUTEX_OP_ANDN is supposed to implement *(int *)UADDR2 &= ~OPARG; The s390 implementation just implements an AND instead of ANDN. Add the missing bitwise not operation to oparg to fix this. This is broken since nearly 19 years, so it looks like user space is not making use of this operation. Fixes: 3363fbdd6fb4 ("[PATCH] s390: futex atomic operations") Cc: stable@vger.kernel.org Signed-off-by: Heiko Carstens Acked-by: Alexander Gordeev Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/futex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index eaeaeb3ff0be..752a2310f0d6 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -44,7 +44,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, break; case FUTEX_OP_ANDN: __futex_atomic_op("lr %2,%1\nnr %2,%5\n", - ret, oldval, newval, uaddr, oparg); + ret, oldval, newval, uaddr, ~oparg); break; case FUTEX_OP_XOR: __futex_atomic_op("lr %2,%1\nxr %2,%5\n",