Mirror of git://git.yoctoproject.org/linux-yocto.git

We've had issues with gcc and 'asm goto' before, and we created an
'asm_volatile_goto()' macro for that in the past: see commits
3f0116c323 ("compiler/gcc4: Add quirk for 'asm goto' miscompilation bug")
and a9f180345f ("compiler/gcc4: Make quirk for asm_volatile_goto()
unconditional").

Then, much later, we ended up removing the workaround in commit
43c249ea0b ("compiler-gcc.h: remove ancient workaround for gcc PR 58670")
because we no longer supported building the kernel with the affected gcc
versions, but we left the macro uses around.

Now, Sean Christopherson reports a new version of a very similar problem,
which is fixed by re-applying that ancient workaround. But the problem in
question is limited to only the 'asm goto with outputs' cases, so instead
of re-introducing the old workaround as-is, let's rename and limit the
workaround to just that much less common case.

It looks like there are at least two separate issues that all hit in this
area:

 (a) Some versions of gcc don't mark the asm goto as 'volatile' when it
     has outputs:

       https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619
       https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110420

     which is easy to work around by just adding the 'volatile' by hand.

 (b) Internal compiler errors:

       https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110422

     which are worked around by adding the extra empty 'asm' as a
     barrier, as in the original workaround.

But the problem Sean sees may be a third thing, since it involves bad
code generation (not an ICE) even with the manually added 'volatile'.
The same old workaround works for this case too, even if this feels a
bit like voodoo programming and may only be hiding the issue.

Reported-and-tested-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/all/20240208220604.140859-1-seanjc@google.com/
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Uros Bizjak <ubizjak@gmail.com>
Cc: Jakub Jelinek <jakub@redhat.com>
Cc: Andrew Pinski <quic_apinski@quicinc.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
143 lines
4.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_UM_CPUFEATURE_H
#define _ASM_UM_CPUFEATURE_H

#include <asm/processor.h>

#if defined(__KERNEL__) && !defined(__ASSEMBLY__)

#include <asm/asm.h>
#include <linux/bitops.h>

extern const char * const x86_cap_flags[NCAPINTS*32];
extern const char * const x86_power_flags[32];
#define X86_CAP_FMT "%s"
#define x86_cap_flag(flag) x86_cap_flags[flag]

/*
 * In order to save room, we index into this array by doing
 * X86_BUG_<name> - NCAPINTS*32.
 */
extern const char * const x86_bug_flags[NBUGINTS*32];

#define test_cpu_cap(c, bit)						\
	test_bit(bit, (unsigned long *)((c)->x86_capability))

/*
 * There are 32 bits/features in each mask word.  The high bits
 * (selected with (bit>>5) give us the word number and the low 5
 * bits give us the bit/feature number inside the word.
 * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can
 * see if it is set in the mask word.
 */
#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit)	\
	(((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word ))

#define cpu_has(c, bit)							\
	test_cpu_cap(c, bit)

#define this_cpu_has(bit)						\
	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 :	\
	 x86_this_cpu_test_bit(bit,					\
		(unsigned long __percpu *)&cpu_info.x86_capability))

/*
 * This macro is for detection of features which need kernel
 * infrastructure to be used.  It may *not* directly test the CPU
 * itself.  Use the cpu_has() family if you want true runtime
 * testing of CPU features, like in hypervisor code where you are
 * supporting a possible guest feature where host support for it
 * is not relevant.
 */
#define cpu_feature_enabled(bit)	\
	(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))

#define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)

#define set_cpu_cap(c, bit)	set_bit(bit, (unsigned long *)((c)->x86_capability))

extern void setup_clear_cpu_cap(unsigned int bit);

#define setup_force_cpu_cap(bit) do {			\
	set_cpu_cap(&boot_cpu_data, bit);		\
	set_bit(bit, (unsigned long *)cpu_caps_set);	\
} while (0)

#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)

/*
 * Static testing of CPU features. Used the same as boot_cpu_has(). It
 * statically patches the target code for additional performance. Use
 * static_cpu_has() only in fast paths, where every cycle counts. Which
 * means that the boot_cpu_has() variant is already fast enough for the
 * majority of cases and you should stick to using it as it is generally
 * only two instructions: a RIP-relative MOV and a TEST.
 */
static __always_inline bool _static_cpu_has(u16 bit)
{
	asm goto("1: jmp 6f\n"
		 "2:\n"
		 ".skip -(((5f-4f) - (2b-1b)) > 0) * "
			 "((5f-4f) - (2b-1b)),0x90\n"
		 "3:\n"
		 ".section .altinstructions,\"a\"\n"
		 " .long 1b - .\n"		/* src offset */
		 " .long 4f - .\n"		/* repl offset */
		 " .word %P[always]\n"		/* always replace */
		 " .byte 3b - 1b\n"		/* src len */
		 " .byte 5f - 4f\n"		/* repl len */
		 " .byte 3b - 2b\n"		/* pad len */
		 ".previous\n"
		 ".section .altinstr_replacement,\"ax\"\n"
		 "4: jmp %l[t_no]\n"
		 "5:\n"
		 ".previous\n"
		 ".section .altinstructions,\"a\"\n"
		 " .long 1b - .\n"		/* src offset */
		 " .long 0\n"			/* no replacement */
		 " .word %P[feature]\n"		/* feature bit */
		 " .byte 3b - 1b\n"		/* src len */
		 " .byte 0\n"			/* repl len */
		 " .byte 0\n"			/* pad len */
		 ".previous\n"
		 ".section .altinstr_aux,\"ax\"\n"
		 "6:\n"
		 " testb %[bitnum],%[cap_byte]\n"
		 " jnz %l[t_yes]\n"
		 " jmp %l[t_no]\n"
		 ".previous\n"
		 : : [feature]  "i" (bit),
		     [always]   "i" (X86_FEATURE_ALWAYS),
		     [bitnum]   "i" (1 << (bit & 7)),
		     [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
		 : : t_yes, t_no);
t_yes:
	return true;
t_no:
	return false;
}

#define static_cpu_has(bit)					\
(								\
	__builtin_constant_p(boot_cpu_has(bit)) ?		\
		boot_cpu_has(bit) :				\
		_static_cpu_has(bit)				\
)

#define cpu_has_bug(c, bit)	cpu_has(c, (bit))
#define set_cpu_bug(c, bit)	set_cpu_cap(c, (bit))

#define static_cpu_has_bug(bit)	static_cpu_has((bit))
#define boot_cpu_has_bug(bit)	cpu_has_bug(&boot_cpu_data, (bit))
#define boot_cpu_set_bug(bit)	set_cpu_cap(&boot_cpu_data, (bit))

#define MAX_CPU_FEATURES	(NCAPINTS * 32)
#define cpu_have_feature	boot_cpu_has

#define CPU_FEATURE_TYPEFMT	"x86,ven%04Xfam%04Xmod%04X"
#define CPU_FEATURE_TYPEVAL	boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
				boot_cpu_data.x86_model

#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
#endif /* _ASM_UM_CPUFEATURE_H */
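The comments in the header above describe when to prefer the plain runtime
tests over the statically patched ones. A minimal illustrative sketch of that
distinction, assuming <linux/printk.h> for pr_info() and an existing feature
bit such as X86_FEATURE_XMM2 (the helper name example_feature_checks is
hypothetical and not part of this file):

	/* Illustrative only -- not part of cpufeature.h. */
	static void example_feature_checks(void)
	{
		/* Plain runtime test against the boot CPU's capability bits;
		 * fast enough for the vast majority of callers. */
		if (boot_cpu_has(X86_FEATURE_XMM2))
			pr_info("SSE2 reported by the CPU\n");

		/* Bits disabled at build time fold to 0 at compile time;
		 * everything else goes through the statically patched
		 * static_cpu_has() fast path. */
		if (cpu_feature_enabled(X86_FEATURE_XMM2))
			pr_info("SSE2 usable by the kernel\n");
	}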