
[ Upstream commit 1f13c60d84e880df6698441026e64f84c7110c49 ]

The following commit, 12 years ago:

  7e98b71920 ("x86, idle: Use static_cpu_has() for CLFLUSH workaround, add barriers")

added barriers around the CLFLUSH in mwait_idle_with_hints(), justified with:

  ... and add memory barriers around it since the documentation is explicit
  that CLFLUSH is only ordered with respect to MFENCE.

This also triggered, 11 years ago, the same adjustment in:

  f8e617f458 ("sched/idle/x86: Optimize unnecessary mwait_idle() resched IPIs")

during development, although it failed to get the static_cpu_has_bug()
treatment.

X86_BUG_CLFLUSH_MONITOR (a.k.a the AAI65 errata) is specific to Intel CPUs,
and the SDM currently states:

  Executions of the CLFLUSH instruction are ordered with respect to each
  other and with respect to writes, locked read-modify-write instructions,
  and fence instructions[1].

With footnote 1 reading:

  Earlier versions of this manual specified that executions of the CLFLUSH
  instruction were ordered only by the MFENCE instruction.  All processors
  implementing the CLFLUSH instruction also order it relative to the other
  operations enumerated above.

i.e. The SDM was incorrect at the time, and barriers should not have been
inserted.  Double checking the original AAI65 errata (not available from
intel.com any more) shows no mention of barriers either.

Note: If this were a general codepath, the MFENCEs would be needed, because
      AMD CPUs of the same vintage do sport otherwise-unordered CLFLUSHs.

Remove the unnecessary barriers.  Furthermore, use a plain alternative(),
rather than static_cpu_has_bug() and/or no optimisation.  The workaround
is a single instruction.

Use an explicit %rax pointer rather than a general memory operand, because
MONITOR takes the pointer implicitly in the same way.

[ mingo: Cleaned up the commit a bit. ]

Fixes: 7e98b71920 ("x86, idle: Use static_cpu_has() for CLFLUSH workaround, add barriers")
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Acked-by: Borislav Petkov (AMD) <bp@alien8.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/20250402172458.1378112-1-andrew.cooper3@citrix.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_MWAIT_H
#define _ASM_X86_MWAIT_H

#include <linux/sched.h>
#include <linux/sched/idle.h>

#include <asm/cpufeature.h>
#include <asm/nospec-branch.h>

#define MWAIT_SUBSTATE_MASK		0xf
#define MWAIT_CSTATE_MASK		0xf
#define MWAIT_SUBSTATE_SIZE		4
#define MWAIT_HINT2CSTATE(hint)		(((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK)
#define MWAIT_HINT2SUBSTATE(hint)	((hint) & MWAIT_CSTATE_MASK)
#define MWAIT_C1_SUBSTATE_MASK		0xf0

#define CPUID_MWAIT_LEAF		5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED	0x1
#define CPUID5_ECX_INTERRUPT_BREAK	0x2

#define MWAIT_ECX_INTERRUPT_BREAK	0x1
#define MWAITX_ECX_TIMER_ENABLE		BIT(1)
#define MWAITX_MAX_WAIT_CYCLES		UINT_MAX
#define MWAITX_DISABLE_CSTATES		0xf0
#define TPAUSE_C01_STATE		1
#define TPAUSE_C02_STATE		0

static __always_inline void __monitor(const void *eax, unsigned long ecx,
				      unsigned long edx)
{
	/* "monitor %eax, %ecx, %edx;" */
	asm volatile(".byte 0x0f, 0x01, 0xc8;"
		     :: "a" (eax), "c" (ecx), "d"(edx));
}

static __always_inline void __monitorx(const void *eax, unsigned long ecx,
				       unsigned long edx)
{
	/* "monitorx %eax, %ecx, %edx;" */
	asm volatile(".byte 0x0f, 0x01, 0xfa;"
		     :: "a" (eax), "c" (ecx), "d"(edx));
}

static __always_inline void __mwait(unsigned long eax, unsigned long ecx)
{
	mds_idle_clear_cpu_buffers();

	/* "mwait %eax, %ecx;" */
	asm volatile(".byte 0x0f, 0x01, 0xc9;"
		     :: "a" (eax), "c" (ecx));
}

/*
 * MWAITX allows for a timer expiration to get the core out a wait state in
 * addition to the default MWAIT exit condition of a store appearing at a
 * monitored virtual address.
 *
 * Registers:
 *
 * MWAITX ECX[1]: enable timer if set
 * MWAITX EBX[31:0]: max wait time expressed in SW P0 clocks. The software P0
 *		     frequency is the same as the TSC frequency.
 *
 * Below is a comparison between MWAIT and MWAITX on AMD processors:
 *
 *                 MWAIT                           MWAITX
 * opcode          0f 01 c9           |            0f 01 fb
 * ECX[0]                  value of RFLAGS.IF seen by instruction
 * ECX[1]          unused/#GP if set  |            enable timer if set
 * ECX[31:2]                     unused/#GP if set
 * EAX                           unused (reserve for hint)
 * EBX[31:0]       unused             |            max wait time (P0 clocks)
 *
 *                 MONITOR                         MONITORX
 * opcode          0f 01 c8           |            0f 01 fa
 * EAX                     (logical) address to monitor
 * ECX                     #GP if not zero
 */
static __always_inline void __mwaitx(unsigned long eax, unsigned long ebx,
				     unsigned long ecx)
{
	/* No MDS buffer clear as this is AMD/HYGON only */

	/* "mwaitx %eax, %ebx, %ecx;" */
	asm volatile(".byte 0x0f, 0x01, 0xfb;"
		     :: "a" (eax), "b" (ebx), "c" (ecx));
}

static __always_inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{
	mds_idle_clear_cpu_buffers();
	/* "mwait %eax, %ecx;" */
	asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
		     :: "a" (eax), "c" (ecx));
}

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
static __always_inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
	if (static_cpu_has_bug(X86_BUG_MONITOR) || !current_set_polling_and_test()) {
		const void *addr = &current_thread_info()->flags;

		alternative_input("", "clflush (%[addr])", X86_BUG_CLFLUSH_MONITOR, [addr] "a" (addr));
		__monitor(addr, 0, 0);

		if (!need_resched()) {
			if (ecx & 1) {
				__mwait(eax, ecx);
			} else {
				__sti_mwait(eax, ecx);
				raw_local_irq_disable();
			}
		}
	}
	current_clr_polling();
}

/*
 * Caller can specify whether to enter C0.1 (low latency, less
 * power saving) or C0.2 state (saves more power, but longer wakeup
 * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR
 * which can force requests for C0.2 to be downgraded to C0.1.
 */
static inline void __tpause(u32 ecx, u32 edx, u32 eax)
{
	/* "tpause %ecx, %edx, %eax;" */
	#ifdef CONFIG_AS_TPAUSE
	asm volatile("tpause %%ecx\n"
		     :
		     : "c"(ecx), "d"(edx), "a"(eax));
	#else
	asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
		     :
		     : "c"(ecx), "d"(edx), "a"(eax));
	#endif
}

#endif /* _ASM_X86_MWAIT_H */
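Usage note (illustrative only, not part of this header): a caller such as an
idle driver encodes the target C-state and sub-state into the EAX hint using
the masks defined above, and passes MWAIT_ECX_INTERRUPT_BREAK in ECX when an
interrupt should break the wait even while interrupts are masked. The helper
name example_mwait_idle() below is hypothetical; a minimal sketch:

  /*
   * Hypothetical caller; the hint layout is the inverse of the
   * MWAIT_HINT2CSTATE()/MWAIT_HINT2SUBSTATE() macros in this header.
   */
  static void example_mwait_idle(unsigned int cstate, unsigned int substate)
  {
  	unsigned long eax = ((cstate & MWAIT_CSTATE_MASK) << MWAIT_SUBSTATE_SIZE) |
  			    (substate & MWAIT_SUBSTATE_MASK);

  	mwait_idle_with_hints(eax, MWAIT_ECX_INTERRUPT_BREAK);
  }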