- Do not remove the MCE sysfs hierarchy if thresholding sysfs nodes init

fails due to new/unknown banks present, which in itself is not fatal
   anyway; add default names for new banks
 
 - Make sure MCE polling settings are honored after CMCI storms
 
 - Make sure MCE threshold limit is reset after the thresholding interrupt has
   been serviced
 
 - Clean up properly and disable CMCI banks on shutdown so that
   a second/kexec-ed kernel can rediscover those banks again
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmhqLrkACgkQEsHwGGHe
 VUqm/hAAlg6nX/uJIeszPpZK1o4j6WP3IZ0RAo5yxd9mQC+zNP35Xqv3MOUBZVGE
 1EhrQSgrqeC9NIbT8a9ansnc3BKxODOB8raoNA3HpViTG3LeQ5ycmpJv5qWqcr8B
 EV14BpZHAEx3AXAiDcGXkuKYM9RrpHtOwtyKjib4ScT+xCkzQzJtrO2hPTk8Slr/
 8Hly14+st7Iqvnh9nH1TeMOjogGg+lr9cIlG6rzZGj3IPbfEWbvmHhjJut7p4TfU
 g5AY3djzJ8eyrOv3aKxgVDkJ7qet283sc+mvTzrhAnoJEYI9v7tde4g6AKJj0BLA
 +u0tTOu47c/wijLcHPpaS+zwifo4BxKDIG8q+tHT6ixMMPT3Mev8nwanjAotjgz7
 WSN3eL9jie+IJPq4c0eN2z2um5tiqxFHF5M7q4Ol5VJiUU8Wa31B/pzPZymQoCWZ
 F/SC5VVq+ZwfRqzQMAK5dHQSj1zvkbtb0HOMYoFTOU1JocF7Px1gxx401UQ6pKdG
 Qw2rE1SUKxaQT4HZ2+SRvO2egJItsQw4r+ZT/7sMQhII9v9qK500kD8o30HcCEh3
 o8kT+dQwKEv0KLga5vYFnITT29XM9GySdAEI7HxKi/kpnwaUppUljuycfhzAMtYe
 xz86txluUsk7sUW8eUPgjuKPYO3a20FkY8VB5TYWTHxJdMAeb4E=
 =JI7d
 -----END PGP SIGNATURE-----

Merge tag 'ras_urgent_for_v6.16_rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS fixes from Borislav Petkov:

 - Do not remove the MCE sysfs hierarchy if thresholding sysfs nodes
   init fails due to new/unknown banks present, which in itself is not
   fatal anyway; add default names for new banks

 - Make sure MCE polling settings are honored after CMCI storms

 - Make sure MCE threshold limit is reset after the thresholding
   interrupt has been serviced

 - Clean up properly and disable CMCI banks on shutdown so that a
   second/kexec-ed kernel can rediscover those banks again

* tag 'ras_urgent_for_v6.16_rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Make sure CMCI banks are cleared during shutdown on Intel
  x86/mce/amd: Fix threshold limit reset
  x86/mce/amd: Add default names for MCA banks and blocks
  x86/mce: Ensure user polling settings are honored when restarting timer
  x86/mce: Don't remove sysfs if thresholding sysfs init fails
This commit is contained in:
Linus Torvalds 2025-07-06 09:17:48 -07:00
commit bdde3141ce
3 changed files with 29 additions and 24 deletions

View File

@ -350,7 +350,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
struct thresh_restart {
struct threshold_block *b;
int reset;
int set_lvt_off;
int lvt_off;
u16 old_limit;
@ -432,13 +431,13 @@ static void threshold_restart_bank(void *_tr)
rdmsr(tr->b->address, lo, hi);
if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
tr->reset = 1; /* limit cannot be lower than err count */
if (tr->reset) { /* reset err count and overflow bit */
hi =
(hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
(THRESHOLD_MAX - tr->b->threshold_limit);
/*
* Reset error count and overflow bit.
* This is done during init or after handling an interrupt.
*/
if (hi & MASK_OVERFLOW_HI || tr->set_lvt_off) {
hi &= ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI);
hi |= THRESHOLD_MAX - tr->b->threshold_limit;
} else if (tr->old_limit) { /* change limit w/o reset */
int new_count = (hi & THRESHOLD_MAX) +
(tr->old_limit - tr->b->threshold_limit);
@ -1113,13 +1112,20 @@ static const char *get_name(unsigned int cpu, unsigned int bank, struct threshol
}
bank_type = smca_get_bank_type(cpu, bank);
if (bank_type >= N_SMCA_BANK_TYPES)
return NULL;
if (b && (bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2)) {
if (b->block < ARRAY_SIZE(smca_umc_block_names))
return smca_umc_block_names[b->block];
return NULL;
}
if (b && b->block) {
snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_block_%u", b->block);
return buf_mcatype;
}
if (bank_type >= N_SMCA_BANK_TYPES) {
snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "th_bank_%u", bank);
return buf_mcatype;
}
if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1)

View File

@ -1740,6 +1740,11 @@ static void mc_poll_banks_default(void)
void (*mc_poll_banks)(void) = mc_poll_banks_default;
static bool should_enable_timer(unsigned long iv)
{
return !mca_cfg.ignore_ce && iv;
}
static void mce_timer_fn(struct timer_list *t)
{
struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
@ -1763,7 +1768,7 @@ static void mce_timer_fn(struct timer_list *t)
if (mce_get_storm_mode()) {
__start_timer(t, HZ);
} else {
} else if (should_enable_timer(iv)) {
__this_cpu_write(mce_next_interval, iv);
__start_timer(t, iv);
}
@ -2156,11 +2161,10 @@ static void mce_start_timer(struct timer_list *t)
{
unsigned long iv = check_interval * HZ;
if (mca_cfg.ignore_ce || !iv)
return;
this_cpu_write(mce_next_interval, iv);
__start_timer(t, iv);
if (should_enable_timer(iv)) {
this_cpu_write(mce_next_interval, iv);
__start_timer(t, iv);
}
}
static void __mcheck_cpu_setup_timer(void)
@ -2801,15 +2805,9 @@ static int mce_cpu_dead(unsigned int cpu)
static int mce_cpu_online(unsigned int cpu)
{
struct timer_list *t = this_cpu_ptr(&mce_timer);
int ret;
mce_device_create(cpu);
ret = mce_threshold_create_device(cpu);
if (ret) {
mce_device_remove(cpu);
return ret;
}
mce_threshold_create_device(cpu);
mce_reenable_cpu();
mce_start_timer(t);
return 0;

View File

@ -478,6 +478,7 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c)
void mce_intel_feature_clear(struct cpuinfo_x86 *c)
{
intel_clear_lmce();
cmci_clear();
}
bool intel_filter_mce(struct mce *m)