mirror of
				git://git.yoctoproject.org/linux-yocto.git
				synced 2025-10-22 15:03:53 +02:00 
			
		
		
		
	x86,fs/resctrl: Remove inappropriate references to cacheinfo in the resctrl subsystem
In the resctrl subsystem's Sub-NUMA Cluster (SNC) mode, the rdt_mon_domain
structure representing a NUMA node relies on the cacheinfo interface
(rdt_mon_domain::ci) to store L3 cache information (e.g., shared_cpu_map)
for monitoring. The L3 cache information of a SNC NUMA node determines
which domains are summed for the "top level" L3-scoped events.
rdt_mon_domain::ci is initialized using the first online CPU of a NUMA
node. When this CPU goes offline, its shared_cpu_map is cleared to contain
only the offline CPU itself. Subsequently, attempting to read counters
via smp_call_on_cpu(offline_cpu) fails (and error ignored), returning
zero values for "top-level events" without any error indication.
Replace the cacheinfo references in struct rdt_mon_domain and struct
rmid_read with the cacheinfo ID (a unique identifier for the L3 cache).
rdt_domain_hdr::cpu_mask contains the online CPUs associated with that
domain. When reading "top-level events", select a CPU from
rdt_domain_hdr::cpu_mask and utilize its L3 shared_cpu_map to determine
valid CPUs for reading RMID counter via the MSR interface.
Considering all CPUs associated with the L3 cache improves the chances
of picking a housekeeping CPU on which the counter reading work can be
queued, avoiding an unnecessary IPI.
Fixes: 328ea68874 ("x86/resctrl: Prepare for new Sub-NUMA Cluster (SNC) monitor files")
Signed-off-by: Qinyun Tan <qinyuntan@linux.alibaba.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/20250530182053.37502-2-qinyuntan@linux.alibaba.com
			
			
This commit is contained in:
		
							parent
							
								
									9afe652958
								
							
						
					
					
						commit
						594902c986
					
				|  | @ -498,6 +498,7 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) | |||
| 	struct rdt_hw_mon_domain *hw_dom; | ||||
| 	struct rdt_domain_hdr *hdr; | ||||
| 	struct rdt_mon_domain *d; | ||||
| 	struct cacheinfo *ci; | ||||
| 	int err; | ||||
| 
 | ||||
| 	lockdep_assert_held(&domain_list_lock); | ||||
|  | @ -525,12 +526,13 @@ static void domain_add_cpu_mon(int cpu, struct rdt_resource *r) | |||
| 	d = &hw_dom->d_resctrl; | ||||
| 	d->hdr.id = id; | ||||
| 	d->hdr.type = RESCTRL_MON_DOMAIN; | ||||
| 	d->ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); | ||||
| 	if (!d->ci) { | ||||
| 	ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); | ||||
| 	if (!ci) { | ||||
| 		pr_warn_once("Can't find L3 cache for CPU:%d resource %s\n", cpu, r->name); | ||||
| 		mon_domain_free(hw_dom); | ||||
| 		return; | ||||
| 	} | ||||
| 	d->ci_id = ci->id; | ||||
| 	cpumask_set_cpu(cpu, &d->hdr.cpu_mask); | ||||
| 
 | ||||
| 	arch_mon_domain_online(r, d); | ||||
|  |  | |||
|  | @ -594,9 +594,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) | |||
| 	struct rmid_read rr = {0}; | ||||
| 	struct rdt_mon_domain *d; | ||||
| 	struct rdtgroup *rdtgrp; | ||||
| 	int domid, cpu, ret = 0; | ||||
| 	struct rdt_resource *r; | ||||
| 	struct cacheinfo *ci; | ||||
| 	struct mon_data *md; | ||||
| 	int domid, ret = 0; | ||||
| 
 | ||||
| 	rdtgrp = rdtgroup_kn_lock_live(of->kn); | ||||
| 	if (!rdtgrp) { | ||||
|  | @ -623,10 +624,14 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) | |||
| 		 * one that matches this cache id. | ||||
| 		 */ | ||||
| 		list_for_each_entry(d, &r->mon_domains, hdr.list) { | ||||
| 			if (d->ci->id == domid) { | ||||
| 				rr.ci = d->ci; | ||||
| 			if (d->ci_id == domid) { | ||||
| 				rr.ci_id = d->ci_id; | ||||
| 				cpu = cpumask_any(&d->hdr.cpu_mask); | ||||
| 				ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); | ||||
| 				if (!ci) | ||||
| 					continue; | ||||
| 				mon_event_read(&rr, r, NULL, rdtgrp, | ||||
| 					       &d->ci->shared_cpu_map, evtid, false); | ||||
| 					       &ci->shared_cpu_map, evtid, false); | ||||
| 				goto checkresult; | ||||
| 			} | ||||
| 		} | ||||
|  |  | |||
|  | @ -98,7 +98,7 @@ struct mon_data { | |||
|  *	   domains in @r sharing L3 @ci.id | ||||
|  * @evtid: Which monitor event to read. | ||||
|  * @first: Initialize MBM counter when true. | ||||
|  * @ci:    Cacheinfo for L3. Only set when @d is NULL. Used when summing domains. | ||||
|  * @ci_id: Cacheinfo id for L3. Only set when @d is NULL. Used when summing domains. | ||||
|  * @err:   Error encountered when reading counter. | ||||
|  * @val:   Returned value of event counter. If @rgrp is a parent resource group, | ||||
|  *	   @val includes the sum of event counts from its child resource groups. | ||||
|  | @ -112,7 +112,7 @@ struct rmid_read { | |||
| 	struct rdt_mon_domain	*d; | ||||
| 	enum resctrl_event_id	evtid; | ||||
| 	bool			first; | ||||
| 	struct cacheinfo	*ci; | ||||
| 	unsigned int		ci_id; | ||||
| 	int			err; | ||||
| 	u64			val; | ||||
| 	void			*arch_mon_ctx; | ||||
|  |  | |||
|  | @ -361,6 +361,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) | |||
| { | ||||
| 	int cpu = smp_processor_id(); | ||||
| 	struct rdt_mon_domain *d; | ||||
| 	struct cacheinfo *ci; | ||||
| 	struct mbm_state *m; | ||||
| 	int err, ret; | ||||
| 	u64 tval = 0; | ||||
|  | @ -388,7 +389,8 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) | |||
| 	} | ||||
| 
 | ||||
| 	/* Summing domains that share a cache, must be on a CPU for that cache. */ | ||||
| 	if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map)) | ||||
| 	ci = get_cpu_cacheinfo_level(cpu, RESCTRL_L3_CACHE); | ||||
| 	if (!ci || ci->id != rr->ci_id) | ||||
| 		return -EINVAL; | ||||
| 
 | ||||
| 	/*
 | ||||
|  | @ -400,7 +402,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr) | |||
| 	 */ | ||||
| 	ret = -EINVAL; | ||||
| 	list_for_each_entry(d, &rr->r->mon_domains, hdr.list) { | ||||
| 		if (d->ci->id != rr->ci->id) | ||||
| 		if (d->ci_id != rr->ci_id) | ||||
| 			continue; | ||||
| 		err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, | ||||
| 					     rr->evtid, &tval, rr->arch_mon_ctx); | ||||
|  |  | |||
|  | @ -3036,7 +3036,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, | |||
| 	char name[32]; | ||||
| 
 | ||||
| 	snc_mode = r->mon_scope == RESCTRL_L3_NODE; | ||||
| 	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); | ||||
| 	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id); | ||||
| 	if (snc_mode) | ||||
| 		sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id); | ||||
| 
 | ||||
|  | @ -3061,7 +3061,7 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d, | |||
| 		return -EPERM; | ||||
| 
 | ||||
| 	list_for_each_entry(mevt, &r->evt_list, list) { | ||||
| 		domid = do_sum ? d->ci->id : d->hdr.id; | ||||
| 		domid = do_sum ? d->ci_id : d->hdr.id; | ||||
| 		priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum); | ||||
| 		if (WARN_ON_ONCE(!priv)) | ||||
| 			return -EINVAL; | ||||
|  | @ -3089,7 +3089,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, | |||
| 	lockdep_assert_held(&rdtgroup_mutex); | ||||
| 
 | ||||
| 	snc_mode = r->mon_scope == RESCTRL_L3_NODE; | ||||
| 	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id); | ||||
| 	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci_id : d->hdr.id); | ||||
| 	kn = kernfs_find_and_get(parent_kn, name); | ||||
| 	if (kn) { | ||||
| 		/*
 | ||||
|  |  | |||
|  | @ -159,7 +159,7 @@ struct rdt_ctrl_domain { | |||
| /**
 | ||||
|  * struct rdt_mon_domain - group of CPUs sharing a resctrl monitor resource | ||||
|  * @hdr:		common header for different domain types | ||||
|  * @ci:			cache info for this domain | ||||
|  * @ci_id:		cache info id for this domain | ||||
|  * @rmid_busy_llc:	bitmap of which limbo RMIDs are above threshold | ||||
|  * @mbm_total:		saved state for MBM total bandwidth | ||||
|  * @mbm_local:		saved state for MBM local bandwidth | ||||
|  | @ -170,7 +170,7 @@ struct rdt_ctrl_domain { | |||
|  */ | ||||
| struct rdt_mon_domain { | ||||
| 	struct rdt_domain_hdr		hdr; | ||||
| 	struct cacheinfo		*ci; | ||||
| 	unsigned int			ci_id; | ||||
| 	unsigned long			*rmid_busy_llc; | ||||
| 	struct mbm_state		*mbm_total; | ||||
| 	struct mbm_state		*mbm_local; | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 Qinyun Tan
						Qinyun Tan