From 22679d807dea5c065d8019acfce48f20e87ba5ca Mon Sep 17 00:00:00 2001 From: Abhinav Ananthu Date: Fri, 20 Jun 2025 15:29:21 +0530 Subject: [PATCH 01/11] rust: opp: use c_* types via kernel prelude Update OPP FFI callback signatures to use `c_int` from the `kernel::prelude`, instead of accessing it via `kernel::ffi::c_int`. Although these types are defined in a crate named `ffi`, they are re-exported via the `kernel::prelude` and should be used from there. This aligns with the Rust-for-Linux coding guidelines and ensures ABI correctness when interfacing with C code. Suggested-by: Viresh Kumar Signed-off-by: Abhinav Ananthu Signed-off-by: Viresh Kumar --- rust/kernel/opp.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rust/kernel/opp.rs b/rust/kernel/opp.rs index a566fc3e7dcb..846583da9a2f 100644 --- a/rust/kernel/opp.rs +++ b/rust/kernel/opp.rs @@ -514,9 +514,9 @@ impl Config { dev: *mut bindings::device, opp_table: *mut bindings::opp_table, opp: *mut bindings::dev_pm_opp, - _data: *mut kernel::ffi::c_void, + _data: *mut c_void, scaling_down: bool, - ) -> kernel::ffi::c_int { + ) -> c_int { from_result(|| { // SAFETY: 'dev' is guaranteed by the C code to be valid. let dev = unsafe { Device::get_device(dev) }; @@ -540,8 +540,8 @@ impl Config { old_opp: *mut bindings::dev_pm_opp, new_opp: *mut bindings::dev_pm_opp, regulators: *mut *mut bindings::regulator, - count: kernel::ffi::c_uint, - ) -> kernel::ffi::c_int { + count: c_uint, + ) -> c_int { from_result(|| { // SAFETY: 'dev' is guaranteed by the C code to be valid. let dev = unsafe { Device::get_device(dev) }; From 01d40d3c146449e8538bfffc65e90bfbfc2a99e8 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 10 Jul 2025 12:26:56 +0200 Subject: [PATCH 02/11] Documentation: power: Remove info about non-existing QoS interfaces cpu_latency_qos_add|remove_notifier() doesn't exist, hence let's drop the documentation of them. Signed-off-by: Ulf Hansson Link: https://patch.msgid.link/20250710102656.127654-1-ulf.hansson@linaro.org Signed-off-by: Rafael J. Wysocki --- Documentation/power/pm_qos_interface.rst | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Documentation/power/pm_qos_interface.rst b/Documentation/power/pm_qos_interface.rst index 69b0fe3e2542..5019c79c7710 100644 --- a/Documentation/power/pm_qos_interface.rst +++ b/Documentation/power/pm_qos_interface.rst @@ -52,13 +52,6 @@ int cpu_latency_qos_request_active(handle): Returns if the request is still active, i.e. it has not been removed from the CPU latency QoS list. -int cpu_latency_qos_add_notifier(notifier): - Adds a notification callback function to the CPU latency QoS. The callback is - called when the aggregated value for the CPU latency QoS is changed. - -int cpu_latency_qos_remove_notifier(notifier): - Removes the notification callback function from the CPU latency QoS. - From user space: From ee2736848f1c9b64a9b1321d839f084ce80d4e66 Mon Sep 17 00:00:00 2001 From: "Yury Norov [NVIDIA]" Date: Wed, 4 Jun 2025 17:39:06 -0400 Subject: [PATCH 03/11] cpuidle: dt: fix opencoded for_each_cpu() in idle_state_valid() The function opencodes the for_each_cpu_from() by using an open for-loop. Fix that in sake of readability. While there, drop the 'valid' variable as it's pretty useless here. Signed-off-by: Yury Norov [NVIDIA] Link: https://patch.msgid.link/20250604213908.27819-1-yury.norov@gmail.com Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/dt_idle_states.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c index 97feb7d8fb23..558d49838990 100644 --- a/drivers/cpuidle/dt_idle_states.c +++ b/drivers/cpuidle/dt_idle_states.c @@ -98,7 +98,6 @@ static bool idle_state_valid(struct device_node *state_node, unsigned int idx, { int cpu; struct device_node *cpu_node, *curr_state_node; - bool valid = true; /* * Compare idle state phandles for index idx on all CPUs in the @@ -107,20 +106,17 @@ static bool idle_state_valid(struct device_node *state_node, unsigned int idx, * retrieved from. If a mismatch is found bail out straight * away since we certainly hit a firmware misconfiguration. */ - for (cpu = cpumask_next(cpumask_first(cpumask), cpumask); - cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpumask)) { + cpu = cpumask_first(cpumask) + 1; + for_each_cpu_from(cpu, cpumask) { cpu_node = of_cpu_device_node_get(cpu); curr_state_node = of_get_cpu_state_node(cpu_node, idx); - if (state_node != curr_state_node) - valid = false; - of_node_put(curr_state_node); of_node_put(cpu_node); - if (!valid) - break; + if (state_node != curr_state_node) + return false; } - return valid; + return true; } /** From 914cc799b28f17d369d5b4db3b941957d18157e8 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Mon, 21 Apr 2025 11:00:17 +0800 Subject: [PATCH 04/11] PM / devfreq: governor: Replace sscanf() with kstrtoul() in set_freq_store() Replace sscanf() with kstrtoul() in set_freq_store() and check the result to avoid invalid input. Signed-off-by: Lifeng Zheng Link: https://lore.kernel.org/lkml/20250421030020.3108405-2-zhenglifeng1@huawei.com/ Signed-off-by: Chanwoo Choi --- drivers/devfreq/governor_userspace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c index d1aa6806b683..175de0c0b50e 100644 --- a/drivers/devfreq/governor_userspace.c +++ b/drivers/devfreq/governor_userspace.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -39,10 +40,13 @@ static ssize_t set_freq_store(struct device *dev, struct device_attribute *attr, unsigned long wanted; int err = 0; + err = kstrtoul(buf, 0, &wanted); + if (err) + return err; + mutex_lock(&devfreq->lock); data = devfreq->governor_data; - sscanf(buf, "%lu", &wanted); data->user_frequency = wanted; data->valid = true; err = update_devfreq(devfreq); From 5487f2595bc821348848b0708f42df825d856f9e Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Mon, 21 Apr 2025 11:00:18 +0800 Subject: [PATCH 05/11] PM / devfreq: Limit max_freq with scaling_min_freq Limit max_freq in devfreq_get_freq_range() with scaling_min_freq to avoid showing an unreachable freq when reading it. Use macro clamp to simplify code. Signed-off-by: Lifeng Zheng Link: https://lore.kernel.org/lkml/20250421030020.3108405-3-zhenglifeng1@huawei.com/ Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 98657d3b9435..2810c84b9f8a 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -152,11 +152,8 @@ void devfreq_get_freq_range(struct devfreq *devfreq, (unsigned long)HZ_PER_KHZ * qos_max_freq); /* Apply constraints from OPP interface */ - *min_freq = max(*min_freq, devfreq->scaling_min_freq); - *max_freq = min(*max_freq, devfreq->scaling_max_freq); - - if (*min_freq > *max_freq) - *min_freq = *max_freq; + *max_freq = clamp(*max_freq, devfreq->scaling_min_freq, devfreq->scaling_max_freq); + *min_freq = clamp(*min_freq, devfreq->scaling_min_freq, *max_freq); } EXPORT_SYMBOL(devfreq_get_freq_range); From a98d36802f677d90333cc431e23f13cd53608a96 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Mon, 21 Apr 2025 11:00:19 +0800 Subject: [PATCH 06/11] PM / devfreq: Remove redundant devfreq_get_freq_range() calling in devfreq_add_device() The calling of devfreq_get_freq_range() in devfreq_add_device() is redundant because min_freq and max_freq are never used. Remove it. Signed-off-by: Lifeng Zheng Link: https://lore.kernel.org/lkml/20250421030020.3108405-4-zhenglifeng1@huawei.com/ Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 2810c84b9f8a..18e3f7e063a4 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -804,7 +804,6 @@ struct devfreq *devfreq_add_device(struct device *dev, { struct devfreq *devfreq; struct devfreq_governor *governor; - unsigned long min_freq, max_freq; int err = 0; if (!dev || !profile || !governor_name) { @@ -872,8 +871,6 @@ struct devfreq *devfreq_add_device(struct device *dev, goto err_dev; } - devfreq_get_freq_range(devfreq, &min_freq, &max_freq); - devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev); devfreq->opp_table = dev_pm_opp_get_opp_table(dev); if (IS_ERR(devfreq->opp_table)) From bab7834c03820eb11269bc48f07c3800192460d2 Mon Sep 17 00:00:00 2001 From: Lifeng Zheng Date: Mon, 21 Apr 2025 11:00:20 +0800 Subject: [PATCH 07/11] PM / devfreq: Check governor before using governor->name Commit 96ffcdf239de ("PM / devfreq: Remove redundant governor_name from struct devfreq") removes governor_name and uses governor->name to replace it. But devfreq->governor may be NULL and directly using devfreq->governor->name may cause null pointer exception. Move the check of governor to before using governor->name. Fixes: 96ffcdf239de ("PM / devfreq: Remove redundant governor_name from struct devfreq") Signed-off-by: Lifeng Zheng Link: https://lore.kernel.org/lkml/20250421030020.3108405-5-zhenglifeng1@huawei.com/ Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 18e3f7e063a4..46f3a8053197 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -1376,15 +1376,11 @@ int devfreq_remove_governor(struct devfreq_governor *governor) int ret; struct device *dev = devfreq->dev.parent; + if (!devfreq->governor) + continue; + if (!strncmp(devfreq->governor->name, governor->name, DEVFREQ_NAME_LEN)) { - /* we should have a devfreq governor! */ - if (!devfreq->governor) { - dev_warn(dev, "%s: Governor %s NOT present\n", - __func__, governor->name); - continue; - /* Fall through */ - } ret = devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_STOP, NULL); if (ret) { From 78c5845fbbf6aaeb9959c5fbaee5cc53ef5f38c2 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Fri, 7 Feb 2025 16:13:50 -1000 Subject: [PATCH 08/11] PM / devfreq: Fix a index typo in trans_stat Fixes: 4920ee6dcfaf ("PM / devfreq: Convert to use sysfs_emit_at() API") Signed-off-by: pls Link: https://patchwork.kernel.org/project/linux-pm/patch/20250515143100.17849-1-chanwoo@kernel.org/ Signed-off-by: Chanwoo Choi --- drivers/devfreq/devfreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 46f3a8053197..c5f5960e643b 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -1733,7 +1733,7 @@ static ssize_t trans_stat_show(struct device *dev, for (i = 0; i < max_state; i++) { if (len >= PAGE_SIZE - 1) break; - if (df->freq_table[2] == df->previous_freq) + if (df->freq_table[i] == df->previous_freq) len += sysfs_emit_at(buf, len, "*"); else len += sysfs_emit_at(buf, len, " "); From c3bc361393b289df3499f5a87276367c71fae7c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Tue, 13 May 2025 22:39:02 +0200 Subject: [PATCH 09/11] PM / devfreq: sun8i-a33-mbus: Simplify by using more devm functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use devm allocators for enabling the bus clock and clk_rate_exclusive_get(). This simplifies error handling and the remove callback. Reviewed-by: Chen-Yu Tsai Signed-off-by: Uwe Kleine-König Tested-by: Corentin LABBE Link: https://patchwork.kernel.org/project/linux-pm/patch/20250513203908.205060-2-u.kleine-koenig@baylibre.com/ Signed-off-by: Chanwoo Choi --- drivers/devfreq/sun8i-a33-mbus.c | 38 ++++++++------------------------ 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/drivers/devfreq/sun8i-a33-mbus.c b/drivers/devfreq/sun8i-a33-mbus.c index 7c6ae91ede1f..4bd5657558d6 100644 --- a/drivers/devfreq/sun8i-a33-mbus.c +++ b/drivers/devfreq/sun8i-a33-mbus.c @@ -360,7 +360,7 @@ static int sun8i_a33_mbus_probe(struct platform_device *pdev) if (IS_ERR(priv->reg_mbus)) return PTR_ERR(priv->reg_mbus); - priv->clk_bus = devm_clk_get(dev, "bus"); + priv->clk_bus = devm_clk_get_enabled(dev, "bus"); if (IS_ERR(priv->clk_bus)) return dev_err_probe(dev, PTR_ERR(priv->clk_bus), "failed to get bus clock\n"); @@ -375,24 +375,15 @@ static int sun8i_a33_mbus_probe(struct platform_device *pdev) return dev_err_probe(dev, PTR_ERR(priv->clk_mbus), "failed to get mbus clock\n"); - ret = clk_prepare_enable(priv->clk_bus); - if (ret) - return dev_err_probe(dev, ret, - "failed to enable bus clock\n"); - /* Lock the DRAM clock rate to keep priv->nominal_bw in sync. */ - ret = clk_rate_exclusive_get(priv->clk_dram); - if (ret) { - err = "failed to lock dram clock rate\n"; - goto err_disable_bus; - } + ret = devm_clk_rate_exclusive_get(dev, priv->clk_dram); + if (ret) + return dev_err_probe(dev, ret, "failed to lock dram clock rate\n"); /* Lock the MBUS clock rate to keep MBUS_TMR_PERIOD in sync. */ - ret = clk_rate_exclusive_get(priv->clk_mbus); - if (ret) { - err = "failed to lock mbus clock rate\n"; - goto err_unlock_dram; - } + ret = devm_clk_rate_exclusive_get(dev, priv->clk_mbus); + if (ret) + return dev_err_probe(dev, ret, "failed to lock mbus clock rate\n"); priv->gov_data.upthreshold = 10; priv->gov_data.downdifferential = 5; @@ -405,10 +396,8 @@ static int sun8i_a33_mbus_probe(struct platform_device *pdev) priv->profile.max_state = max_state; ret = devm_pm_opp_set_clkname(dev, "dram"); - if (ret) { - err = "failed to add OPP table\n"; - goto err_unlock_mbus; - } + if (ret) + return dev_err_probe(dev, ret, "failed to add OPP table\n"); base_freq = clk_get_rate(clk_get_parent(priv->clk_dram)); for (i = 0; i < max_state; ++i) { @@ -448,12 +437,6 @@ static int sun8i_a33_mbus_probe(struct platform_device *pdev) err_remove_opps: dev_pm_opp_remove_all_dynamic(dev); -err_unlock_mbus: - clk_rate_exclusive_put(priv->clk_mbus); -err_unlock_dram: - clk_rate_exclusive_put(priv->clk_dram); -err_disable_bus: - clk_disable_unprepare(priv->clk_bus); return dev_err_probe(dev, ret, err); } @@ -472,9 +455,6 @@ static void sun8i_a33_mbus_remove(struct platform_device *pdev) dev_warn(dev, "failed to restore DRAM frequency: %d\n", ret); dev_pm_opp_remove_all_dynamic(dev); - clk_rate_exclusive_put(priv->clk_mbus); - clk_rate_exclusive_put(priv->clk_dram); - clk_disable_unprepare(priv->clk_bus); } static const struct sun8i_a33_mbus_variant sun50i_a64_mbus = { From 45b9d1da6ca0d0285140f8779793b537e4560d45 Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Mon, 23 Jun 2025 22:34:00 +0800 Subject: [PATCH 10/11] PM / devfreq: Allow devfreq driver to add custom sysfs ABIs Extend the devfreq_dev_profile to allow drivers optionally create device-specific sysfs ABIs together with other common devfreq ABIs under the devfreq device path. Reviewed-by: Jonathan Cameron Reviewed-by: Huisong Li Signed-off-by: Jie Zhan Signed-off-by: Chanwoo Choi Link: https://patchwork.kernel.org/project/linux-pm/patch/20250623143401.4095045-2-zhanjie9@hisilicon.com/ --- drivers/devfreq/devfreq.c | 1 + include/linux/devfreq.h | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index c5f5960e643b..2e8d01d47f69 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -831,6 +831,7 @@ struct devfreq *devfreq_add_device(struct device *dev, mutex_lock(&devfreq->lock); devfreq->dev.parent = dev; devfreq->dev.class = devfreq_class; + devfreq->dev.groups = profile->dev_groups; devfreq->dev.release = devfreq_dev_release; INIT_LIST_HEAD(&devfreq->node); devfreq->profile = profile; diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index d312ffbac4dd..dc1075dc3446 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -103,6 +103,8 @@ struct devfreq_dev_status { * * @is_cooling_device: A self-explanatory boolean giving the device a * cooling effect property. + * @dev_groups: Optional device-specific sysfs attribute groups that to + * be attached to the devfreq device. */ struct devfreq_dev_profile { unsigned long initial_freq; @@ -119,6 +121,8 @@ struct devfreq_dev_profile { unsigned int max_state; bool is_cooling_device; + + const struct attribute_group **dev_groups; }; /** From 7da2fdaaa1e6062686ac96a9f096c2d7847533e4 Mon Sep 17 00:00:00 2001 From: Jie Zhan Date: Mon, 23 Jun 2025 22:34:01 +0800 Subject: [PATCH 11/11] PM / devfreq: Add HiSilicon uncore frequency scaling driver Add the HiSilicon uncore frequency scaling driver for Kunpeng SoCs based on the devfreq framework. The uncore domain contains shared computing resources, including system interconnects and L3 cache. The uncore frequency significantly impacts the system-wide performance as well as power consumption. This driver adds support for runtime management of uncore frequency from kernel and userspace. The main function includes setting and getting frequencies, changing frequency scaling policies, and querying the list of CPUs whose performance is significantly related to this uncore frequency domain, etc. The driver communicates with a platform controller through an ACPI PCC mailbox to take the actual actions of frequency scaling. Co-developed-by: Lifeng Zheng Signed-off-by: Lifeng Zheng Reviewed-by: Jonathan Cameron Reviewed-by: Huisong Li Signed-off-by: Jie Zhan Signed-off-by: Chanwoo Choi Link: https://patchwork.kernel.org/project/linux-pm/patch/20250623143401.4095045-3-zhanjie9@hisilicon.com/ --- Documentation/ABI/testing/sysfs-class-devfreq | 9 + drivers/devfreq/Kconfig | 11 + drivers/devfreq/Makefile | 1 + drivers/devfreq/hisi_uncore_freq.c | 658 ++++++++++++++++++ 4 files changed, 679 insertions(+) create mode 100644 drivers/devfreq/hisi_uncore_freq.c diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index 1e7e0bb4c14e..df8ba88b9f6a 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -132,3 +132,12 @@ Description: A list of governors that support the node: - simple_ondemand + +What: /sys/class/devfreq/.../related_cpus +Date: June 2025 +Contact: Linux power management list +Description: The list of CPUs whose performance is closely related to the + frequency of this devfreq domain. + + This file is only present if a specific devfreq device is + closely associated with a subset of CPUs. diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index 3c4862a752b5..c999c4a1e567 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -90,6 +90,17 @@ config ARM_EXYNOS_BUS_DEVFREQ and adjusts the operating frequencies and voltages with OPP support. This does not yet operate with optimal voltages. +config ARM_HISI_UNCORE_DEVFREQ + tristate "HiSilicon uncore DEVFREQ Driver" + depends on ACPI && ACPI_PPTT && PCC + select DEVFREQ_GOV_PERFORMANCE + select DEVFREQ_GOV_USERSPACE + help + This adds a DEVFREQ driver that manages uncore frequency scaling for + HiSilicon Kunpeng SoCs. This enables runtime management of uncore + frequency scaling from kernel and userspace. The uncore domain + contains system interconnects and L3 cache. + config ARM_IMX_BUS_DEVFREQ tristate "i.MX Generic Bus DEVFREQ Driver" depends on ARCH_MXC || COMPILE_TEST diff --git a/drivers/devfreq/Makefile b/drivers/devfreq/Makefile index bf40d04928d0..404179d79a9d 100644 --- a/drivers/devfreq/Makefile +++ b/drivers/devfreq/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_DEVFREQ_GOV_PASSIVE) += governor_passive.o # DEVFREQ Drivers obj-$(CONFIG_ARM_EXYNOS_BUS_DEVFREQ) += exynos-bus.o +obj-$(CONFIG_ARM_HISI_UNCORE_DEVFREQ) += hisi_uncore_freq.o obj-$(CONFIG_ARM_IMX_BUS_DEVFREQ) += imx-bus.o obj-$(CONFIG_ARM_IMX8M_DDRC_DEVFREQ) += imx8m-ddrc.o obj-$(CONFIG_ARM_MEDIATEK_CCI_DEVFREQ) += mtk-cci-devfreq.o diff --git a/drivers/devfreq/hisi_uncore_freq.c b/drivers/devfreq/hisi_uncore_freq.c new file mode 100644 index 000000000000..96d1815059e3 --- /dev/null +++ b/drivers/devfreq/hisi_uncore_freq.c @@ -0,0 +1,658 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HiSilicon uncore frequency scaling driver + * + * Copyright (c) 2025 HiSilicon Co., Ltd + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "governor.h" + +struct hisi_uncore_pcc_data { + u16 status; + u16 resv; + u32 data; +}; + +struct hisi_uncore_pcc_shmem { + struct acpi_pcct_shared_memory head; + struct hisi_uncore_pcc_data pcc_data; +}; + +enum hisi_uncore_pcc_cmd_type { + HUCF_PCC_CMD_GET_CAP = 0, + HUCF_PCC_CMD_GET_FREQ, + HUCF_PCC_CMD_SET_FREQ, + HUCF_PCC_CMD_GET_MODE, + HUCF_PCC_CMD_SET_MODE, + HUCF_PCC_CMD_GET_PLAT_FREQ_NUM, + HUCF_PCC_CMD_GET_PLAT_FREQ_BY_IDX, + HUCF_PCC_CMD_MAX = 256 +}; + +static int hisi_platform_gov_usage; +static DEFINE_MUTEX(hisi_platform_gov_usage_lock); + +enum hisi_uncore_freq_mode { + HUCF_MODE_PLATFORM = 0, + HUCF_MODE_OS, + HUCF_MODE_MAX +}; + +#define HUCF_CAP_PLATFORM_CTRL BIT(0) + +/** + * struct hisi_uncore_freq - hisi uncore frequency scaling device data + * @dev: device of this frequency scaling driver + * @cl: mailbox client object + * @pchan: PCC mailbox channel + * @chan_id: PCC channel ID + * @last_cmd_cmpl_time: timestamp of the last completed PCC command + * @pcc_lock: PCC channel lock + * @devfreq: devfreq data of this hisi_uncore_freq device + * @related_cpus: CPUs whose performance is majorly affected by this + * uncore frequency domain + * @cap: capability flag + */ +struct hisi_uncore_freq { + struct device *dev; + struct mbox_client cl; + struct pcc_mbox_chan *pchan; + int chan_id; + ktime_t last_cmd_cmpl_time; + struct mutex pcc_lock; + struct devfreq *devfreq; + struct cpumask related_cpus; + u32 cap; +}; + +/* PCC channel timeout = PCC nominal latency * NUM */ +#define HUCF_PCC_POLL_TIMEOUT_NUM 1000 +#define HUCF_PCC_POLL_INTERVAL_US 5 + +/* Default polling interval in ms for devfreq governors*/ +#define HUCF_DEFAULT_POLLING_MS 100 + +static void hisi_uncore_free_pcc_chan(struct hisi_uncore_freq *uncore) +{ + guard(mutex)(&uncore->pcc_lock); + pcc_mbox_free_channel(uncore->pchan); + uncore->pchan = NULL; +} + +static void devm_hisi_uncore_free_pcc_chan(void *data) +{ + hisi_uncore_free_pcc_chan(data); +} + +static int hisi_uncore_request_pcc_chan(struct hisi_uncore_freq *uncore) +{ + struct device *dev = uncore->dev; + struct pcc_mbox_chan *pcc_chan; + + uncore->cl = (struct mbox_client) { + .dev = dev, + .tx_block = false, + .knows_txdone = true, + }; + + pcc_chan = pcc_mbox_request_channel(&uncore->cl, uncore->chan_id); + if (IS_ERR(pcc_chan)) + return dev_err_probe(dev, PTR_ERR(pcc_chan), + "Failed to request PCC channel %u\n", uncore->chan_id); + + if (!pcc_chan->shmem_base_addr) { + pcc_mbox_free_channel(pcc_chan); + return dev_err_probe(dev, -EINVAL, + "Invalid PCC shared memory address\n"); + } + + if (pcc_chan->shmem_size < sizeof(struct hisi_uncore_pcc_shmem)) { + pcc_mbox_free_channel(pcc_chan); + return dev_err_probe(dev, -EINVAL, + "Invalid PCC shared memory size (%lluB)\n", + pcc_chan->shmem_size); + } + + uncore->pchan = pcc_chan; + + return devm_add_action_or_reset(uncore->dev, + devm_hisi_uncore_free_pcc_chan, uncore); +} + +static acpi_status hisi_uncore_pcc_reg_scan(struct acpi_resource *res, + void *ctx) +{ + struct acpi_resource_generic_register *reg; + struct hisi_uncore_freq *uncore; + + if (!res || res->type != ACPI_RESOURCE_TYPE_GENERIC_REGISTER) + return AE_OK; + + reg = &res->data.generic_reg; + if (reg->space_id != ACPI_ADR_SPACE_PLATFORM_COMM) + return AE_OK; + + if (!ctx) + return AE_ERROR; + + uncore = ctx; + /* PCC subspace ID stored in Access Size */ + uncore->chan_id = reg->access_size; + + return AE_CTRL_TERMINATE; +} + +static int hisi_uncore_init_pcc_chan(struct hisi_uncore_freq *uncore) +{ + acpi_handle handle = ACPI_HANDLE(uncore->dev); + acpi_status status; + int rc; + + uncore->chan_id = -1; + status = acpi_walk_resources(handle, METHOD_NAME__CRS, + hisi_uncore_pcc_reg_scan, uncore); + if (ACPI_FAILURE(status) || uncore->chan_id < 0) + return dev_err_probe(uncore->dev, -ENODEV, + "Failed to get a PCC channel\n"); + + + rc = devm_mutex_init(uncore->dev, &uncore->pcc_lock); + if (rc) + return rc; + + return hisi_uncore_request_pcc_chan(uncore); +} + +static int hisi_uncore_cmd_send(struct hisi_uncore_freq *uncore, + u8 cmd, u32 *data) +{ + struct hisi_uncore_pcc_shmem __iomem *addr; + struct hisi_uncore_pcc_shmem shmem; + struct pcc_mbox_chan *pchan; + unsigned int mrtt; + s64 time_delta; + u16 status; + int rc; + + guard(mutex)(&uncore->pcc_lock); + + pchan = uncore->pchan; + if (!pchan) + return -ENODEV; + + addr = (struct hisi_uncore_pcc_shmem __iomem *)pchan->shmem; + if (!addr) + return -EINVAL; + + /* Handle the Minimum Request Turnaround Time (MRTT) */ + mrtt = pchan->min_turnaround_time; + time_delta = ktime_us_delta(ktime_get(), uncore->last_cmd_cmpl_time); + if (mrtt > time_delta) + udelay(mrtt - time_delta); + + /* Copy data */ + shmem.head = (struct acpi_pcct_shared_memory) { + .signature = PCC_SIGNATURE | uncore->chan_id, + .command = cmd, + }; + shmem.pcc_data.data = *data; + memcpy_toio(addr, &shmem, sizeof(shmem)); + + /* Ring doorbell */ + rc = mbox_send_message(pchan->mchan, &cmd); + if (rc < 0) { + dev_err(uncore->dev, "Failed to send mbox message, %d\n", rc); + return rc; + } + + /* Wait status */ + rc = readw_poll_timeout(&addr->head.status, status, + status & (PCC_STATUS_CMD_COMPLETE | + PCC_STATUS_ERROR), + HUCF_PCC_POLL_INTERVAL_US, + pchan->latency * HUCF_PCC_POLL_TIMEOUT_NUM); + if (rc) { + dev_err(uncore->dev, "PCC channel response timeout, cmd=%u\n", cmd); + } else if (status & PCC_STATUS_ERROR) { + dev_err(uncore->dev, "PCC cmd error, cmd=%u\n", cmd); + rc = -EIO; + } + + uncore->last_cmd_cmpl_time = ktime_get(); + + /* Copy data back */ + memcpy_fromio(data, &addr->pcc_data.data, sizeof(*data)); + + /* Clear mailbox active req */ + mbox_client_txdone(pchan->mchan, rc); + + return rc; +} + +static int hisi_uncore_target(struct device *dev, unsigned long *freq, + u32 flags) +{ + struct hisi_uncore_freq *uncore = dev_get_drvdata(dev); + struct dev_pm_opp *opp; + u32 data; + + if (WARN_ON(!uncore || !uncore->pchan)) + return -ENODEV; + + opp = devfreq_recommended_opp(dev, freq, flags); + if (IS_ERR(opp)) { + dev_err(dev, "Failed to get opp for freq %lu hz\n", *freq); + return PTR_ERR(opp); + } + dev_pm_opp_put(opp); + + data = (u32)(dev_pm_opp_get_freq(opp) / HZ_PER_MHZ); + + return hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_SET_FREQ, &data); +} + +static int hisi_uncore_get_dev_status(struct device *dev, + struct devfreq_dev_status *stat) +{ + /* Not used */ + return 0; +} + +static int hisi_uncore_get_cur_freq(struct device *dev, unsigned long *freq) +{ + struct hisi_uncore_freq *uncore = dev_get_drvdata(dev); + u32 data = 0; + int rc; + + if (WARN_ON(!uncore || !uncore->pchan)) + return -ENODEV; + + rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_GET_FREQ, &data); + + /* + * Upon a failure, 'data' remains 0 and 'freq' is set to 0 rather than a + * random value. devfreq shouldn't use 'freq' in that case though. + */ + *freq = data * HZ_PER_MHZ; + + return rc; +} + +static void devm_hisi_uncore_remove_opp(void *data) +{ + struct hisi_uncore_freq *uncore = data; + + dev_pm_opp_remove_all_dynamic(uncore->dev); +} + +static int hisi_uncore_init_opp(struct hisi_uncore_freq *uncore) +{ + struct device *dev = uncore->dev; + unsigned long freq_mhz; + u32 num, index; + u32 data = 0; + int rc; + + rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_GET_PLAT_FREQ_NUM, + &data); + if (rc) + return dev_err_probe(dev, rc, "Failed to get plat freq num\n"); + + num = data; + + for (index = 0; index < num; index++) { + data = index; + rc = hisi_uncore_cmd_send(uncore, + HUCF_PCC_CMD_GET_PLAT_FREQ_BY_IDX, + &data); + if (rc) { + dev_pm_opp_remove_all_dynamic(dev); + return dev_err_probe(dev, rc, + "Failed to get plat freq at index %u\n", index); + } + freq_mhz = data; + + /* Don't care OPP voltage, take 1V as default */ + rc = dev_pm_opp_add(dev, freq_mhz * HZ_PER_MHZ, 1000000); + if (rc) { + dev_pm_opp_remove_all_dynamic(dev); + return dev_err_probe(dev, rc, + "Add OPP %lu failed\n", freq_mhz); + } + } + + return devm_add_action_or_reset(dev, devm_hisi_uncore_remove_opp, + uncore); +} + +static int hisi_platform_gov_func(struct devfreq *df, unsigned long *freq) +{ + /* + * Platform-controlled mode doesn't care the frequency issued from + * devfreq, so just pick the max freq. + */ + *freq = DEVFREQ_MAX_FREQ; + + return 0; +} + +static int hisi_platform_gov_handler(struct devfreq *df, unsigned int event, + void *val) +{ + struct hisi_uncore_freq *uncore = dev_get_drvdata(df->dev.parent); + int rc = 0; + u32 data; + + if (WARN_ON(!uncore || !uncore->pchan)) + return -ENODEV; + + switch (event) { + case DEVFREQ_GOV_START: + data = HUCF_MODE_PLATFORM; + rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_SET_MODE, &data); + if (rc) + dev_err(uncore->dev, "Failed to set platform mode (%d)\n", rc); + break; + case DEVFREQ_GOV_STOP: + data = HUCF_MODE_OS; + rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_SET_MODE, &data); + if (rc) + dev_err(uncore->dev, "Failed to set os mode (%d)\n", rc); + break; + default: + break; + } + + return rc; +} + +/* + * In the platform-controlled mode, the platform decides the uncore frequency + * and ignores the frequency issued from the driver. + * Thus, create a pseudo 'hisi_platform' governor that stops devfreq monitor + * from working so as to save meaningless overhead. + */ +static struct devfreq_governor hisi_platform_governor = { + .name = "hisi_platform", + /* + * Set interrupt_driven to skip the devfreq monitor mechanism, though + * this governor is not interrupt-driven. + */ + .flags = DEVFREQ_GOV_FLAG_IRQ_DRIVEN, + .get_target_freq = hisi_platform_gov_func, + .event_handler = hisi_platform_gov_handler, +}; + +static void hisi_uncore_remove_platform_gov(struct hisi_uncore_freq *uncore) +{ + u32 data = HUCF_MODE_PLATFORM; + int rc; + + if (!(uncore->cap & HUCF_CAP_PLATFORM_CTRL)) + return; + + guard(mutex)(&hisi_platform_gov_usage_lock); + + if (--hisi_platform_gov_usage == 0) { + rc = devfreq_remove_governor(&hisi_platform_governor); + if (rc) + dev_err(uncore->dev, "Failed to remove hisi_platform gov (%d)\n", rc); + } + + /* + * Set to the platform-controlled mode on exit if supported, so as to + * have a certain behaviour when the driver is detached. + */ + rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_SET_MODE, &data); + if (rc) + dev_err(uncore->dev, "Failed to set platform mode on exit (%d)\n", rc); +} + +static void devm_hisi_uncore_remove_platform_gov(void *data) +{ + hisi_uncore_remove_platform_gov(data); +} + +static int hisi_uncore_add_platform_gov(struct hisi_uncore_freq *uncore) +{ + if (!(uncore->cap & HUCF_CAP_PLATFORM_CTRL)) + return 0; + + guard(mutex)(&hisi_platform_gov_usage_lock); + + if (hisi_platform_gov_usage == 0) { + int rc = devfreq_add_governor(&hisi_platform_governor); + if (rc) + return rc; + } + hisi_platform_gov_usage++; + + return devm_add_action_or_reset(uncore->dev, + devm_hisi_uncore_remove_platform_gov, + uncore); +} + +/* + * Returns: + * 0 if success, uncore->related_cpus is set. + * -EINVAL if property not found, or property found but without elements in it, + * or invalid arguments received in any of the subroutine. + * Other error codes if it goes wrong. + */ +static int hisi_uncore_mark_related_cpus(struct hisi_uncore_freq *uncore, + char *property, int (*get_topo_id)(int cpu), + const struct cpumask *(*get_cpumask)(int cpu)) +{ + unsigned int i, cpu; + size_t len; + int rc; + + rc = device_property_count_u32(uncore->dev, property); + if (rc < 0) + return rc; + if (rc == 0) + return -EINVAL; + + len = rc; + u32 *num __free(kfree) = kcalloc(len, sizeof(*num), GFP_KERNEL); + if (!num) + return -ENOMEM; + + rc = device_property_read_u32_array(uncore->dev, property, num, len); + if (rc) + return rc; + + for (i = 0; i < len; i++) { + for_each_possible_cpu(cpu) { + if (get_topo_id(cpu) != num[i]) + continue; + + cpumask_or(&uncore->related_cpus, + &uncore->related_cpus, get_cpumask(cpu)); + break; + } + } + + return 0; +} + +static int get_package_id(int cpu) +{ + return topology_physical_package_id(cpu); +} + +static const struct cpumask *get_package_cpumask(int cpu) +{ + return topology_core_cpumask(cpu); +} + +static int get_cluster_id(int cpu) +{ + return topology_cluster_id(cpu); +} + +static const struct cpumask *get_cluster_cpumask(int cpu) +{ + return topology_cluster_cpumask(cpu); +} + +static int hisi_uncore_mark_related_cpus_wrap(struct hisi_uncore_freq *uncore) +{ + int rc; + + cpumask_clear(&uncore->related_cpus); + + rc = hisi_uncore_mark_related_cpus(uncore, "related-package", + get_package_id, + get_package_cpumask); + /* Success, or firmware probably broken */ + if (!rc || rc != -EINVAL) + return rc; + + /* Try another property name if rc == -EINVAL */ + return hisi_uncore_mark_related_cpus(uncore, "related-cluster", + get_cluster_id, + get_cluster_cpumask); +} + +static ssize_t related_cpus_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hisi_uncore_freq *uncore = dev_get_drvdata(dev->parent); + + return cpumap_print_to_pagebuf(true, buf, &uncore->related_cpus); +} + +static DEVICE_ATTR_RO(related_cpus); + +static struct attribute *hisi_uncore_freq_attrs[] = { + &dev_attr_related_cpus.attr, + NULL +}; +ATTRIBUTE_GROUPS(hisi_uncore_freq); + +static int hisi_uncore_devfreq_register(struct hisi_uncore_freq *uncore) +{ + struct devfreq_dev_profile *profile; + struct device *dev = uncore->dev; + unsigned long freq; + u32 data; + int rc; + + rc = hisi_uncore_get_cur_freq(dev, &freq); + if (rc) + return dev_err_probe(dev, rc, "Failed to get plat init freq\n"); + + profile = devm_kzalloc(dev, sizeof(*profile), GFP_KERNEL); + if (!profile) + return -ENOMEM; + + *profile = (struct devfreq_dev_profile) { + .initial_freq = freq, + .polling_ms = HUCF_DEFAULT_POLLING_MS, + .timer = DEVFREQ_TIMER_DELAYED, + .target = hisi_uncore_target, + .get_dev_status = hisi_uncore_get_dev_status, + .get_cur_freq = hisi_uncore_get_cur_freq, + .dev_groups = hisi_uncore_freq_groups, + }; + + rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_GET_MODE, &data); + if (rc) + return dev_err_probe(dev, rc, "Failed to get operate mode\n"); + + if (data == HUCF_MODE_PLATFORM) + uncore->devfreq = devm_devfreq_add_device(dev, profile, + hisi_platform_governor.name, NULL); + else + uncore->devfreq = devm_devfreq_add_device(dev, profile, + DEVFREQ_GOV_PERFORMANCE, NULL); + if (IS_ERR(uncore->devfreq)) + return dev_err_probe(dev, PTR_ERR(uncore->devfreq), + "Failed to add devfreq device\n"); + + return 0; +} + +static int hisi_uncore_freq_probe(struct platform_device *pdev) +{ + struct hisi_uncore_freq *uncore; + struct device *dev = &pdev->dev; + u32 cap; + int rc; + + uncore = devm_kzalloc(dev, sizeof(*uncore), GFP_KERNEL); + if (!uncore) + return -ENOMEM; + + uncore->dev = dev; + platform_set_drvdata(pdev, uncore); + + rc = hisi_uncore_init_pcc_chan(uncore); + if (rc) + return dev_err_probe(dev, rc, "Failed to init PCC channel\n"); + + rc = hisi_uncore_init_opp(uncore); + if (rc) + return dev_err_probe(dev, rc, "Failed to init OPP\n"); + + rc = hisi_uncore_cmd_send(uncore, HUCF_PCC_CMD_GET_CAP, &cap); + if (rc) + return dev_err_probe(dev, rc, "Failed to get capability\n"); + + uncore->cap = cap; + + rc = hisi_uncore_add_platform_gov(uncore); + if (rc) + return dev_err_probe(dev, rc, "Failed to add hisi_platform governor\n"); + + rc = hisi_uncore_mark_related_cpus_wrap(uncore); + if (rc) + return dev_err_probe(dev, rc, "Failed to mark related cpus\n"); + + rc = hisi_uncore_devfreq_register(uncore); + if (rc) + return dev_err_probe(dev, rc, "Failed to register devfreq\n"); + + return 0; +} + +static const struct acpi_device_id hisi_uncore_freq_acpi_match[] = { + { "HISI04F1", }, + { } +}; +MODULE_DEVICE_TABLE(acpi, hisi_uncore_freq_acpi_match); + +static struct platform_driver hisi_uncore_freq_drv = { + .probe = hisi_uncore_freq_probe, + .driver = { + .name = "hisi_uncore_freq", + .acpi_match_table = hisi_uncore_freq_acpi_match, + }, +}; +module_platform_driver(hisi_uncore_freq_drv); + +MODULE_DESCRIPTION("HiSilicon uncore frequency scaling driver"); +MODULE_AUTHOR("Jie Zhan "); +MODULE_LICENSE("GPL");