diff options
Diffstat (limited to 'drivers')
41 files changed, 768 insertions, 294 deletions
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 5f73854234ba..814882ae5059 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -309,7 +309,7 @@ int ivpu_rpm_get_if_active(struct ivpu_device *vdev) { int ret; - ret = pm_runtime_get_if_active(vdev->drm.dev, false); + ret = pm_runtime_get_if_in_use(vdev->drm.dev); drm_WARN_ON(&vdev->drm, ret < 0); return ret; diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index d155a86a8614..a50e70abdf19 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1158,6 +1158,19 @@ int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf) } /** + * cppc_get_highest_perf - Get the highest performance register value. + * @cpunum: CPU from which to get highest performance. + * @highest_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. + */ +int cppc_get_highest_perf(int cpunum, u64 *highest_perf) +{ + return cppc_get_perf(cpunum, HIGHEST_PERF, highest_perf); +} +EXPORT_SYMBOL_GPL(cppc_get_highest_perf); + +/** * cppc_get_epp_perf - Get the epp register value. * @cpunum: CPU from which to get epp preference value. * @epp_perf: Return address. diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 4bd16b3f0781..67db60eda370 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -27,6 +27,7 @@ #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80 #define ACPI_PROCESSOR_NOTIFY_POWER 0x81 #define ACPI_PROCESSOR_NOTIFY_THROTTLING 0x82 +#define ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED 0x85 MODULE_AUTHOR("Paul Diefenbaugh"); MODULE_DESCRIPTION("ACPI Processor Driver"); @@ -83,6 +84,11 @@ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data) acpi_bus_generate_netlink_event(device->pnp.device_class, dev_name(&device->dev), event, 0); break; + case ACPI_PROCESSOR_NOTIFY_HIGEST_PERF_CHANGED: + cpufreq_update_limits(pr->id); + acpi_bus_generate_netlink_event(device->pnp.device_class, + dev_name(&device->dev), event, 0); + break; default: acpi_handle_debug(handle, "Unsupported event [0x%x]\n", event); break; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index fadcd0379dc2..5679f966f676 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -60,7 +60,6 @@ static LIST_HEAD(dpm_suspended_list); static LIST_HEAD(dpm_late_early_list); static LIST_HEAD(dpm_noirq_list); -struct suspend_stats suspend_stats; static DEFINE_MUTEX(dpm_list_mtx); static pm_message_t pm_transition; @@ -578,6 +577,35 @@ bool dev_pm_skip_resume(struct device *dev) return !dev->power.must_resume; } +static bool is_async(struct device *dev) +{ + return dev->power.async_suspend && pm_async_enabled + && !pm_trace_is_enabled(); +} + +static bool dpm_async_fn(struct device *dev, async_func_t func) +{ + reinit_completion(&dev->power.completion); + + if (is_async(dev)) { + dev->power.async_in_progress = true; + + get_device(dev); + + if (async_schedule_dev_nocall(func, dev)) + return true; + + put_device(dev); + } + /* + * Because async_schedule_dev_nocall() above has returned false or it + * has not been called at all, func() is not running and it is safe to + * update the async_in_progress flag without extra synchronization. + */ + dev->power.async_in_progress = false; + return false; +} + /** * device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. @@ -657,42 +685,12 @@ Out: TRACE_RESUME(error); if (error) { - suspend_stats.failed_resume_noirq++; - dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); + async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); } } -static bool is_async(struct device *dev) -{ - return dev->power.async_suspend && pm_async_enabled - && !pm_trace_is_enabled(); -} - -static bool dpm_async_fn(struct device *dev, async_func_t func) -{ - reinit_completion(&dev->power.completion); - - if (is_async(dev)) { - dev->power.async_in_progress = true; - - get_device(dev); - - if (async_schedule_dev_nocall(func, dev)) - return true; - - put_device(dev); - } - /* - * Because async_schedule_dev_nocall() above has returned false or it - * has not been called at all, func() is not running and it is safe to - * update the async_in_progress flag without extra synchronization. - */ - dev->power.async_in_progress = false; - return false; -} - static void async_resume_noirq(void *data, async_cookie_t cookie) { struct device *dev = data; @@ -707,9 +705,12 @@ static void dpm_noirq_resume_devices(pm_message_t state) ktime_t starttime = ktime_get(); trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, true); - mutex_lock(&dpm_list_mtx); + + async_error = 0; pm_transition = state; + mutex_lock(&dpm_list_mtx); + /* * Trigger the resume of "async" devices upfront so they don't have to * wait for the "non-async" ones they don't depend on. @@ -736,6 +737,9 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, "noirq"); + if (async_error) + dpm_save_failed_step(SUSPEND_RESUME_NOIRQ); + trace_suspend_resume(TPS("dpm_resume_noirq"), state.event, false); } @@ -817,8 +821,7 @@ Out: complete_all(&dev->power.completion); if (error) { - suspend_stats.failed_resume_early++; - dpm_save_failed_step(SUSPEND_RESUME_EARLY); + async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async early" : " early", error); } @@ -842,9 +845,12 @@ void dpm_resume_early(pm_message_t state) ktime_t starttime = ktime_get(); trace_suspend_resume(TPS("dpm_resume_early"), state.event, true); - mutex_lock(&dpm_list_mtx); + + async_error = 0; pm_transition = state; + mutex_lock(&dpm_list_mtx); + /* * Trigger the resume of "async" devices upfront so they don't have to * wait for the "non-async" ones they don't depend on. @@ -871,6 +877,9 @@ void dpm_resume_early(pm_message_t state) mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, "early"); + if (async_error) + dpm_save_failed_step(SUSPEND_RESUME_EARLY); + trace_suspend_resume(TPS("dpm_resume_early"), state.event, false); } @@ -974,8 +983,7 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) TRACE_RESUME(error); if (error) { - suspend_stats.failed_resume++; - dpm_save_failed_step(SUSPEND_RESUME); + async_error = error; dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async" : "", error); } @@ -1004,10 +1012,11 @@ void dpm_resume(pm_message_t state) trace_suspend_resume(TPS("dpm_resume"), state.event, true); might_sleep(); - mutex_lock(&dpm_list_mtx); pm_transition = state; async_error = 0; + mutex_lock(&dpm_list_mtx); + /* * Trigger the resume of "async" devices upfront so they don't have to * wait for the "non-async" ones they don't depend on. @@ -1017,29 +1026,25 @@ void dpm_resume(pm_message_t state) while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); - - get_device(dev); + list_move_tail(&dev->power.entry, &dpm_prepared_list); if (!dev->power.async_in_progress) { + get_device(dev); + mutex_unlock(&dpm_list_mtx); device_resume(dev, state, false); + put_device(dev); + mutex_lock(&dpm_list_mtx); } - - if (!list_empty(&dev->power.entry)) - list_move_tail(&dev->power.entry, &dpm_prepared_list); - - mutex_unlock(&dpm_list_mtx); - - put_device(dev); - - mutex_lock(&dpm_list_mtx); } mutex_unlock(&dpm_list_mtx); async_synchronize_full(); dpm_show_time(starttime, state, 0, NULL); + if (async_error) + dpm_save_failed_step(SUSPEND_RESUME); cpufreq_resume(); devfreq_resume(); @@ -1187,7 +1192,7 @@ static void dpm_superior_set_must_resume(struct device *dev) } /** - * __device_suspend_noirq - Execute a "noirq suspend" callback for given device. + * device_suspend_noirq - Execute a "noirq suspend" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being suspended asynchronously. @@ -1195,7 +1200,7 @@ static void dpm_superior_set_must_resume(struct device *dev) * The driver of @dev will not receive interrupts while this function is being * executed. */ -static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool async) +static int device_suspend_noirq(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -1240,6 +1245,8 @@ Run: error = dpm_run_callback(callback, dev, state, info); if (error) { async_error = error; + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); goto Complete; } @@ -1269,54 +1276,37 @@ Complete: static void async_suspend_noirq(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - - error = __device_suspend_noirq(dev, pm_transition, true); - if (error) { - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, pm_transition, " async", error); - } + device_suspend_noirq(dev, pm_transition, true); put_device(dev); } -static int device_suspend_noirq(struct device *dev) -{ - if (dpm_async_fn(dev, async_suspend_noirq)) - return 0; - - return __device_suspend_noirq(dev, pm_transition, false); -} - static int dpm_noirq_suspend_devices(pm_message_t state) { ktime_t starttime = ktime_get(); int error = 0; trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, true); - mutex_lock(&dpm_list_mtx); + pm_transition = state; async_error = 0; + mutex_lock(&dpm_list_mtx); + while (!list_empty(&dpm_late_early_list)) { struct device *dev = to_device(dpm_late_early_list.prev); - get_device(dev); - mutex_unlock(&dpm_list_mtx); - - error = device_suspend_noirq(dev); + list_move(&dev->power.entry, &dpm_noirq_list); - mutex_lock(&dpm_list_mtx); + if (dpm_async_fn(dev, async_suspend_noirq)) + continue; - if (error) { - pm_dev_err(dev, state, " noirq", error); - dpm_save_failed_dev(dev_name(dev)); - } else if (!list_empty(&dev->power.entry)) { - list_move(&dev->power.entry, &dpm_noirq_list); - } + get_device(dev); mutex_unlock(&dpm_list_mtx); + error = device_suspend_noirq(dev, state, false); + put_device(dev); mutex_lock(&dpm_list_mtx); @@ -1324,15 +1314,16 @@ static int dpm_noirq_suspend_devices(pm_message_t state) if (error || async_error) break; } + mutex_unlock(&dpm_list_mtx); + async_synchronize_full(); if (!error) error = async_error; - if (error) { - suspend_stats.failed_suspend_noirq++; + if (error) dpm_save_failed_step(SUSPEND_SUSPEND_NOIRQ); - } + dpm_show_time(starttime, state, error, "noirq"); trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, false); return error; @@ -1375,14 +1366,14 @@ static void dpm_propagate_wakeup_to_parent(struct device *dev) } /** - * __device_suspend_late - Execute a "late suspend" callback for given device. + * device_suspend_late - Execute a "late suspend" callback for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being suspended asynchronously. * * Runtime PM is disabled for @dev while this function is being executed. */ -static int __device_suspend_late(struct device *dev, pm_message_t state, bool async) +static int device_suspend_late(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -1434,6 +1425,8 @@ Run: error = dpm_run_callback(callback, dev, state, info); if (error) { async_error = error; + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async late" : " late", error); goto Complete; } dpm_propagate_wakeup_to_parent(dev); @@ -1450,24 +1443,11 @@ Complete: static void async_suspend_late(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - error = __device_suspend_late(dev, pm_transition, true); - if (error) { - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, pm_transition, " async", error); - } + device_suspend_late(dev, pm_transition, true); put_device(dev); } -static int device_suspend_late(struct device *dev) -{ - if (dpm_async_fn(dev, async_suspend_late)) - return 0; - - return __device_suspend_late(dev, pm_transition, false); -} - /** * dpm_suspend_late - Execute "late suspend" callbacks for all devices. * @state: PM transition of the system being carried out. @@ -1478,32 +1458,28 @@ int dpm_suspend_late(pm_message_t state) int error = 0; trace_suspend_resume(TPS("dpm_suspend_late"), state.event, true); - wake_up_all_idle_cpus(); - mutex_lock(&dpm_list_mtx); + pm_transition = state; async_error = 0; - while (!list_empty(&dpm_suspended_list)) { - struct device *dev = to_device(dpm_suspended_list.prev); - - get_device(dev); + wake_up_all_idle_cpus(); - mutex_unlock(&dpm_list_mtx); + mutex_lock(&dpm_list_mtx); - error = device_suspend_late(dev); + while (!list_empty(&dpm_suspended_list)) { + struct device *dev = to_device(dpm_suspended_list.prev); - mutex_lock(&dpm_list_mtx); + list_move(&dev->power.entry, &dpm_late_early_list); - if (!list_empty(&dev->power.entry)) - list_move(&dev->power.entry, &dpm_late_early_list); + if (dpm_async_fn(dev, async_suspend_late)) + continue; - if (error) { - pm_dev_err(dev, state, " late", error); - dpm_save_failed_dev(dev_name(dev)); - } + get_device(dev); mutex_unlock(&dpm_list_mtx); + error = device_suspend_late(dev, state, false); + put_device(dev); mutex_lock(&dpm_list_mtx); @@ -1511,12 +1487,14 @@ int dpm_suspend_late(pm_message_t state) if (error || async_error) break; } + mutex_unlock(&dpm_list_mtx); + async_synchronize_full(); if (!error) error = async_error; + if (error) { - suspend_stats.failed_suspend_late++; dpm_save_failed_step(SUSPEND_SUSPEND_LATE); dpm_resume_early(resume_event(state)); } @@ -1597,12 +1575,12 @@ static void dpm_clear_superiors_direct_complete(struct device *dev) } /** - * __device_suspend - Execute "suspend" callbacks for given device. + * device_suspend - Execute "suspend" callbacks for given device. * @dev: Device to handle. * @state: PM transition of the system being carried out. * @async: If true, the device is being suspended asynchronously. */ -static int __device_suspend(struct device *dev, pm_message_t state, bool async) +static int device_suspend(struct device *dev, pm_message_t state, bool async) { pm_callback_t callback = NULL; const char *info = NULL; @@ -1716,8 +1694,11 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) dpm_watchdog_clear(&wd); Complete: - if (error) + if (error) { async_error = error; + dpm_save_failed_dev(dev_name(dev)); + pm_dev_err(dev, state, async ? " async" : "", error); + } complete_all(&dev->power.completion); TRACE_SUSPEND(error); @@ -1727,25 +1708,11 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async) static void async_suspend(void *data, async_cookie_t cookie) { struct device *dev = data; - int error; - - error = __device_suspend(dev, pm_transition, true); - if (error) { - dpm_save_failed_dev(dev_name(dev)); - pm_dev_err(dev, pm_transition, " async", error); - } + device_suspend(dev, pm_transition, true); put_device(dev); } -static int device_suspend(struct device *dev) -{ - if (dpm_async_fn(dev, async_suspend)) - return 0; - - return __device_suspend(dev, pm_transition, false); -} - /** * dpm_suspend - Execute "suspend" callbacks for all non-sysdev devices. * @state: PM transition of the system being carried out. @@ -1761,29 +1728,25 @@ int dpm_suspend(pm_message_t state) devfreq_suspend(); cpufreq_suspend(); - mutex_lock(&dpm_list_mtx); pm_transition = state; async_error = 0; - while (!list_empty(&dpm_prepared_list)) { - struct device *dev = to_device(dpm_prepared_list.prev); - get_device(dev); + mutex_lock(&dpm_list_mtx); - mutex_unlock(&dpm_list_mtx); + while (!list_empty(&dpm_prepared_list)) { + struct device *dev = to_device(dpm_prepared_list.prev); - error = device_suspend(dev); + list_move(&dev->power.entry, &dpm_suspended_list); - mutex_lock(&dpm_list_mtx); + if (dpm_async_fn(dev, async_suspend)) + continue; - if (error) { - pm_dev_err(dev, state, "", error); - dpm_save_failed_dev(dev_name(dev)); - } else if (!list_empty(&dev->power.entry)) { - list_move(&dev->power.entry, &dpm_suspended_list); - } + get_device(dev); mutex_unlock(&dpm_list_mtx); + error = device_suspend(dev, state, false); + put_device(dev); mutex_lock(&dpm_list_mtx); @@ -1791,14 +1754,16 @@ int dpm_suspend(pm_message_t state) if (error || async_error) break; } + mutex_unlock(&dpm_list_mtx); + async_synchronize_full(); if (!error) error = async_error; - if (error) { - suspend_stats.failed_suspend++; + + if (error) dpm_save_failed_step(SUSPEND_SUSPEND); - } + dpm_show_time(starttime, state, error, NULL); trace_suspend_resume(TPS("dpm_suspend"), state.event, false); return error; @@ -1949,11 +1914,11 @@ int dpm_suspend_start(pm_message_t state) int error; error = dpm_prepare(state); - if (error) { - suspend_stats.failed_prepare++; + if (error) dpm_save_failed_step(SUSPEND_PREPARE); - } else + else error = dpm_suspend(state); + dpm_show_time(starttime, state, error, "start"); return error; } diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 05793c9fbb84..2ee45841486b 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -94,6 +94,7 @@ static void update_pm_runtime_accounting(struct device *dev) static void __update_runtime_status(struct device *dev, enum rpm_status status) { update_pm_runtime_accounting(dev); + trace_rpm_status(dev, status); dev->power.runtime_status = status; } @@ -1176,7 +1177,7 @@ int __pm_runtime_resume(struct device *dev, int rpmflags) EXPORT_SYMBOL_GPL(__pm_runtime_resume); /** - * pm_runtime_get_if_active - Conditionally bump up device usage counter. + * pm_runtime_get_conditional - Conditionally bump up device usage counter. * @dev: Device to handle. * @ign_usage_count: Whether or not to look at the current usage counter value. * @@ -1197,7 +1198,7 @@ EXPORT_SYMBOL_GPL(__pm_runtime_resume); * The caller is responsible for decrementing the runtime PM usage counter of * @dev after this function has returned a positive value for it. */ -int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count) +static int pm_runtime_get_conditional(struct device *dev, bool ign_usage_count) { unsigned long flags; int retval; @@ -1218,9 +1219,40 @@ int pm_runtime_get_if_active(struct device *dev, bool ign_usage_count) return retval; } + +/** + * pm_runtime_get_if_active - Bump up runtime PM usage counter if the device is + * in active state + * @dev: Target device. + * + * Increment the runtime PM usage counter of @dev if its runtime PM status is + * %RPM_ACTIVE, in which case it returns 1. If the device is in a different + * state, 0 is returned. -EINVAL is returned if runtime PM is disabled for the + * device, in which case also the usage_count will remain unmodified. + */ +int pm_runtime_get_if_active(struct device *dev) +{ + return pm_runtime_get_conditional(dev, true); +} EXPORT_SYMBOL_GPL(pm_runtime_get_if_active); /** + * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. + * @dev: Target device. + * + * Increment the runtime PM usage counter of @dev if its runtime PM status is + * %RPM_ACTIVE and its runtime PM usage counter is greater than 0, in which case + * it returns 1. If the device is in a different state or its usage_count is 0, + * 0 is returned. -EINVAL is returned if runtime PM is disabled for the device, + * in which case also the usage_count will remain unmodified. + */ +int pm_runtime_get_if_in_use(struct device *dev) +{ + return pm_runtime_get_conditional(dev, false); +} +EXPORT_SYMBOL_GPL(pm_runtime_get_if_in_use); + +/** * __pm_runtime_set_status - Set runtime PM status of a device. * @dev: Device to handle. * @status: New runtime PM status of the device. diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 42171f766dcb..5a5a9e978e85 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -313,8 +313,10 @@ void dev_pm_enable_wake_irq_complete(struct device *dev) return; if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED && - wirq->status & WAKE_IRQ_DEDICATED_REVERSE) + wirq->status & WAKE_IRQ_DEDICATED_REVERSE) { enable_irq(wirq->irq); + wirq->status |= WAKE_IRQ_DEDICATED_ENABLED; + } } /** diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index f911606897b8..a0ebad77666e 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -173,6 +173,7 @@ config ARM_QCOM_CPUFREQ_NVMEM config ARM_QCOM_CPUFREQ_HW tristate "QCOM CPUFreq HW driver" depends on ARCH_QCOM || COMPILE_TEST + depends on COMMON_CLK help Support for the CPUFreq HW driver. Some QCOM chipsets have a HW engine to offload the steps diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 1791d37fbc53..2015c9fcc3c9 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -37,6 +37,7 @@ #include <linux/uaccess.h> #include <linux/static_call.h> #include <linux/amd-pstate.h> +#include <linux/topology.h> #include <acpi/processor.h> #include <acpi/cppc_acpi.h> @@ -49,6 +50,7 @@ #define AMD_PSTATE_TRANSITION_LATENCY 20000 #define AMD_PSTATE_TRANSITION_DELAY 1000 +#define AMD_PSTATE_PREFCORE_THRESHOLD 166 /* * TODO: We need more time to fine tune processors with shared memory solution @@ -64,6 +66,7 @@ static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_UNDEFINED; static bool cppc_enabled; +static bool amd_pstate_prefcore = true; /* * AMD Energy Preference Performance (EPP) @@ -297,13 +300,14 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; - /* - * TODO: Introduce AMD specific power feature. - * - * CPPC entry doesn't indicate the highest performance in some ASICs. + /* For platforms that do not support the preferred core feature, the + * highest_pef may be configured with 166 or 255, to avoid max frequency + * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as + * the default max perf. */ - highest_perf = amd_get_highest_perf(); - if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1)) + if (cpudata->hw_prefcore) + highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD; + else highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); WRITE_ONCE(cpudata->highest_perf, highest_perf); @@ -311,6 +315,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); + WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1)); WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1)); return 0; } @@ -324,8 +329,9 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; - highest_perf = amd_get_highest_perf(); - if (highest_perf > cppc_perf.highest_perf) + if (cpudata->hw_prefcore) + highest_perf = AMD_PSTATE_PREFCORE_THRESHOLD; + else highest_perf = cppc_perf.highest_perf; WRITE_ONCE(cpudata->highest_perf, highest_perf); @@ -334,6 +340,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->lowest_nonlinear_perf, cppc_perf.lowest_nonlinear_perf); WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); + WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf); WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf); if (cppc_state == AMD_PSTATE_ACTIVE) @@ -477,12 +484,19 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy) static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u32 max_limit_perf, min_limit_perf; + u32 max_limit_perf, min_limit_perf, lowest_perf; struct amd_cpudata *cpudata = policy->driver_data; max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + lowest_perf = READ_ONCE(cpudata->lowest_perf); + if (min_limit_perf < lowest_perf) + min_limit_perf = lowest_perf; + + if (max_limit_perf < min_limit_perf) + max_limit_perf = min_limit_perf; + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); WRITE_ONCE(cpudata->max_limit_freq, policy->max); @@ -570,7 +584,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu, if (target_perf < capacity) des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity); - min_perf = READ_ONCE(cpudata->highest_perf); + min_perf = READ_ONCE(cpudata->lowest_perf); if (_min_perf < capacity) min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity); @@ -706,6 +720,114 @@ static void amd_perf_ctl_reset(unsigned int cpu) wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); } +/* + * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks + * due to locking, so queue the work for later. + */ +static void amd_pstste_sched_prefcore_workfn(struct work_struct *work) +{ + sched_set_itmt_support(); +} +static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); + +/* + * Get the highest performance register value. + * @cpu: CPU from which to get highest performance. + * @highest_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. + */ +static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf) +{ + int ret; + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + u64 cap1; + + ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); + if (ret) + return ret; + WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1)); + } else { + u64 cppc_highest_perf; + + ret = cppc_get_highest_perf(cpu, &cppc_highest_perf); + if (ret) + return ret; + WRITE_ONCE(*highest_perf, cppc_highest_perf); + } + + return (ret); +} + +#define CPPC_MAX_PERF U8_MAX + +static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) +{ + int ret, prio; + u32 highest_perf; + + ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf); + if (ret) + return; + + cpudata->hw_prefcore = true; + /* check if CPPC preferred core feature is enabled*/ + if (highest_perf < CPPC_MAX_PERF) + prio = (int)highest_perf; + else { + pr_debug("AMD CPPC preferred core is unsupported!\n"); + cpudata->hw_prefcore = false; + return; + } + + if (!amd_pstate_prefcore) + return; + + /* + * The priorities can be set regardless of whether or not + * sched_set_itmt_support(true) has been called and it is valid to + * update them at any time after it has been called. + */ + sched_set_itmt_core_prio(prio, cpudata->cpu); + + schedule_work(&sched_prefcore_work); +} + +static void amd_pstate_update_limits(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct amd_cpudata *cpudata = policy->driver_data; + u32 prev_high = 0, cur_high = 0; + int ret; + bool highest_perf_changed = false; + + mutex_lock(&amd_pstate_driver_lock); + if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore)) + goto free_cpufreq_put; + + ret = amd_pstate_get_highest_perf(cpu, &cur_high); + if (ret) + goto free_cpufreq_put; + + prev_high = READ_ONCE(cpudata->prefcore_ranking); + if (prev_high != cur_high) { + highest_perf_changed = true; + WRITE_ONCE(cpudata->prefcore_ranking, cur_high); + + if (cur_high < CPPC_MAX_PERF) + sched_set_itmt_core_prio((int)cur_high, cpu); + } + +free_cpufreq_put: + cpufreq_cpu_put(policy); + + if (!highest_perf_changed) + cpufreq_update_policy(cpu); + + mutex_unlock(&amd_pstate_driver_lock); +} + static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; @@ -727,6 +849,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; + amd_pstate_init_prefcore(cpudata); + ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; @@ -877,6 +1001,28 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, return sysfs_emit(buf, "%u\n", perf); } +static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy, + char *buf) +{ + u32 perf; + struct amd_cpudata *cpudata = policy->driver_data; + + perf = READ_ONCE(cpudata->prefcore_ranking); + + return sysfs_emit(buf, "%u\n", perf); +} + +static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy, + char *buf) +{ + bool hw_prefcore; + struct amd_cpudata *cpudata = policy->driver_data; + + hw_prefcore = READ_ONCE(cpudata->hw_prefcore); + + return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore)); +} + static ssize_t show_energy_performance_available_preferences( struct cpufreq_policy *policy, char *buf) { @@ -1074,18 +1220,29 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, return ret < 0 ? ret : count; } +static ssize_t prefcore_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore)); +} + cpufreq_freq_attr_ro(amd_pstate_max_freq); cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); cpufreq_freq_attr_ro(amd_pstate_highest_perf); +cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking); +cpufreq_freq_attr_ro(amd_pstate_hw_prefcore); cpufreq_freq_attr_rw(energy_performance_preference); cpufreq_freq_attr_ro(energy_performance_available_preferences); static DEVICE_ATTR_RW(status); +static DEVICE_ATTR_RO(prefcore); static struct freq_attr *amd_pstate_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, + &amd_pstate_prefcore_ranking, + &amd_pstate_hw_prefcore, NULL, }; @@ -1093,6 +1250,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, + &amd_pstate_prefcore_ranking, + &amd_pstate_hw_prefcore, &energy_performance_preference, &energy_performance_available_preferences, NULL, @@ -1100,6 +1259,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = { static struct attribute *pstate_global_attributes[] = { &dev_attr_status.attr, + &dev_attr_prefcore.attr, NULL }; @@ -1151,6 +1311,8 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; cpudata->epp_policy = 0; + amd_pstate_init_prefcore(cpudata); + ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; @@ -1232,6 +1394,12 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + if (min_limit_perf < min_perf) + min_limit_perf = min_perf; + + if (max_limit_perf < min_limit_perf) + max_limit_perf = min_limit_perf; + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); @@ -1432,6 +1600,7 @@ static struct cpufreq_driver amd_pstate_driver = { .suspend = amd_pstate_cpu_suspend, .resume = amd_pstate_cpu_resume, .set_boost = amd_pstate_set_boost, + .update_limits = amd_pstate_update_limits, .name = "amd-pstate", .attr = amd_pstate_attr, }; @@ -1446,6 +1615,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = { .online = amd_pstate_epp_cpu_online, .suspend = amd_pstate_epp_suspend, .resume = amd_pstate_epp_resume, + .update_limits = amd_pstate_update_limits, .name = "amd-pstate-epp", .attr = amd_pstate_epp_attr, }; @@ -1567,7 +1737,17 @@ static int __init amd_pstate_param(char *str) return amd_pstate_set_driver(mode_idx); } + +static int __init amd_prefcore_param(char *str) +{ + if (!strcmp(str, "disable")) + amd_pstate_prefcore = false; + + return 0; +} + early_param("amd_pstate", amd_pstate_param); +early_param("amd_prefcore", amd_prefcore_param); MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>"); MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 35fb3a559ea9..1a1857b0a6f4 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -481,6 +481,8 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv) static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + if (!policy) + return 0; struct private_data *priv = policy->driver_data; cpufreq_cpu_put(policy); diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index bd1e1357cef8..b993a498084b 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -156,6 +156,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sc7280", }, { .compatible = "qcom,sc8180x", }, { .compatible = "qcom,sc8280xp", }, + { .compatible = "qcom,sdm670", }, { .compatible = "qcom,sdm845", }, { .compatible = "qcom,sdx75", }, { .compatible = "qcom,sm6115", }, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 44db4f59c4cc..f6f8d7f450e7 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -576,17 +576,26 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy) latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC; if (latency) { + unsigned int max_delay_us = 2 * MSEC_PER_SEC; + + /* + * If the platform already has high transition_latency, use it + * as-is. + */ + if (latency > max_delay_us) + return latency; + /* - * For platforms that can change the frequency very fast (< 10 + * For platforms that can change the frequency very fast (< 2 * us), the above formula gives a decent transition delay. But * for platforms where transition_latency is in milliseconds, it * ends up giving unrealistic values. * - * Cap the default transition delay to 10 ms, which seems to be + * Cap the default transition delay to 2 ms, which seems to be * a reasonable amount of time after which we should reevaluate * the frequency. */ - return min(latency * LATENCY_MULTIPLIER, (unsigned int)10000); + return min(latency * LATENCY_MULTIPLIER, max_delay_us); } return LATENCY_MULTIPLIER; @@ -1571,7 +1580,8 @@ static int cpufreq_online(unsigned int cpu) if (cpufreq_driver->ready) cpufreq_driver->ready(policy); - if (cpufreq_thermal_control_enabled(cpufreq_driver)) + /* Register cpufreq cooling only for a new policy */ + if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver)) policy->cdev = of_cpufreq_cooling_register(policy); pr_debug("initialization complete\n"); @@ -1655,11 +1665,6 @@ static void __cpufreq_offline(unsigned int cpu, struct cpufreq_policy *policy) else policy->last_policy = policy->policy; - if (cpufreq_thermal_control_enabled(cpufreq_driver)) { - cpufreq_cooling_unregister(policy->cdev); - policy->cdev = NULL; - } - if (has_target()) cpufreq_exit_governor(policy); @@ -1720,6 +1725,15 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) return; } + /* + * Unregister cpufreq cooling once all the CPUs of the policy are + * removed. + */ + if (cpufreq_thermal_control_enabled(cpufreq_driver)) { + cpufreq_cooling_unregister(policy->cdev); + policy->cdev = NULL; + } + /* We did light-weight exit earlier, do full tear down now */ if (cpufreq_driver->offline) cpufreq_driver->exit(policy); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index c52d19d67557..a7c38b8b3e78 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -22,7 +22,6 @@ #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_UP_THRESHOLD (95) -#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (1) #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 33728c242f66..c20d3ecc5a81 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -14,6 +14,8 @@ #include <linux/pm_opp.h> #include <linux/platform_device.h> #include <linux/regulator/consumer.h> +#include <linux/mfd/syscon.h> +#include <linux/regmap.h> #define PU_SOC_VOLTAGE_NORMAL 1250000 #define PU_SOC_VOLTAGE_HIGH 1275000 @@ -225,8 +227,6 @@ static void imx6x_disable_freq_in_opp(struct device *dev, unsigned long freq) static int imx6q_opp_check_speed_grading(struct device *dev) { - struct device_node *np; - void __iomem *base; u32 val; int ret; @@ -235,16 +235,11 @@ static int imx6q_opp_check_speed_grading(struct device *dev) if (ret) return ret; } else { - np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-ocotp"); - if (!np) - return -ENOENT; + struct regmap *ocotp; - base = of_iomap(np, 0); - of_node_put(np); - if (!base) { - dev_err(dev, "failed to map ocotp\n"); - return -EFAULT; - } + ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6q-ocotp"); + if (IS_ERR(ocotp)) + return -ENOENT; /* * SPEED_GRADING[1:0] defines the max speed of ARM: @@ -254,8 +249,7 @@ static int imx6q_opp_check_speed_grading(struct device *dev) * 2b'00: 792000000Hz; * We need to set the max speed of ARM according to fuse map. */ - val = readl_relaxed(base + OCOTP_CFG3); - iounmap(base); + regmap_read(ocotp, OCOTP_CFG3, &val); } val >>= OCOTP_CFG3_SPEED_SHIFT; @@ -290,25 +284,16 @@ static int imx6ul_opp_check_speed_grading(struct device *dev) if (ret) return ret; } else { - struct device_node *np; - void __iomem *base; - - np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-ocotp"); - if (!np) - np = of_find_compatible_node(NULL, NULL, - "fsl,imx6ull-ocotp"); - if (!np) - return -ENOENT; + struct regmap *ocotp; - base = of_iomap(np, 0); - of_node_put(np); - if (!base) { - dev_err(dev, "failed to map ocotp\n"); - return -EFAULT; - } + ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6ul-ocotp"); + if (IS_ERR(ocotp)) + ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6ull-ocotp"); + + if (IS_ERR(ocotp)) + return -ENOENT; - val = readl_relaxed(base + OCOTP_CFG3); - iounmap(base); + regmap_read(ocotp, OCOTP_CFG3, &val); } /* diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 79619227ea51..dbbf299f4219 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -25,6 +25,7 @@ #include <linux/acpi.h> #include <linux/vmalloc.h> #include <linux/pm_qos.h> +#include <linux/bitfield.h> #include <trace/events/power.h> #include <asm/cpu.h> @@ -201,8 +202,6 @@ struct global_params { * @prev_aperf: Last APERF value read from APERF MSR * @prev_mperf: Last MPERF value read from MPERF MSR * @prev_tsc: Last timestamp counter (TSC) value - * @prev_cummulative_iowait: IO Wait time difference from last and - * current sample * @sample: Storage for storing last Sample data * @min_perf_ratio: Minimum capacity in terms of PERF or HWP ratios * @max_perf_ratio: Maximum capacity in terms of PERF or HWP ratios @@ -241,7 +240,6 @@ struct cpudata { u64 prev_aperf; u64 prev_mperf; u64 prev_tsc; - u64 prev_cummulative_iowait; struct sample sample; int32_t min_perf_ratio; int32_t max_perf_ratio; @@ -3407,14 +3405,31 @@ static bool intel_pstate_hwp_is_enabled(void) return !!(value & 0x1); } -static const struct x86_cpu_id intel_epp_balance_perf[] = { +#define POWERSAVE_MASK GENMASK(7, 0) +#define BALANCE_POWER_MASK GENMASK(15, 8) +#define BALANCE_PERFORMANCE_MASK GENMASK(23, 16) +#define PERFORMANCE_MASK GENMASK(31, 24) + +#define HWP_SET_EPP_VALUES(powersave, balance_power, balance_perf, performance) \ + (FIELD_PREP_CONST(POWERSAVE_MASK, powersave) |\ + FIELD_PREP_CONST(BALANCE_POWER_MASK, balance_power) |\ + FIELD_PREP_CONST(BALANCE_PERFORMANCE_MASK, balance_perf) |\ + FIELD_PREP_CONST(PERFORMANCE_MASK, performance)) + +#define HWP_SET_DEF_BALANCE_PERF_EPP(balance_perf) \ + (HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, HWP_EPP_BALANCE_POWERSAVE,\ + balance_perf, HWP_EPP_PERFORMANCE)) + +static const struct x86_cpu_id intel_epp_default[] = { /* * Set EPP value as 102, this is the max suggested EPP * which can result in one core turbo frequency for * AlderLake Mobile CPUs. */ - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 102), - X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 32), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, + HWP_EPP_BALANCE_POWERSAVE, 115, 16)), {} }; @@ -3512,11 +3527,24 @@ hwp_cpu_matched: intel_pstate_sysfs_expose_params(); if (hwp_active) { - const struct x86_cpu_id *id = x86_match_cpu(intel_epp_balance_perf); + const struct x86_cpu_id *id = x86_match_cpu(intel_epp_default); const struct x86_cpu_id *hybrid_id = x86_match_cpu(intel_hybrid_scaling_factor); - if (id) - epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = id->driver_data; + if (id) { + epp_values[EPP_INDEX_POWERSAVE] = + FIELD_GET(POWERSAVE_MASK, id->driver_data); + epp_values[EPP_INDEX_BALANCE_POWERSAVE] = + FIELD_GET(BALANCE_POWER_MASK, id->driver_data); + epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = + FIELD_GET(BALANCE_PERFORMANCE_MASK, id->driver_data); + epp_values[EPP_INDEX_PERFORMANCE] = + FIELD_GET(PERFORMANCE_MASK, id->driver_data); + pr_debug("Updated EPPs powersave:%x balanced power:%x balanced perf:%x performance:%x\n", + epp_values[EPP_INDEX_POWERSAVE], + epp_values[EPP_INDEX_BALANCE_POWERSAVE], + epp_values[EPP_INDEX_BALANCE_PERFORMANCE], + epp_values[EPP_INDEX_PERFORMANCE]); + } if (hybrid_id) { hybrid_scaling_factor = hybrid_id->driver_data; diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index d46afb3c0092..8d097dcddda4 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -13,6 +13,7 @@ #include <linux/of.h> #include <linux/of_platform.h> #include <linux/platform_device.h> +#include <linux/regulator/consumer.h> #include <linux/slab.h> #define LUT_MAX_ENTRIES 32U @@ -300,7 +301,23 @@ static struct cpufreq_driver cpufreq_mtk_hw_driver = { static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev) { const void *data; - int ret; + int ret, cpu; + struct device *cpu_dev; + struct regulator *cpu_reg; + + /* Make sure that all CPU supplies are available before proceeding. */ + for_each_possible_cpu(cpu) { + cpu_dev = get_cpu_device(cpu); + if (!cpu_dev) + return dev_err_probe(&pdev->dev, -EPROBE_DEFER, + "Failed to get cpu%d device\n", cpu); + + cpu_reg = devm_regulator_get(cpu_dev, "cpu"); + if (IS_ERR(cpu_reg)) + return dev_err_probe(&pdev->dev, PTR_ERR(cpu_reg), + "CPU%d regulator get failed\n", cpu); + } + data = of_device_get_match_data(&pdev->dev); if (!data) diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 4ee23f4ebf4a..0b483bd0d3ca 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -144,6 +144,29 @@ scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power, return 0; } +static int +scmi_get_rate_limit(u32 domain, bool has_fast_switch) +{ + int ret, rate_limit; + + if (has_fast_switch) { + /* + * Fast channels are used whenever available, + * so use their rate_limit value if populated. + */ + ret = perf_ops->fast_switch_rate_limit(ph, domain, + &rate_limit); + if (!ret && rate_limit) + return rate_limit; + } + + ret = perf_ops->rate_limit_get(ph, domain, &rate_limit); + if (ret) + return 0; + + return rate_limit; +} + static int scmi_cpufreq_init(struct cpufreq_policy *policy) { int ret, nr_opp, domain; @@ -250,6 +273,9 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) policy->fast_switch_possible = perf_ops->fast_switch_possible(ph, domain); + policy->transition_delay_us = + scmi_get_rate_limit(domain, policy->fast_switch_possible); + return 0; out_free_opp: diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index d9cda7f6ccb9..cf5873cc45dc 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -16,6 +16,7 @@ #include <linux/cpumask.h> #include <linux/tick.h> #include <linux/cpu.h> +#include <linux/math64.h> #include "cpuidle.h" @@ -187,7 +188,7 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv) s->target_residency = div_u64(s->target_residency_ns, NSEC_PER_USEC); if (s->exit_latency > 0) - s->exit_latency_ns = s->exit_latency * NSEC_PER_USEC; + s->exit_latency_ns = mul_u32_u32(s->exit_latency, NSEC_PER_USEC); else if (s->exit_latency_ns < 0) s->exit_latency_ns = 0; else diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c index 1dff3a52917d..663b7f164d20 100644 --- a/drivers/cpuidle/governors/haltpoll.c +++ b/drivers/cpuidle/governors/haltpoll.c @@ -98,10 +98,15 @@ static void adjust_poll_limit(struct cpuidle_device *dev, u64 block_ns) unsigned int shrink = guest_halt_poll_shrink; val = dev->poll_limit_ns; - if (shrink == 0) + if (shrink == 0) { val = 0; - else + } else { val /= shrink; + /* Reset value to 0 if shrunk below grow_start */ + if (val < guest_halt_poll_grow_start) + val = 0; + } + trace_guest_halt_poll_ns_shrink(val, dev->poll_limit_ns); dev->poll_limit_ns = val; } diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 34d77802c990..2709598f3008 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -1624,7 +1624,7 @@ static void scmi_common_fastchannel_init(const struct scmi_protocol_handle *ph, u8 describe_id, u32 message_id, u32 valid_size, u32 domain, void __iomem **p_addr, - struct scmi_fc_db_info **p_db) + struct scmi_fc_db_info **p_db, u32 *rate_limit) { int ret; u32 flags; @@ -1668,6 +1668,9 @@ scmi_common_fastchannel_init(const struct scmi_protocol_handle *ph, goto err_xfer; } + if (rate_limit) + *rate_limit = le32_to_cpu(resp->rate_limit) & GENMASK(19, 0); + phys_addr = le32_to_cpu(resp->chan_addr_low); phys_addr |= (u64)le32_to_cpu(resp->chan_addr_high) << 32; addr = devm_ioremap(ph->dev, phys_addr, size); diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c index 981e327e63e3..8e832d1ad825 100644 --- a/drivers/firmware/arm_scmi/perf.c +++ b/drivers/firmware/arm_scmi/perf.c @@ -153,6 +153,7 @@ struct perf_dom_info { bool perf_fastchannels; bool level_indexing_mode; u32 opp_count; + u32 rate_limit_us; u32 sustained_freq_khz; u32 sustained_perf_level; unsigned long mult_factor; @@ -282,6 +283,8 @@ scmi_perf_domain_attributes_get(const struct scmi_protocol_handle *ph, if (PROTOCOL_REV_MAJOR(version) >= 0x4) dom_info->level_indexing_mode = SUPPORTS_LEVEL_INDEXING(flags); + dom_info->rate_limit_us = le32_to_cpu(attr->rate_limit_us) & + GENMASK(19, 0); dom_info->sustained_freq_khz = le32_to_cpu(attr->sustained_freq_khz); dom_info->sustained_perf_level = @@ -825,23 +828,27 @@ static void scmi_perf_domain_init_fc(const struct scmi_protocol_handle *ph, ph->hops->fastchannel_init(ph, PERF_DESCRIBE_FASTCHANNEL, PERF_LEVEL_GET, 4, dom->id, - &fc[PERF_FC_LEVEL].get_addr, NULL); + &fc[PERF_FC_LEVEL].get_addr, NULL, + &fc[PERF_FC_LEVEL].rate_limit); ph->hops->fastchannel_init(ph, PERF_DESCRIBE_FASTCHANNEL, PERF_LIMITS_GET, 8, dom->id, - &fc[PERF_FC_LIMIT].get_addr, NULL); + &fc[PERF_FC_LIMIT].get_addr, NULL, + &fc[PERF_FC_LIMIT].rate_limit); if (dom->info.set_perf) ph->hops->fastchannel_init(ph, PERF_DESCRIBE_FASTCHANNEL, PERF_LEVEL_SET, 4, dom->id, &fc[PERF_FC_LEVEL].set_addr, - &fc[PERF_FC_LEVEL].set_db); + &fc[PERF_FC_LEVEL].set_db, + &fc[PERF_FC_LEVEL].rate_limit); if (dom->set_limits) ph->hops->fastchannel_init(ph, PERF_DESCRIBE_FASTCHANNEL, PERF_LIMITS_SET, 8, dom->id, &fc[PERF_FC_LIMIT].set_addr, - &fc[PERF_FC_LIMIT].set_db); + &fc[PERF_FC_LIMIT].set_db, + &fc[PERF_FC_LIMIT].rate_limit); dom->fc_info = fc; } @@ -894,6 +901,23 @@ scmi_dvfs_transition_latency_get(const struct scmi_protocol_handle *ph, return dom->opp[dom->opp_count - 1].trans_latency_us * 1000; } +static int +scmi_dvfs_rate_limit_get(const struct scmi_protocol_handle *ph, + u32 domain, u32 *rate_limit) +{ + struct perf_dom_info *dom; + + if (!rate_limit) + return -EINVAL; + + dom = scmi_perf_domain_lookup(ph, domain); + if (IS_ERR(dom)) + return PTR_ERR(dom); + + *rate_limit = dom->rate_limit_us; + return 0; +} + static int scmi_dvfs_freq_set(const struct scmi_protocol_handle *ph, u32 domain, unsigned long freq, bool poll) { @@ -993,6 +1017,25 @@ static bool scmi_fast_switch_possible(const struct scmi_protocol_handle *ph, return dom->fc_info && dom->fc_info[PERF_FC_LEVEL].set_addr; } +static int scmi_fast_switch_rate_limit(const struct scmi_protocol_handle *ph, + u32 domain, u32 *rate_limit) +{ + struct perf_dom_info *dom; + + if (!rate_limit) + return -EINVAL; + + dom = scmi_perf_domain_lookup(ph, domain); + if (IS_ERR(dom)) + return PTR_ERR(dom); + + if (!dom->fc_info) + return -EINVAL; + + *rate_limit = dom->fc_info[PERF_FC_LEVEL].rate_limit; + return 0; +} + static enum scmi_power_scale scmi_power_scale_get(const struct scmi_protocol_handle *ph) { @@ -1009,11 +1052,13 @@ static const struct scmi_perf_proto_ops perf_proto_ops = { .level_set = scmi_perf_level_set, .level_get = scmi_perf_level_get, .transition_latency_get = scmi_dvfs_transition_latency_get, + .rate_limit_get = scmi_dvfs_rate_limit_get, .device_opps_add = scmi_dvfs_device_opps_add, .freq_set = scmi_dvfs_freq_set, .freq_get = scmi_dvfs_freq_get, .est_power_get = scmi_dvfs_est_power_get, .fast_switch_possible = scmi_fast_switch_possible, + .fast_switch_rate_limit = scmi_fast_switch_rate_limit, .power_scale_get = scmi_power_scale_get, }; diff --git a/drivers/firmware/arm_scmi/powercap.c b/drivers/firmware/arm_scmi/powercap.c index 2fab92367e42..ea9201e7044c 100644 --- a/drivers/firmware/arm_scmi/powercap.c +++ b/drivers/firmware/arm_scmi/powercap.c @@ -719,20 +719,24 @@ static void scmi_powercap_domain_init_fc(const struct scmi_protocol_handle *ph, ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL, POWERCAP_CAP_SET, 4, domain, &fc[POWERCAP_FC_CAP].set_addr, - &fc[POWERCAP_FC_CAP].set_db); + &fc[POWERCAP_FC_CAP].set_db, + &fc[POWERCAP_FC_CAP].rate_limit); ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL, POWERCAP_CAP_GET, 4, domain, - &fc[POWERCAP_FC_CAP].get_addr, NULL); + &fc[POWERCAP_FC_CAP].get_addr, NULL, + &fc[POWERCAP_FC_CAP].rate_limit); ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL, POWERCAP_PAI_SET, 4, domain, &fc[POWERCAP_FC_PAI].set_addr, - &fc[POWERCAP_FC_PAI].set_db); + &fc[POWERCAP_FC_PAI].set_db, + &fc[POWERCAP_FC_PAI].rate_limit); ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL, POWERCAP_PAI_GET, 4, domain, - &fc[POWERCAP_FC_PAI].get_addr, NULL); + &fc[POWERCAP_FC_PAI].get_addr, NULL, + &fc[POWERCAP_PAI_GET].rate_limit); *p_fc = fc; } diff --git a/drivers/firmware/arm_scmi/protocols.h b/drivers/firmware/arm_scmi/protocols.h index 693019fff0f6..317d3fb32676 100644 --- a/drivers/firmware/arm_scmi/protocols.h +++ b/drivers/firmware/arm_scmi/protocols.h @@ -235,6 +235,7 @@ struct scmi_fc_info { void __iomem *set_addr; void __iomem *get_addr; struct scmi_fc_db_info *set_db; + u32 rate_limit; }; /** @@ -273,7 +274,8 @@ struct scmi_proto_helpers_ops { u8 describe_id, u32 message_id, u32 valid_size, u32 domain, void __iomem **p_addr, - struct scmi_fc_db_info **p_db); + struct scmi_fc_db_info **p_db, + u32 *rate_limit); void (*fastchannel_db_ring)(struct scmi_fc_db_info *db); }; diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 860b51b56a92..d4e844128826 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -246,7 +246,10 @@ static intel_wakeref_t __intel_runtime_pm_get_if_active(struct intel_runtime_pm * function, since the power state is undefined. This applies * atm to the late/early system suspend/resume handlers. */ - if (pm_runtime_get_if_active(rpm->kdev, ignore_usecount) <= 0) + if ((ignore_usecount && + pm_runtime_get_if_active(rpm->kdev) <= 0) || + (!ignore_usecount && + pm_runtime_get_if_in_use(rpm->kdev) <= 0)) return 0; } diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index b429c2876a76..dd110058bf74 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -330,7 +330,7 @@ int xe_pm_runtime_put(struct xe_device *xe) int xe_pm_runtime_get_if_active(struct xe_device *xe) { - return pm_runtime_get_if_active(xe->drm.dev, true); + return pm_runtime_get_if_active(xe->drm.dev); } void xe_pm_assert_unbounded_bridge(struct xe_device *xe) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index bcf1198e8991..e486027f8b07 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -1934,7 +1934,8 @@ static void __init spr_idle_state_table_update(void) static bool __init intel_idle_verify_cstate(unsigned int mwait_hint) { - unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1; + unsigned int mwait_cstate = (MWAIT_HINT2CSTATE(mwait_hint) + 1) & + MWAIT_CSTATE_MASK; unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) & MWAIT_SUBSTATE_MASK; diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c index e21287d50c15..e1ae0f9fad43 100644 --- a/drivers/media/i2c/ccs/ccs-core.c +++ b/drivers/media/i2c/ccs/ccs-core.c @@ -674,7 +674,7 @@ static int ccs_set_ctrl(struct v4l2_ctrl *ctrl) break; } - pm_status = pm_runtime_get_if_active(&client->dev, true); + pm_status = pm_runtime_get_if_active(&client->dev); if (!pm_status) return 0; diff --git a/drivers/media/i2c/ov64a40.c b/drivers/media/i2c/ov64a40.c index 4fba4c2cb064..541bf74581d2 100644 --- a/drivers/media/i2c/ov64a40.c +++ b/drivers/media/i2c/ov64a40.c @@ -3287,7 +3287,7 @@ static int ov64a40_set_ctrl(struct v4l2_ctrl *ctrl) exp_max, 1, exp_val); } - pm_status = pm_runtime_get_if_active(ov64a40->dev, true); + pm_status = pm_runtime_get_if_active(ov64a40->dev); if (!pm_status) return 0; diff --git a/drivers/media/i2c/thp7312.c b/drivers/media/i2c/thp7312.c index 2806887514dc..19bd923a7315 100644 --- a/drivers/media/i2c/thp7312.c +++ b/drivers/media/i2c/thp7312.c @@ -1052,7 +1052,7 @@ static int thp7312_s_ctrl(struct v4l2_ctrl *ctrl) if (ctrl->flags & V4L2_CTRL_FLAG_INACTIVE) return -EINVAL; - if (!pm_runtime_get_if_active(thp7312->dev, true)) + if (!pm_runtime_get_if_active(thp7312->dev)) return 0; switch (ctrl->id) { diff --git a/drivers/net/ipa/ipa_smp2p.c b/drivers/net/ipa/ipa_smp2p.c index aeccce9fab72..2f917582c423 100644 --- a/drivers/net/ipa/ipa_smp2p.c +++ b/drivers/net/ipa/ipa_smp2p.c @@ -90,7 +90,7 @@ static void ipa_smp2p_notify(struct ipa_smp2p *smp2p) if (smp2p->notified) return; - smp2p->power_on = pm_runtime_get_if_active(smp2p->ipa->dev, true) > 0; + smp2p->power_on = pm_runtime_get_if_active(smp2p->ipa->dev) > 0; /* Signal whether the IPA power is enabled */ mask = BIT(smp2p->enabled_bit); diff --git a/drivers/opp/core.c b/drivers/opp/core.c index c4e0432ae42a..e233734b7220 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -2065,6 +2065,7 @@ int _opp_add_v1(struct opp_table *opp_table, struct device *dev, /* populate the opp table */ new_opp->rates[0] = data->freq; new_opp->level = data->level; + new_opp->turbo = data->turbo; tol = u_volt * opp_table->voltage_tolerance_v1 / 100; new_opp->supplies[0].u_volt = u_volt; new_opp->supplies[0].u_volt_min = u_volt - tol; diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c index ec030b19164a..105de7c3274a 100644 --- a/drivers/opp/debugfs.c +++ b/drivers/opp/debugfs.c @@ -37,10 +37,12 @@ static ssize_t bw_name_read(struct file *fp, char __user *userbuf, size_t count, loff_t *ppos) { struct icc_path *path = fp->private_data; + const char *name = icc_get_name(path); char buf[64]; - int i; + int i = 0; - i = scnprintf(buf, sizeof(buf), "%.62s\n", icc_get_name(path)); + if (name) + i = scnprintf(buf, sizeof(buf), "%.62s\n", name); return simple_read_from_buffer(userbuf, count, ppos, buf, i); } @@ -56,11 +58,11 @@ static void opp_debug_create_bw(struct dev_pm_opp *opp, struct dentry *pdentry) { struct dentry *d; - char name[20]; + char name[] = "icc-path-XXXXXXXXXXX"; /* Integers can take 11 chars max */ int i; for (i = 0; i < opp_table->path_count; i++) { - snprintf(name, sizeof(name), "icc-path-%.1d", i); + snprintf(name, sizeof(name), "icc-path-%d", i); /* Create per-path directory */ d = debugfs_create_dir(name, pdentry); @@ -78,7 +80,7 @@ static void opp_debug_create_clks(struct dev_pm_opp *opp, struct opp_table *opp_table, struct dentry *pdentry) { - char name[12]; + char name[] = "rate_hz_XXXXXXXXXXX"; /* Integers can take 11 chars max */ int i; if (opp_table->clk_count == 1) { @@ -100,7 +102,7 @@ static void opp_debug_create_supplies(struct dev_pm_opp *opp, int i; for (i = 0; i < opp_table->regulator_count; i++) { - char name[15]; + char name[] = "supply-XXXXXXXXXXX"; /* Integers can take 11 chars max */ snprintf(name, sizeof(name), "supply-%d", i); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index ccee56615f78..1e33f0e2d945 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2532,7 +2532,7 @@ static void pci_pme_list_scan(struct work_struct *work) * course of the call. */ if (bdev) { - bref = pm_runtime_get_if_active(bdev, true); + bref = pm_runtime_get_if_active(bdev); if (!bref) continue; diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c index ce920f17f45f..f390665743c4 100644 --- a/drivers/powercap/dtpm.c +++ b/drivers/powercap/dtpm.c @@ -522,7 +522,7 @@ static int dtpm_for_each_child(const struct dtpm_node *hierarchy, /** * dtpm_create_hierarchy - Create the dtpm hierarchy - * @hierarchy: An array of struct dtpm_node describing the hierarchy + * @dtpm_match_table: Pointer to the array of device ID structures * * The function is called by the platform specific code with the * description of the different node in the hierarchy. It creates the diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 9193c3b8edeb..bc90126f1b5f 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -42,6 +42,7 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) { struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu); + struct em_perf_state *table; struct cpumask cpus; unsigned long freq; u64 power; @@ -50,20 +51,22 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus)); nr_cpus = cpumask_weight(&cpus); + rcu_read_lock(); + table = em_perf_state_from_pd(pd); for (i = 0; i < pd->nr_perf_states; i++) { - power = pd->table[i].power * nr_cpus; + power = table[i].power * nr_cpus; if (power > power_limit) break; } - freq = pd->table[i - 1].frequency; + freq = table[i - 1].frequency; + power_limit = table[i - 1].power * nr_cpus; + rcu_read_unlock(); freq_qos_update_request(&dtpm_cpu->qos_req, freq); - power_limit = pd->table[i - 1].power * nr_cpus; - return power_limit; } @@ -87,9 +90,11 @@ static u64 scale_pd_power_uw(struct cpumask *pd_mask, u64 power) static u64 get_pd_power_uw(struct dtpm *dtpm) { struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); + struct em_perf_state *table; struct em_perf_domain *pd; struct cpumask *pd_mask; unsigned long freq; + u64 power = 0; int i; pd = em_cpu_get(dtpm_cpu->cpu); @@ -98,33 +103,43 @@ static u64 get_pd_power_uw(struct dtpm *dtpm) freq = cpufreq_quick_get(dtpm_cpu->cpu); + rcu_read_lock(); + table = em_perf_state_from_pd(pd); for (i = 0; i < pd->nr_perf_states; i++) { - if (pd->table[i].frequency < freq) + if (table[i].frequency < freq) continue; - return scale_pd_power_uw(pd_mask, pd->table[i].power); + power = scale_pd_power_uw(pd_mask, table[i].power); + break; } + rcu_read_unlock(); - return 0; + return power; } static int update_pd_power_uw(struct dtpm *dtpm) { struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm); struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu); + struct em_perf_state *table; struct cpumask cpus; int nr_cpus; cpumask_and(&cpus, cpu_online_mask, to_cpumask(em->cpus)); nr_cpus = cpumask_weight(&cpus); - dtpm->power_min = em->table[0].power; + rcu_read_lock(); + table = em_perf_state_from_pd(em); + + dtpm->power_min = table[0].power; dtpm->power_min *= nr_cpus; - dtpm->power_max = em->table[em->nr_perf_states - 1].power; + dtpm->power_max = table[em->nr_perf_states - 1].power; dtpm->power_max *= nr_cpus; + rcu_read_unlock(); + return 0; } @@ -143,7 +158,7 @@ static void pd_release(struct dtpm *dtpm) cpufreq_cpu_put(policy); } - + kfree(dtpm_cpu); } @@ -180,6 +195,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent) { struct dtpm_cpu *dtpm_cpu; struct cpufreq_policy *policy; + struct em_perf_state *table; struct em_perf_domain *pd; char name[CPUFREQ_NAME_LEN]; int ret = -ENOMEM; @@ -216,10 +232,13 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *parent) if (ret) goto out_kfree_dtpm_cpu; + rcu_read_lock(); + table = em_perf_state_from_pd(pd); ret = freq_qos_add_request(&policy->constraints, &dtpm_cpu->qos_req, FREQ_QOS_MAX, - pd->table[pd->nr_perf_states - 1].frequency); - if (ret) + table[pd->nr_perf_states - 1].frequency); + rcu_read_unlock(); + if (ret < 0) goto out_dtpm_unregister; cpufreq_cpu_put(policy); diff --git a/drivers/powercap/dtpm_devfreq.c b/drivers/powercap/dtpm_devfreq.c index 612c3b59dd5b..f40bce8176df 100644 --- a/drivers/powercap/dtpm_devfreq.c +++ b/drivers/powercap/dtpm_devfreq.c @@ -37,11 +37,16 @@ static int update_pd_power_uw(struct dtpm *dtpm) struct devfreq *devfreq = dtpm_devfreq->devfreq; struct device *dev = devfreq->dev.parent; struct em_perf_domain *pd = em_pd_get(dev); + struct em_perf_state *table; - dtpm->power_min = pd->table[0].power; + rcu_read_lock(); + table = em_perf_state_from_pd(pd); - dtpm->power_max = pd->table[pd->nr_perf_states - 1].power; + dtpm->power_min = table[0].power; + dtpm->power_max = table[pd->nr_perf_states - 1].power; + + rcu_read_unlock(); return 0; } @@ -51,20 +56,23 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) struct devfreq *devfreq = dtpm_devfreq->devfreq; struct device *dev = devfreq->dev.parent; struct em_perf_domain *pd = em_pd_get(dev); + struct em_perf_state *table; unsigned long freq; int i; + rcu_read_lock(); + table = em_perf_state_from_pd(pd); for (i = 0; i < pd->nr_perf_states; i++) { - if (pd->table[i].power > power_limit) + if (table[i].power > power_limit) break; } - freq = pd->table[i - 1].frequency; + freq = table[i - 1].frequency; + power_limit = table[i - 1].power; + rcu_read_unlock(); dev_pm_qos_update_request(&dtpm_devfreq->qos_req, freq); - power_limit = pd->table[i - 1].power; - return power_limit; } @@ -89,8 +97,9 @@ static u64 get_pd_power_uw(struct dtpm *dtpm) struct device *dev = devfreq->dev.parent; struct em_perf_domain *pd = em_pd_get(dev); struct devfreq_dev_status status; + struct em_perf_state *table; unsigned long freq; - u64 power; + u64 power = 0; int i; mutex_lock(&devfreq->lock); @@ -100,19 +109,22 @@ static u64 get_pd_power_uw(struct dtpm *dtpm) freq = DIV_ROUND_UP(status.current_frequency, HZ_PER_KHZ); _normalize_load(&status); + rcu_read_lock(); + table = em_perf_state_from_pd(pd); for (i = 0; i < pd->nr_perf_states; i++) { - if (pd->table[i].frequency < freq) + if (table[i].frequency < freq) continue; - power = pd->table[i].power; + power = table[i].power; power *= status.busy_time; power >>= 10; - return power; + break; } + rcu_read_unlock(); - return 0; + return power; } static void pd_release(struct dtpm *dtpm) diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c index 00c861899a47..a28d54fd5222 100644 --- a/drivers/powercap/intel_rapl_common.c +++ b/drivers/powercap/intel_rapl_common.c @@ -5,6 +5,7 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/cleanup.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/list.h> @@ -759,6 +760,11 @@ static int rapl_config(struct rapl_package *rp) default: return -EINVAL; } + + /* defaults_msr can be NULL on unsupported platforms */ + if (!rp->priv->defaults || !rp->priv->rpi) + return -ENODEV; + return 0; } @@ -1256,6 +1262,8 @@ static const struct x86_cpu_id rapl_ids[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &rapl_defaults_spr_server), X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &rapl_defaults_spr_server), + X86_MATCH_INTEL_FAM6_MODEL(LUNARLAKE_M, &rapl_defaults_core), + X86_MATCH_INTEL_FAM6_MODEL(ARROWLAKE, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(LAKEFIELD, &rapl_defaults_core), X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &rapl_defaults_byt), @@ -1499,7 +1507,7 @@ static int rapl_detect_domains(struct rapl_package *rp) } /* called from CPU hotplug notifier, hotplug lock held */ -void rapl_remove_package(struct rapl_package *rp) +void rapl_remove_package_cpuslocked(struct rapl_package *rp) { struct rapl_domain *rd, *rd_package = NULL; @@ -1528,10 +1536,18 @@ void rapl_remove_package(struct rapl_package *rp) list_del(&rp->plist); kfree(rp); } +EXPORT_SYMBOL_GPL(rapl_remove_package_cpuslocked); + +void rapl_remove_package(struct rapl_package *rp) +{ + guard(cpus_read_lock)(); + rapl_remove_package_cpuslocked(rp); +} EXPORT_SYMBOL_GPL(rapl_remove_package); /* caller to ensure CPU hotplug lock is held */ -struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu) +struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv, + bool id_is_cpu) { struct rapl_package *rp; int uid; @@ -1549,10 +1565,17 @@ struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, return NULL; } +EXPORT_SYMBOL_GPL(rapl_find_package_domain_cpuslocked); + +struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu) +{ + guard(cpus_read_lock)(); + return rapl_find_package_domain_cpuslocked(id, priv, id_is_cpu); +} EXPORT_SYMBOL_GPL(rapl_find_package_domain); /* called from CPU hotplug notifier, hotplug lock held */ -struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu) +struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv, bool id_is_cpu) { struct rapl_package *rp; int ret; @@ -1598,6 +1621,13 @@ err_free_package: kfree(rp); return ERR_PTR(ret); } +EXPORT_SYMBOL_GPL(rapl_add_package_cpuslocked); + +struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu) +{ + guard(cpus_read_lock)(); + return rapl_add_package_cpuslocked(id, priv, id_is_cpu); +} EXPORT_SYMBOL_GPL(rapl_add_package); static void power_limit_state_save(void) diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c index 250bd41a588c..b4b6930cacb0 100644 --- a/drivers/powercap/intel_rapl_msr.c +++ b/drivers/powercap/intel_rapl_msr.c @@ -73,9 +73,9 @@ static int rapl_cpu_online(unsigned int cpu) { struct rapl_package *rp; - rp = rapl_find_package_domain(cpu, rapl_msr_priv, true); + rp = rapl_find_package_domain_cpuslocked(cpu, rapl_msr_priv, true); if (!rp) { - rp = rapl_add_package(cpu, rapl_msr_priv, true); + rp = rapl_add_package_cpuslocked(cpu, rapl_msr_priv, true); if (IS_ERR(rp)) return PTR_ERR(rp); } @@ -88,14 +88,14 @@ static int rapl_cpu_down_prep(unsigned int cpu) struct rapl_package *rp; int lead_cpu; - rp = rapl_find_package_domain(cpu, rapl_msr_priv, true); + rp = rapl_find_package_domain_cpuslocked(cpu, rapl_msr_priv, true); if (!rp) return 0; cpumask_clear_cpu(cpu, &rp->cpumask); lead_cpu = cpumask_first(&rp->cpumask); if (lead_cpu >= nr_cpu_ids) - rapl_remove_package(rp); + rapl_remove_package_cpuslocked(rp); else if (rp->lead_cpu == cpu) rp->lead_cpu = lead_cpu; return 0; diff --git a/drivers/powercap/intel_rapl_tpmi.c b/drivers/powercap/intel_rapl_tpmi.c index 891c90fefd8b..f6b7f085977c 100644 --- a/drivers/powercap/intel_rapl_tpmi.c +++ b/drivers/powercap/intel_rapl_tpmi.c @@ -40,6 +40,7 @@ enum tpmi_rapl_register { TPMI_RAPL_REG_ENERGY_STATUS, TPMI_RAPL_REG_PERF_STATUS, TPMI_RAPL_REG_POWER_INFO, + TPMI_RAPL_REG_DOMAIN_INFO, TPMI_RAPL_REG_INTERRUPT, TPMI_RAPL_REG_MAX = 15, }; @@ -130,6 +131,12 @@ static void trp_release(struct tpmi_rapl_package *trp) mutex_unlock(&tpmi_rapl_lock); } +/* + * Bit 0 of TPMI_RAPL_REG_DOMAIN_INFO indicates if the current package is a domain + * root or not. Only domain root packages can enumerate System (Psys) Domain. + */ +#define TPMI_RAPL_DOMAIN_ROOT BIT(0) + static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset) { u8 tpmi_domain_version; @@ -139,6 +146,7 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset) enum rapl_domain_reg_id reg_id; int tpmi_domain_size, tpmi_domain_flags; u64 tpmi_domain_header = readq(trp->base + offset); + u64 tpmi_domain_info; /* Domain Parent bits are ignored for now */ tpmi_domain_version = tpmi_domain_header & 0xff; @@ -169,6 +177,13 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset) domain_type = RAPL_DOMAIN_PACKAGE; break; case TPMI_RAPL_DOMAIN_SYSTEM: + if (!(tpmi_domain_flags & BIT(TPMI_RAPL_REG_DOMAIN_INFO))) { + pr_warn(FW_BUG "System domain must support Domain Info register\n"); + return -ENODEV; + } + tpmi_domain_info = readq(trp->base + offset + TPMI_RAPL_REG_DOMAIN_INFO); + if (!(tpmi_domain_info & TPMI_RAPL_DOMAIN_ROOT)) + return 0; domain_type = RAPL_DOMAIN_PLATFORM; break; case TPMI_RAPL_DOMAIN_MEMORY: diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c index e2cc7bd30862..9d1b1459700d 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -91,12 +91,16 @@ struct cpufreq_cooling_device { static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev, unsigned int freq) { + struct em_perf_state *table; int i; + rcu_read_lock(); + table = em_perf_state_from_pd(cpufreq_cdev->em); for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) { - if (freq > cpufreq_cdev->em->table[i].frequency) + if (freq > table[i].frequency) break; } + rcu_read_unlock(); return cpufreq_cdev->max_level - i - 1; } @@ -104,16 +108,20 @@ static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev, static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev, u32 freq) { + struct em_perf_state *table; unsigned long power_mw; int i; + rcu_read_lock(); + table = em_perf_state_from_pd(cpufreq_cdev->em); for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) { - if (freq > cpufreq_cdev->em->table[i].frequency) + if (freq > table[i].frequency) break; } - power_mw = cpufreq_cdev->em->table[i + 1].power; + power_mw = table[i + 1].power; power_mw /= MICROWATT_PER_MILLIWATT; + rcu_read_unlock(); return power_mw; } @@ -121,18 +129,24 @@ static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev, static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev, u32 power) { + struct em_perf_state *table; unsigned long em_power_mw; + u32 freq; int i; + rcu_read_lock(); + table = em_perf_state_from_pd(cpufreq_cdev->em); for (i = cpufreq_cdev->max_level; i > 0; i--) { /* Convert EM power to milli-Watts to make safe comparison */ - em_power_mw = cpufreq_cdev->em->table[i].power; + em_power_mw = table[i].power; em_power_mw /= MICROWATT_PER_MILLIWATT; if (power >= em_power_mw) break; } + freq = table[i].frequency; + rcu_read_unlock(); - return cpufreq_cdev->em->table[i].frequency; + return freq; } /** @@ -262,8 +276,9 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, static int cpufreq_state2power(struct thermal_cooling_device *cdev, unsigned long state, u32 *power) { - unsigned int freq, num_cpus, idx; struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; + unsigned int freq, num_cpus, idx; + struct em_perf_state *table; /* Request state should be less than max_level */ if (state > cpufreq_cdev->max_level) @@ -272,7 +287,12 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev, num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus); idx = cpufreq_cdev->max_level - state; - freq = cpufreq_cdev->em->table[idx].frequency; + + rcu_read_lock(); + table = em_perf_state_from_pd(cpufreq_cdev->em); + freq = table[idx].frequency; + rcu_read_unlock(); + *power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus; return 0; @@ -378,8 +398,17 @@ static unsigned int get_state_freq(struct cpufreq_cooling_device *cpufreq_cdev, #ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR /* Use the Energy Model table if available */ if (cpufreq_cdev->em) { + struct em_perf_state *table; + unsigned int freq; + idx = cpufreq_cdev->max_level - state; - return cpufreq_cdev->em->table[idx].frequency; + + rcu_read_lock(); + table = em_perf_state_from_pd(cpufreq_cdev->em); + freq = table[idx].frequency; + rcu_read_unlock(); + + return freq; } #endif diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index 262e62ab6cf2..50dec24e967a 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -87,6 +87,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, struct devfreq_cooling_device *dfc = cdev->devdata; struct devfreq *df = dfc->devfreq; struct device *dev = df->dev.parent; + struct em_perf_state *table; unsigned long freq; int perf_idx; @@ -100,7 +101,11 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, if (dfc->em_pd) { perf_idx = dfc->max_state - state; - freq = dfc->em_pd->table[perf_idx].frequency * 1000; + + rcu_read_lock(); + table = em_perf_state_from_pd(dfc->em_pd); + freq = table[perf_idx].frequency * 1000; + rcu_read_unlock(); } else { freq = dfc->freq_table[state]; } @@ -123,14 +128,21 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, */ static int get_perf_idx(struct em_perf_domain *em_pd, unsigned long freq) { - int i; + struct em_perf_state *table; + int i, idx = -EINVAL; + rcu_read_lock(); + table = em_perf_state_from_pd(em_pd); for (i = 0; i < em_pd->nr_perf_states; i++) { - if (em_pd->table[i].frequency == freq) - return i; + if (table[i].frequency != freq) + continue; + + idx = i; + break; } + rcu_read_unlock(); - return -EINVAL; + return idx; } static unsigned long get_voltage(struct devfreq *df, unsigned long freq) @@ -181,6 +193,7 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd struct devfreq_cooling_device *dfc = cdev->devdata; struct devfreq *df = dfc->devfreq; struct devfreq_dev_status status; + struct em_perf_state *table; unsigned long state; unsigned long freq; unsigned long voltage; @@ -204,7 +217,11 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd state = dfc->capped_state; /* Convert EM power into milli-Watts first */ - dfc->res_util = dfc->em_pd->table[state].power; + rcu_read_lock(); + table = em_perf_state_from_pd(dfc->em_pd); + dfc->res_util = table[state].power; + rcu_read_unlock(); + dfc->res_util /= MICROWATT_PER_MILLIWATT; dfc->res_util *= SCALE_ERROR_MITIGATION; @@ -225,7 +242,11 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd _normalize_load(&status); /* Convert EM power into milli-Watts first */ - *power = dfc->em_pd->table[perf_idx].power; + rcu_read_lock(); + table = em_perf_state_from_pd(dfc->em_pd); + *power = table[perf_idx].power; + rcu_read_unlock(); + *power /= MICROWATT_PER_MILLIWATT; /* Scale power for utilization */ *power *= status.busy_time; @@ -245,13 +266,19 @@ static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, unsigned long state, u32 *power) { struct devfreq_cooling_device *dfc = cdev->devdata; + struct em_perf_state *table; int perf_idx; if (state > dfc->max_state) return -EINVAL; perf_idx = dfc->max_state - state; - *power = dfc->em_pd->table[perf_idx].power; + + rcu_read_lock(); + table = em_perf_state_from_pd(dfc->em_pd); + *power = table[perf_idx].power; + rcu_read_unlock(); + *power /= MICROWATT_PER_MILLIWATT; return 0; @@ -264,6 +291,7 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev, struct devfreq *df = dfc->devfreq; struct devfreq_dev_status status; unsigned long freq, em_power_mw; + struct em_perf_state *table; s32 est_power; int i; @@ -288,13 +316,16 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev, * Find the first cooling state that is within the power * budget. The EM power table is sorted ascending. */ + rcu_read_lock(); + table = em_perf_state_from_pd(dfc->em_pd); for (i = dfc->max_state; i > 0; i--) { /* Convert EM power to milli-Watts to make safe comparison */ - em_power_mw = dfc->em_pd->table[i].power; + em_power_mw = table[i].power; em_power_mw /= MICROWATT_PER_MILLIWATT; if (est_power >= em_power_mw) break; } + rcu_read_unlock(); *state = dfc->max_state - i; dfc->capped_state = *state; diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c index 2f00fc3bf274..e964a9375722 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c @@ -27,9 +27,9 @@ static int rapl_mmio_cpu_online(unsigned int cpu) if (topology_physical_package_id(cpu)) return 0; - rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true); + rp = rapl_find_package_domain_cpuslocked(cpu, &rapl_mmio_priv, true); if (!rp) { - rp = rapl_add_package(cpu, &rapl_mmio_priv, true); + rp = rapl_add_package_cpuslocked(cpu, &rapl_mmio_priv, true); if (IS_ERR(rp)) return PTR_ERR(rp); } @@ -42,14 +42,14 @@ static int rapl_mmio_cpu_down_prep(unsigned int cpu) struct rapl_package *rp; int lead_cpu; - rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true); + rp = rapl_find_package_domain_cpuslocked(cpu, &rapl_mmio_priv, true); if (!rp) return 0; cpumask_clear_cpu(cpu, &rp->cpumask); lead_cpu = cpumask_first(&rp->cpumask); if (lead_cpu >= nr_cpu_ids) - rapl_remove_package(rp); + rapl_remove_package_cpuslocked(rp); else if (rp->lead_cpu == cpu) rp->lead_cpu = lead_cpu; return 0; |