diff options
Diffstat (limited to 'drivers/cpufreq/intel_pstate.c')
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 221 |
1 files changed, 188 insertions, 33 deletions
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 30fe323c4551..a0823e84ceca 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -10,6 +10,8 @@ * of the License. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/kernel_stat.h> #include <linux/module.h> @@ -39,6 +41,10 @@ #define ATOM_TURBO_RATIOS 0x66c #define ATOM_TURBO_VIDS 0x66d +#ifdef CONFIG_ACPI +#include <acpi/processor.h> +#endif + #define FRAC_BITS 8 #define int_tofp(X) ((int64_t)(X) << FRAC_BITS) #define fp_toint(X) ((X) >> FRAC_BITS) @@ -172,6 +178,8 @@ struct _pid { * @prev_cummulative_iowait: IO Wait time difference from last and * current sample * @sample: Storage for storing last Sample data + * @acpi_perf_data: Stores ACPI perf information read from _PSS + * @valid_pss_table: Set to true for valid ACPI _PSS entries found * * This structure stores per CPU instance data for all CPUs. */ @@ -190,6 +198,10 @@ struct cpudata { u64 prev_tsc; u64 prev_cummulative_iowait; struct sample sample; +#ifdef CONFIG_ACPI + struct acpi_processor_performance acpi_perf_data; + bool valid_pss_table; +#endif }; static struct cpudata **all_cpu_data; @@ -258,6 +270,9 @@ static struct pstate_adjust_policy pid_params; static struct pstate_funcs pstate_funcs; static int hwp_active; +#ifdef CONFIG_ACPI +static bool acpi_ppc; +#endif /** * struct perf_limits - Store user and policy limits @@ -331,6 +346,121 @@ static struct perf_limits *limits = &performance_limits; static struct perf_limits *limits = &powersave_limits; #endif +#ifdef CONFIG_ACPI + +static bool intel_pstate_get_ppc_enable_status(void) +{ + if (acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER || + acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER) + return true; + + return acpi_ppc; +} + +/* + * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and + * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and + * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state + * ratio, out of it only high 8 bits are used. For example 0x1700 is setting + * target ratio 0x17. The _PSS control value stores in a format which can be + * directly written to PERF_CTL MSR. But in intel_pstate driver this shift + * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()). + * This function converts the _PSS control value to intel pstate driver format + * for comparison and assignment. + */ +static int convert_to_native_pstate_format(struct cpudata *cpu, int index) +{ + return cpu->acpi_perf_data.states[index].control >> 8; +} + +static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + int turbo_pss_ctl; + int ret; + int i; + + if (!intel_pstate_get_ppc_enable_status()) + return; + + cpu = all_cpu_data[policy->cpu]; + + ret = acpi_processor_register_performance(&cpu->acpi_perf_data, + policy->cpu); + if (ret) + return; + + /* + * Check if the control value in _PSS is for PERF_CTL MSR, which should + * guarantee that the states returned by it map to the states in our + * list directly. + */ + if (cpu->acpi_perf_data.control_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE) + goto err; + + /* + * If there is only one entry _PSS, simply ignore _PSS and continue as + * usual without taking _PSS into account + */ + if (cpu->acpi_perf_data.state_count < 2) + goto err; + + pr_debug("CPU%u - ACPI _PSS perf data\n", policy->cpu); + for (i = 0; i < cpu->acpi_perf_data.state_count; i++) { + pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n", + (i == cpu->acpi_perf_data.state ? '*' : ' '), i, + (u32) cpu->acpi_perf_data.states[i].core_frequency, + (u32) cpu->acpi_perf_data.states[i].power, + (u32) cpu->acpi_perf_data.states[i].control); + } + + /* + * The _PSS table doesn't contain whole turbo frequency range. + * This just contains +1 MHZ above the max non turbo frequency, + * with control value corresponding to max turbo ratio. But + * when cpufreq set policy is called, it will call with this + * max frequency, which will cause a reduced performance as + * this driver uses real max turbo frequency as the max + * frequency. So correct this frequency in _PSS table to + * correct max turbo frequency based on the turbo ratio. + * Also need to convert to MHz as _PSS freq is in MHz. + */ + turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0); + if (turbo_pss_ctl > cpu->pstate.max_pstate) + cpu->acpi_perf_data.states[0].core_frequency = + policy->cpuinfo.max_freq / 1000; + cpu->valid_pss_table = true; + pr_info("_PPC limits will be enforced\n"); + + return; + + err: + cpu->valid_pss_table = false; + acpi_processor_unregister_performance(policy->cpu); +} + +static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) +{ + struct cpudata *cpu; + + cpu = all_cpu_data[policy->cpu]; + if (!cpu->valid_pss_table) + return; + + acpi_processor_unregister_performance(policy->cpu); +} + +#else +static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) +{ +} + +static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy) +{ +} +#endif + static inline void pid_reset(struct _pid *pid, int setpoint, int busy, int deadband, int integral) { pid->setpoint = int_tofp(setpoint); @@ -341,17 +471,17 @@ static inline void pid_reset(struct _pid *pid, int setpoint, int busy, static inline void pid_p_gain_set(struct _pid *pid, int percent) { - pid->p_gain = div_fp(int_tofp(percent), int_tofp(100)); + pid->p_gain = div_fp(percent, 100); } static inline void pid_i_gain_set(struct _pid *pid, int percent) { - pid->i_gain = div_fp(int_tofp(percent), int_tofp(100)); + pid->i_gain = div_fp(percent, 100); } static inline void pid_d_gain_set(struct _pid *pid, int percent) { - pid->d_gain = div_fp(int_tofp(percent), int_tofp(100)); + pid->d_gain = div_fp(percent, 100); } static signed int pid_calc(struct _pid *pid, int32_t busy) @@ -529,7 +659,7 @@ static ssize_t show_turbo_pct(struct kobject *kobj, total = cpu->pstate.turbo_pstate - cpu->pstate.min_pstate + 1; no_turbo = cpu->pstate.max_pstate - cpu->pstate.min_pstate + 1; - turbo_fp = div_fp(int_tofp(no_turbo), int_tofp(total)); + turbo_fp = div_fp(no_turbo, total); turbo_pct = 100 - fp_toint(mul_fp(turbo_fp, int_tofp(100))); return sprintf(buf, "%u\n", turbo_pct); } @@ -571,7 +701,7 @@ static ssize_t store_no_turbo(struct kobject *a, struct attribute *b, update_turbo_state(); if (limits->turbo_disabled) { - pr_warn("intel_pstate: Turbo disabled by BIOS or unavailable on processor\n"); + pr_warn("Turbo disabled by BIOS or unavailable on processor\n"); return -EPERM; } @@ -600,8 +730,7 @@ static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b, limits->max_perf_pct); limits->max_perf_pct = max(limits->min_perf_pct, limits->max_perf_pct); - limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), - int_tofp(100)); + limits->max_perf = div_fp(limits->max_perf_pct, 100); if (hwp_active) intel_pstate_hwp_set_online_cpus(); @@ -625,8 +754,7 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b, limits->min_perf_pct); limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); - limits->min_perf = div_fp(int_tofp(limits->min_perf_pct), - int_tofp(100)); + limits->min_perf = div_fp(limits->min_perf_pct, 100); if (hwp_active) intel_pstate_hwp_set_online_cpus(); @@ -1011,8 +1139,8 @@ static inline void intel_pstate_calc_busy(struct cpudata *cpu) struct sample *sample = &cpu->sample; int64_t core_pct; - core_pct = int_tofp(sample->aperf) * int_tofp(100); - core_pct = div64_u64(core_pct, int_tofp(sample->mperf)); + core_pct = sample->aperf * int_tofp(100); + core_pct = div64_u64(core_pct, sample->mperf); sample->core_pct_busy = (int32_t)core_pct; } @@ -1061,6 +1189,12 @@ static inline int32_t get_avg_frequency(struct cpudata *cpu) cpu->pstate.scaling, cpu->sample.mperf); } +static inline int32_t get_avg_pstate(struct cpudata *cpu) +{ + return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf, + cpu->sample.mperf); +} + static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) { struct sample *sample = &cpu->sample; @@ -1093,7 +1227,7 @@ static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc); cpu->sample.busy_scaled = cpu_load; - return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load); + return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load); } static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) @@ -1115,8 +1249,8 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) * specified pstate. */ core_busy = cpu->sample.core_pct_busy; - max_pstate = int_tofp(cpu->pstate.max_pstate_physical); - current_pstate = int_tofp(cpu->pstate.current_pstate); + max_pstate = cpu->pstate.max_pstate_physical; + current_pstate = cpu->pstate.current_pstate; core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate)); /* @@ -1127,8 +1261,7 @@ static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) */ duration_ns = cpu->sample.time - cpu->last_sample_time; if ((s64)duration_ns > pid_params.sample_rate_ns * 3) { - sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns), - int_tofp(duration_ns)); + sample_ratio = div_fp(pid_params.sample_rate_ns, duration_ns); core_busy = mul_fp(core_busy, sample_ratio); } else { sample_ratio = div_fp(100 * cpu->sample.mperf, cpu->sample.tsc); @@ -1246,9 +1379,7 @@ static int intel_pstate_init_cpu(unsigned int cpunum) intel_pstate_busy_pid_reset(cpu); - cpu->update_util.func = intel_pstate_update_util; - - pr_debug("intel_pstate: controlling: cpu %d\n", cpunum); + pr_debug("controlling: cpu %d\n", cpunum); return 0; } @@ -1271,12 +1402,13 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num) /* Prevent intel_pstate_update_util() from using stale data. */ cpu->sample.time = 0; - cpufreq_set_update_util_data(cpu_num, &cpu->update_util); + cpufreq_add_update_util_hook(cpu_num, &cpu->update_util, + intel_pstate_update_util); } static void intel_pstate_clear_update_util_hook(unsigned int cpu) { - cpufreq_set_update_util_data(cpu, NULL); + cpufreq_remove_update_util_hook(cpu); synchronize_sched(); } @@ -1296,20 +1428,31 @@ static void intel_pstate_set_performance_limits(struct perf_limits *limits) static int intel_pstate_set_policy(struct cpufreq_policy *policy) { + struct cpudata *cpu; + if (!policy->cpuinfo.max_freq) return -ENODEV; intel_pstate_clear_update_util_hook(policy->cpu); + cpu = all_cpu_data[0]; + if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate) { + if (policy->max < policy->cpuinfo.max_freq && + policy->max > cpu->pstate.max_pstate * cpu->pstate.scaling) { + pr_debug("policy->max > max non turbo frequency\n"); + policy->max = policy->cpuinfo.max_freq; + } + } + if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) { limits = &performance_limits; if (policy->max >= policy->cpuinfo.max_freq) { - pr_debug("intel_pstate: set performance\n"); + pr_debug("set performance\n"); intel_pstate_set_performance_limits(limits); goto out; } } else { - pr_debug("intel_pstate: set powersave\n"); + pr_debug("set powersave\n"); limits = &powersave_limits; } @@ -1333,10 +1476,8 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy) /* Make sure min_perf_pct <= max_perf_pct */ limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct); - limits->min_perf = div_fp(int_tofp(limits->min_perf_pct), - int_tofp(100)); - limits->max_perf = div_fp(int_tofp(limits->max_perf_pct), - int_tofp(100)); + limits->min_perf = div_fp(limits->min_perf_pct, 100); + limits->max_perf = div_fp(limits->max_perf_pct, 100); out: intel_pstate_set_update_util_hook(policy->cpu); @@ -1363,7 +1504,7 @@ static void intel_pstate_stop_cpu(struct cpufreq_policy *policy) int cpu_num = policy->cpu; struct cpudata *cpu = all_cpu_data[cpu_num]; - pr_debug("intel_pstate: CPU %d exiting\n", cpu_num); + pr_debug("CPU %d exiting\n", cpu_num); intel_pstate_clear_update_util_hook(cpu_num); @@ -1396,18 +1537,27 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy) policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling; policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; + intel_pstate_init_acpi_perf_limits(policy); policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; cpumask_set_cpu(policy->cpu, policy->cpus); return 0; } +static int intel_pstate_cpu_exit(struct cpufreq_policy *policy) +{ + intel_pstate_exit_perf_limits(policy); + + return 0; +} + static struct cpufreq_driver intel_pstate_driver = { .flags = CPUFREQ_CONST_LOOPS, .verify = intel_pstate_verify_policy, .setpolicy = intel_pstate_set_policy, .get = intel_pstate_get, .init = intel_pstate_cpu_init, + .exit = intel_pstate_cpu_exit, .stop_cpu = intel_pstate_stop_cpu, .name = "intel_pstate", }; @@ -1451,8 +1601,7 @@ static void copy_cpu_funcs(struct pstate_funcs *funcs) } -#if IS_ENABLED(CONFIG_ACPI) -#include <acpi/processor.h> +#ifdef CONFIG_ACPI static bool intel_pstate_no_acpi_pss(void) { @@ -1608,7 +1757,7 @@ hwp_cpu_matched: if (intel_pstate_platform_pwr_mgmt_exists()) return -ENODEV; - pr_info("Intel P-state driver initializing.\n"); + pr_info("Intel P-state driver initializing\n"); all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus()); if (!all_cpu_data) @@ -1625,7 +1774,7 @@ hwp_cpu_matched: intel_pstate_sysfs_expose_params(); if (hwp_active) - pr_info("intel_pstate: HWP enabled\n"); + pr_info("HWP enabled\n"); return rc; out: @@ -1651,13 +1800,19 @@ static int __init intel_pstate_setup(char *str) if (!strcmp(str, "disable")) no_load = 1; if (!strcmp(str, "no_hwp")) { - pr_info("intel_pstate: HWP disabled\n"); + pr_info("HWP disabled\n"); no_hwp = 1; } if (!strcmp(str, "force")) force_load = 1; if (!strcmp(str, "hwp_only")) hwp_only = 1; + +#ifdef CONFIG_ACPI + if (!strcmp(str, "support_acpi_ppc")) + acpi_ppc = true; +#endif + return 0; } early_param("intel_pstate", intel_pstate_setup); |