Newer
Older
cpu->pstate.scaling = pstate_funcs.get_scaling();
if (pstate_funcs.get_vid)
pstate_funcs.get_vid(cpu);
intel_pstate_set_min_pstate(cpu);
static inline void intel_pstate_calc_busy(struct cpudata *cpu)
struct sample *sample = &cpu->sample;
core_pct = int_tofp(sample->aperf) * int_tofp(100);
core_pct = div64_u64(core_pct, int_tofp(sample->mperf));
sample->core_pct_busy = (int32_t)core_pct;
static inline bool intel_pstate_sample(struct cpudata *cpu, u64 time)
{
u64 aperf, mperf;
unsigned long flags;
u64 tsc;
local_irq_save(flags);
rdmsrl(MSR_IA32_APERF, aperf);
rdmsrl(MSR_IA32_MPERF, mperf);
if (cpu->prev_mperf == mperf || cpu->prev_tsc == tsc) {
local_irq_restore(flags);
}
local_irq_restore(flags);
cpu->last_sample_time = cpu->sample.time;
cpu->sample.time = time;
cpu->sample.aperf = aperf;
cpu->sample.mperf = mperf;
cpu->sample.tsc = tsc;
cpu->sample.aperf -= cpu->prev_aperf;
cpu->sample.mperf -= cpu->prev_mperf;
cpu->sample.tsc -= cpu->prev_tsc;
Dirk Brandewie
committed
cpu->prev_aperf = aperf;
cpu->prev_mperf = mperf;
cpu->prev_tsc = tsc;
/*
* First time this function is invoked in a given cycle, all of the
* previous sample data fields are equal to zero or stale and they must
* be populated with meaningful numbers for things to work, so assume
* that sample.time will always be reset before setting the utilization
* update hook and make the caller skip the sample then.
*/
return !!cpu->last_sample_time;
static inline int32_t get_avg_frequency(struct cpudata *cpu)
{
return div64_u64(cpu->pstate.max_pstate_physical * cpu->sample.aperf *
cpu->pstate.scaling, cpu->sample.mperf);
}
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
{
struct sample *sample = &cpu->sample;
u64 cummulative_iowait, delta_iowait_us;
u64 delta_iowait_mperf;
u64 mperf, now;
int32_t cpu_load;
cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now);
/*
* Convert iowait time into number of IO cycles spent at max_freq.
* IO is considered as busy only for the cpu_load algorithm. For
* performance this is not needed since we always try to reach the
* maximum P-State, so we are already boosting the IOs.
*/
delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait;
delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling *
cpu->pstate.max_pstate, MSEC_PER_SEC);
mperf = cpu->sample.mperf + delta_iowait_mperf;
cpu->prev_cummulative_iowait = cummulative_iowait;
/*
* The load can be estimated as the ratio of the mperf counter
* running at a constant frequency during active periods
* (C0) and the time stamp counter running at the same frequency
* also during C-states.
*/
cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
cpu->sample.busy_scaled = cpu_load;
return cpu->pstate.current_pstate - pid_calc(&cpu->pid, cpu_load);
}
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
int32_t core_busy, max_pstate, current_pstate, sample_ratio;
u64 duration_ns;
Philippe Longepe
committed
intel_pstate_calc_busy(cpu);
/*
* core_busy is the ratio of actual performance to max
* max_pstate is the max non turbo pstate available
* current_pstate was the pstate that was requested during
* the last sample period.
*
* We normalize core_busy, which was our actual percent
* performance to what we requested during the last sample
* period. The result will be a percentage of busy at a
* specified pstate.
*/
core_busy = cpu->sample.core_pct_busy;
max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
current_pstate = int_tofp(cpu->pstate.current_pstate);
core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
* Since our utilization update callback will not run unless we are
* in C0, check if the actual elapsed time is significantly greater (3x)
* than our sample interval. If it is, then we were idle for a long
* enough period of time to adjust our busyness.
duration_ns = cpu->sample.time - cpu->last_sample_time;
if ((s64)duration_ns > pid_params.sample_rate_ns * 3) {
sample_ratio = div_fp(int_tofp(pid_params.sample_rate_ns),
int_tofp(duration_ns));
core_busy = mul_fp(core_busy, sample_ratio);
}
cpu->sample.busy_scaled = core_busy;
return cpu->pstate.current_pstate - pid_calc(&cpu->pid, core_busy);
static inline void intel_pstate_update_pstate(struct cpudata *cpu, int pstate)
{
int max_perf, min_perf;
update_turbo_state();
intel_pstate_get_min_max(cpu, &min_perf, &max_perf);
pstate = clamp_t(int, pstate, min_perf, max_perf);
if (pstate == cpu->pstate.current_pstate)
return;
intel_pstate_record_pstate(cpu, pstate);
wrmsrl(MSR_IA32_PERF_CTL, pstate_funcs.get_val(cpu, pstate));
}
static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
{
int from, target_pstate;
struct sample *sample;
from = cpu->pstate.current_pstate;
target_pstate = pstate_funcs.get_target_pstate(cpu);
intel_pstate_update_pstate(cpu, target_pstate);
sample = &cpu->sample;
trace_pstate_sample(fp_toint(sample->core_pct_busy),
fp_toint(sample->busy_scaled),
from,
cpu->pstate.current_pstate,
sample->mperf,
sample->aperf,
sample->tsc,
get_avg_frequency(cpu));
static void intel_pstate_update_util(struct update_util_data *data, u64 time,
unsigned long util, unsigned long max)
struct cpudata *cpu = container_of(data, struct cpudata, update_util);
u64 delta_ns = time - cpu->sample.time;
if ((s64)delta_ns >= pid_params.sample_rate_ns) {
bool sample_taken = intel_pstate_sample(cpu, time);
if (sample_taken && !hwp_active)
intel_pstate_adjust_busy_pstate(cpu);
}
}
#define ICPU(model, policy) \
{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF,\
(unsigned long)&policy }
static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
ICPU(0x2a, core_params),
ICPU(0x2d, core_params),
ICPU(0x37, silvermont_params),
ICPU(0x3a, core_params),
ICPU(0x3c, core_params),
ICPU(0x3d, core_params),
ICPU(0x3e, core_params),
ICPU(0x3f, core_params),
ICPU(0x45, core_params),
ICPU(0x46, core_params),
ICPU(0x4c, airmont_params),
ICPU(0x4e, core_params),
ICPU(0x4f, core_params),
ICPU(0x56, core_params),
ICPU(0x57, knl_params),
{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
static const struct x86_cpu_id intel_pstate_cpu_oob_ids[] = {
ICPU(0x56, core_params),
{}
};
static int intel_pstate_init_cpu(unsigned int cpunum)
{
struct cpudata *cpu;
if (!all_cpu_data[cpunum])
all_cpu_data[cpunum] = kzalloc(sizeof(struct cpudata),
GFP_KERNEL);
if (!all_cpu_data[cpunum])
return -ENOMEM;
cpu = all_cpu_data[cpunum];
cpu->cpu = cpunum;
if (hwp_active) {
intel_pstate_hwp_enable(cpu);
pid_params.sample_rate_ms = 50;
pid_params.sample_rate_ns = 50 * NSEC_PER_MSEC;
}
intel_pstate_get_cpu_pstates(cpu);
intel_pstate_busy_pid_reset(cpu);
pr_debug("intel_pstate: controlling: cpu %d\n", cpunum);
return 0;
}
static unsigned int intel_pstate_get(unsigned int cpu_num)
{
struct sample *sample;
struct cpudata *cpu;
cpu = all_cpu_data[cpu_num];
if (!cpu)
return 0;
return get_avg_frequency(cpu);
static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
struct cpudata *cpu = all_cpu_data[cpu_num];
/* Prevent intel_pstate_update_util() from using stale data. */
cpu->sample.time = 0;
cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
intel_pstate_update_util);
}
static void intel_pstate_clear_update_util_hook(unsigned int cpu)
{
cpufreq_remove_update_util_hook(cpu);
synchronize_sched();
}
static void intel_pstate_set_performance_limits(struct perf_limits *limits)
{
limits->no_turbo = 0;
limits->turbo_disabled = 0;
limits->max_perf_pct = 100;
limits->max_perf = int_tofp(1);
limits->min_perf_pct = 100;
limits->min_perf = int_tofp(1);
limits->max_policy_pct = 100;
limits->max_sysfs_pct = 100;
limits->min_policy_pct = 0;
limits->min_sysfs_pct = 0;
}
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
Dirk Brandewie
committed
if (!policy->cpuinfo.max_freq)
return -ENODEV;
intel_pstate_clear_update_util_hook(policy->cpu);
if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) {
limits = &performance_limits;
if (policy->max >= policy->cpuinfo.max_freq) {
pr_debug("intel_pstate: set performance\n");
intel_pstate_set_performance_limits(limits);
goto out;
}
} else {
pr_debug("intel_pstate: set powersave\n");
limits = &powersave_limits;
limits->min_policy_pct = (policy->min * 100) / policy->cpuinfo.max_freq;
limits->min_policy_pct = clamp_t(int, limits->min_policy_pct, 0 , 100);
limits->max_policy_pct = DIV_ROUND_UP(policy->max * 100,
policy->cpuinfo.max_freq);
limits->max_policy_pct = clamp_t(int, limits->max_policy_pct, 0 , 100);
/* Normalize user input to [min_policy_pct, max_policy_pct] */
limits->min_perf_pct = max(limits->min_policy_pct,
limits->min_sysfs_pct);
limits->min_perf_pct = min(limits->max_policy_pct,
limits->min_perf_pct);
limits->max_perf_pct = min(limits->max_policy_pct,
limits->max_sysfs_pct);
limits->max_perf_pct = max(limits->min_policy_pct,
limits->max_perf_pct);
limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
/* Make sure min_perf_pct <= max_perf_pct */
limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
limits->min_perf = div_fp(int_tofp(limits->min_perf_pct),
int_tofp(100));
limits->max_perf = div_fp(int_tofp(limits->max_perf_pct),
int_tofp(100));
out:
intel_pstate_set_update_util_hook(policy->cpu);
intel_pstate_hwp_set(policy->cpus);
return 0;
}
static int intel_pstate_verify_policy(struct cpufreq_policy *policy)
{
cpufreq_verify_within_cpu_limits(policy);
if (policy->policy != CPUFREQ_POLICY_POWERSAVE &&
policy->policy != CPUFREQ_POLICY_PERFORMANCE)
return -EINVAL;
return 0;
}
static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
int cpu_num = policy->cpu;
struct cpudata *cpu = all_cpu_data[cpu_num];
pr_debug("intel_pstate: CPU %d exiting\n", cpu_num);
intel_pstate_clear_update_util_hook(cpu_num);
intel_pstate_set_min_pstate(cpu);
static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
{
struct cpudata *cpu;
rc = intel_pstate_init_cpu(policy->cpu);
if (rc)
return rc;
cpu = all_cpu_data[policy->cpu];
if (limits->min_perf_pct == 100 && limits->max_perf_pct == 100)
policy->policy = CPUFREQ_POLICY_PERFORMANCE;
else
policy->policy = CPUFREQ_POLICY_POWERSAVE;
policy->min = cpu->pstate.min_pstate * cpu->pstate.scaling;
policy->max = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
/* cpuinfo and default policy values */
policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
policy->cpuinfo.max_freq =
cpu->pstate.turbo_pstate * cpu->pstate.scaling;
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
cpumask_set_cpu(policy->cpu, policy->cpus);
return 0;
}
static struct cpufreq_driver intel_pstate_driver = {
.flags = CPUFREQ_CONST_LOOPS,
.verify = intel_pstate_verify_policy,
.setpolicy = intel_pstate_set_policy,
.get = intel_pstate_get,
.init = intel_pstate_cpu_init,
.stop_cpu = intel_pstate_stop_cpu,
.name = "intel_pstate",
};
Dirk Brandewie
committed
static int __initdata no_load;
static int __initdata hwp_only;
static unsigned int force_load;
Dirk Brandewie
committed
static int intel_pstate_msrs_not_valid(void)
{
if (!pstate_funcs.get_max() ||
!pstate_funcs.get_min() ||
!pstate_funcs.get_turbo())
return -ENODEV;
return 0;
}
static void copy_pid_params(struct pstate_adjust_policy *policy)
{
pid_params.sample_rate_ms = policy->sample_rate_ms;
pid_params.sample_rate_ns = pid_params.sample_rate_ms * NSEC_PER_MSEC;
pid_params.p_gain_pct = policy->p_gain_pct;
pid_params.i_gain_pct = policy->i_gain_pct;
pid_params.d_gain_pct = policy->d_gain_pct;
pid_params.deadband = policy->deadband;
pid_params.setpoint = policy->setpoint;
}
static void copy_cpu_funcs(struct pstate_funcs *funcs)
{
pstate_funcs.get_max = funcs->get_max;
pstate_funcs.get_max_physical = funcs->get_max_physical;
pstate_funcs.get_min = funcs->get_min;
pstate_funcs.get_turbo = funcs->get_turbo;
pstate_funcs.get_scaling = funcs->get_scaling;
pstate_funcs.get_val = funcs->get_val;
pstate_funcs.get_vid = funcs->get_vid;
pstate_funcs.get_target_pstate = funcs->get_target_pstate;
}
#if IS_ENABLED(CONFIG_ACPI)
#include <acpi/processor.h>
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
static bool intel_pstate_no_acpi_pss(void)
{
int i;
for_each_possible_cpu(i) {
acpi_status status;
union acpi_object *pss;
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
struct acpi_processor *pr = per_cpu(processors, i);
if (!pr)
continue;
status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer);
if (ACPI_FAILURE(status))
continue;
pss = buffer.pointer;
if (pss && pss->type == ACPI_TYPE_PACKAGE) {
kfree(pss);
return false;
}
kfree(pss);
}
return true;
}
static bool intel_pstate_has_acpi_ppc(void)
{
int i;
for_each_possible_cpu(i) {
struct acpi_processor *pr = per_cpu(processors, i);
if (!pr)
continue;
if (acpi_has_method(pr->handle, "_PPC"))
return true;
}
return false;
}
enum {
PSS,
PPC,
};
struct hw_vendor_info {
u16 valid;
char oem_id[ACPI_OEM_ID_SIZE];
char oem_table_id[ACPI_OEM_TABLE_ID_SIZE];
int oem_pwr_table;
};
/* Hardware vendor-specific info that has its own power management modes */
static struct hw_vendor_info vendor_info[] = {
{1, "HP ", "ProLiant", PSS},
{1, "ORACLE", "X4-2 ", PPC},
{1, "ORACLE", "X4-2L ", PPC},
{1, "ORACLE", "X4-2B ", PPC},
{1, "ORACLE", "X3-2 ", PPC},
{1, "ORACLE", "X3-2L ", PPC},
{1, "ORACLE", "X3-2B ", PPC},
{1, "ORACLE", "X4470M2 ", PPC},
{1, "ORACLE", "X4270M3 ", PPC},
{1, "ORACLE", "X4270M2 ", PPC},
{1, "ORACLE", "X4170M2 ", PPC},
{1, "ORACLE", "X4170 M3", PPC},
{1, "ORACLE", "X4275 M3", PPC},
{1, "ORACLE", "X6-2 ", PPC},
{1, "ORACLE", "Sudbury ", PPC},
{0, "", ""},
};
static bool intel_pstate_platform_pwr_mgmt_exists(void)
{
struct acpi_table_header hdr;
struct hw_vendor_info *v_info;
const struct x86_cpu_id *id;
u64 misc_pwr;
id = x86_match_cpu(intel_pstate_cpu_oob_ids);
if (id) {
rdmsrl(MSR_MISC_PWR_MGMT, misc_pwr);
if ( misc_pwr & (1 << 8))
return true;
}
if (acpi_disabled ||
ACPI_FAILURE(acpi_get_table_header(ACPI_SIG_FADT, 0, &hdr)))
return false;
for (v_info = vendor_info; v_info->valid; v_info++) {
if (!strncmp(hdr.oem_id, v_info->oem_id, ACPI_OEM_ID_SIZE) &&
!strncmp(hdr.oem_table_id, v_info->oem_table_id,
ACPI_OEM_TABLE_ID_SIZE))
switch (v_info->oem_pwr_table) {
case PSS:
return intel_pstate_no_acpi_pss();
case PPC:
return intel_pstate_has_acpi_ppc() &&
(!force_load);
}
return false;
}
#else /* CONFIG_ACPI not enabled */
static inline bool intel_pstate_platform_pwr_mgmt_exists(void) { return false; }
static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
#endif /* CONFIG_ACPI */
static const struct x86_cpu_id hwp_support_ids[] __initconst = {
{ X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP },
{}
};
static int __init intel_pstate_init(void)
{
int cpu, rc = 0;
const struct x86_cpu_id *id;
struct cpu_defaults *cpu_def;
Dirk Brandewie
committed
if (no_load)
return -ENODEV;
if (x86_match_cpu(hwp_support_ids) && !no_hwp) {
copy_cpu_funcs(&core_params.funcs);
hwp_active++;
goto hwp_cpu_matched;
}
id = x86_match_cpu(intel_pstate_cpu_ids);
if (!id)
return -ENODEV;
cpu_def = (struct cpu_defaults *)id->driver_data;
copy_pid_params(&cpu_def->pid_policy);
copy_cpu_funcs(&cpu_def->funcs);
if (intel_pstate_msrs_not_valid())
return -ENODEV;
hwp_cpu_matched:
/*
* The Intel pstate driver will be ignored if the platform
* firmware has its own power management modes.
*/
if (intel_pstate_platform_pwr_mgmt_exists())
return -ENODEV;
pr_info("Intel P-state driver initializing.\n");
all_cpu_data = vzalloc(sizeof(void *) * num_possible_cpus());
if (!all_cpu_data)
return -ENOMEM;
if (!hwp_active && hwp_only)
goto out;
rc = cpufreq_register_driver(&intel_pstate_driver);
if (rc)
goto out;
intel_pstate_debug_expose_params();
intel_pstate_sysfs_expose_params();
if (hwp_active)
pr_info("intel_pstate: HWP enabled\n");
return rc;
out:
get_online_cpus();
for_each_online_cpu(cpu) {
if (all_cpu_data[cpu]) {
intel_pstate_clear_update_util_hook(cpu);
kfree(all_cpu_data[cpu]);
}
}
put_online_cpus();
vfree(all_cpu_data);
return -ENODEV;
}
device_initcall(intel_pstate_init);
Dirk Brandewie
committed
static int __init intel_pstate_setup(char *str)
{
if (!str)
return -EINVAL;
if (!strcmp(str, "disable"))
no_load = 1;
if (!strcmp(str, "no_hwp")) {
pr_info("intel_pstate: HWP disabled\n");
if (!strcmp(str, "force"))
force_load = 1;
if (!strcmp(str, "hwp_only"))
hwp_only = 1;
Dirk Brandewie
committed
return 0;
}
early_param("intel_pstate", intel_pstate_setup);
MODULE_AUTHOR("Dirk Brandewie <dirk.j.brandewie@intel.com>");
MODULE_DESCRIPTION("'intel_pstate' - P state driver Intel Core processors");
MODULE_LICENSE("GPL");