Merge branch 'pm-cpufreq'

* pm-cpufreq: (94 commits)
  intel_pstate: Do not skip samples partially
  intel_pstate: Remove freq calculation from intel_pstate_calc_busy()
  intel_pstate: Move intel_pstate_calc_busy() into get_target_pstate_use_performance()
  intel_pstate: Optimize calculation for max/min_perf_adj
  intel_pstate: Remove extra conversions in pid calculation
  cpufreq: Move scheduler-related code to the sched directory
  Revert "cpufreq: postfix policy directory with the first CPU in related_cpus"
  cpufreq: Reduce cpufreq_update_util() overhead a bit
  cpufreq: Select IRQ_WORK if CPU_FREQ_GOV_COMMON is set
  cpufreq: Remove 'policy->governor_enabled'
  cpufreq: Rename __cpufreq_governor() to cpufreq_governor()
  cpufreq: Relocate handle_update() to kill its declaration
  cpufreq: governor: Drop unnecessary checks from show() and store()
  cpufreq: governor: Fix race in dbs_update_util_handler()
  cpufreq: governor: Make gov_set_update_util() static
  cpufreq: governor: Narrow down the dbs_data_mutex coverage
  cpufreq: governor: Make dbs_data_mutex static
  cpufreq: governor: Relocate definitions of tuners structures
  cpufreq: governor: Move per-CPU data to the common code
  cpufreq: governor: Make governor private data per-policy
  ...
This commit is contained in:
Rafael J. Wysocki
2016-03-14 14:22:03 +01:00
26 changed files with 1566 additions and 1781 deletions

View File

@@ -19,3 +19,4 @@ obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
obj-$(CONFIG_CPU_FREQ) += cpufreq.o

37
kernel/sched/cpufreq.c Normal file
View File

@@ -0,0 +1,37 @@
/*
* Scheduler code and data structures related to cpufreq.
*
* Copyright (C) 2016, Intel Corporation
* Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include "sched.h"
DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
/**
* cpufreq_set_update_util_data - Populate the CPU's update_util_data pointer.
* @cpu: The CPU to set the pointer for.
* @data: New pointer value.
*
* Set and publish the update_util_data pointer for the given CPU. That pointer
* points to a struct update_util_data object containing a callback function
* to call from cpufreq_update_util(). That function will be called from an RCU
* read-side critical section, so it must not sleep.
*
* Callers must use RCU-sched callbacks to free any memory that might be
* accessed via the old update_util_data pointer or invoke synchronize_sched()
* right after this function to avoid use-after-free.
*/
void cpufreq_set_update_util_data(int cpu, struct update_util_data *data)
{
if (WARN_ON(data && !data->func))
return;
rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
}
EXPORT_SYMBOL_GPL(cpufreq_set_update_util_data);

View File

@@ -726,6 +726,10 @@ static void update_curr_dl(struct rq *rq)
if (!dl_task(curr) || !on_dl_rq(dl_se))
return;
/* Kick cpufreq (see the comment in linux/cpufreq.h). */
if (cpu_of(rq) == smp_processor_id())
cpufreq_trigger_update(rq_clock(rq));
/*
* Consumed budget is computed considering the time as
* observed by schedulable tasks (excluding time spent

View File

@@ -2824,7 +2824,8 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
{
struct cfs_rq *cfs_rq = cfs_rq_of(se);
u64 now = cfs_rq_clock_task(cfs_rq);
int cpu = cpu_of(rq_of(cfs_rq));
struct rq *rq = rq_of(cfs_rq);
int cpu = cpu_of(rq);
/*
* Track task load average for carrying it to new CPU after migrated, and
@@ -2836,6 +2837,29 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
update_tg_load_avg(cfs_rq, 0);
if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
unsigned long max = rq->cpu_capacity_orig;
/*
* There are a few boundary cases this might miss but it should
* get called often enough that that should (hopefully) not be
* a real problem -- added to that it only calls on the local
* CPU, so if we enqueue remotely we'll miss an update, but
* the next tick/schedule should update.
*
* It will not get called when we go idle, because the idle
* thread is a different class (!fair), nor will the utilization
* number include things like RT tasks.
*
* As is, the util number is not freq-invariant (we'd have to
* implement arch_scale_freq_capacity() for that).
*
* See cpu_util().
*/
cpufreq_update_util(rq_clock(rq),
min(cfs_rq->avg.util_avg, max), max);
}
}
static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)

View File

@@ -945,6 +945,10 @@ static void update_curr_rt(struct rq *rq)
if (curr->sched_class != &rt_sched_class)
return;
/* Kick cpufreq (see the comment in linux/cpufreq.h). */
if (cpu_of(rq) == smp_processor_id())
cpufreq_trigger_update(rq_clock(rq));
delta_exec = rq_clock_task(rq) - curr->se.exec_start;
if (unlikely((s64)delta_exec <= 0))
return;

View File

@@ -1738,3 +1738,51 @@ static inline u64 irq_time_read(int cpu)
}
#endif /* CONFIG_64BIT */
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
#ifdef CONFIG_CPU_FREQ
DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
/**
* cpufreq_update_util - Take a note about CPU utilization changes.
* @time: Current time.
* @util: Current utilization.
* @max: Utilization ceiling.
*
* This function is called by the scheduler on every invocation of
* update_load_avg() on the CPU whose utilization is being updated.
*
* It can only be called from RCU-sched read-side critical sections.
*/
static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
{
struct update_util_data *data;
data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
if (data)
data->func(data, time, util, max);
}
/**
* cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
* @time: Current time.
*
* The way cpufreq is currently arranged requires it to evaluate the CPU
* performance state (frequency/voltage) on a regular basis to prevent it from
* being stuck in a completely inadequate performance level for too long.
* That is not guaranteed to happen if the updates are only triggered from CFS,
* though, because they may not be coming in if RT or deadline tasks are active
* all the time (or there are RT and DL tasks only).
*
* As a workaround for that issue, this function is called by the RT and DL
* sched classes to trigger extra cpufreq updates to prevent it from stalling,
* but that really is a band-aid. Going forward it should be replaced with
* solutions targeted more specifically at RT and DL tasks.
*/
static inline void cpufreq_trigger_update(u64 time)
{
cpufreq_update_util(time, ULONG_MAX, 0);
}
#else
static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
static inline void cpufreq_trigger_update(u64 time) {}
#endif /* CONFIG_CPU_FREQ */

View File

@@ -15,4 +15,5 @@
EXPORT_TRACEPOINT_SYMBOL_GPL(suspend_resume);
EXPORT_TRACEPOINT_SYMBOL_GPL(cpu_idle);
EXPORT_TRACEPOINT_SYMBOL_GPL(powernv_throttle);