Merge branches 'pm-cpuidle' and 'pm-qos'

* pm-cpuidle:
  tick-sched: avoid a maybe-uninitialized warning
  cpuidle: Add definition of residency to sysfs documentation
  time: hrtimer: Use timerqueue_iterate_next() to get to the next timer
  nohz: Avoid duplication of code related to got_idle_tick
  nohz: Gather tick_sched booleans under a common flag field
  cpuidle: menu: Avoid selecting shallow states with stopped tick
  cpuidle: menu: Refine idle state selection for running tick
  sched: idle: Select idle state before stopping the tick
  time: hrtimer: Introduce hrtimer_next_event_without()
  time: tick-sched: Split tick_nohz_stop_sched_tick()
  cpuidle: Return nohz hint from cpuidle_select()
  jiffies: Introduce USER_TICK_USEC and redefine TICK_USEC
  sched: idle: Do not stop the tick before cpuidle_idle_call()
  sched: idle: Do not stop the tick upfront in the idle loop
  time: tick-sched: Reorganize idle tick management code

* pm-qos:
  PM / QoS: mark expected switch fall-throughs
This commit is contained in:
Rafael J. Wysocki
2018-04-11 13:22:46 +02:00
16 changed files with 417 additions and 122 deletions

View File

@@ -295,6 +295,7 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node,
* changed
*/
plist_del(node, &c->list);
/* fall through */
case PM_QOS_ADD_REQ:
plist_node_init(node, new_value);
plist_add(node, &c->list);
@@ -367,6 +368,7 @@ bool pm_qos_update_flags(struct pm_qos_flags *pqf,
break;
case PM_QOS_UPDATE_REQ:
pm_qos_flags_remove_req(pqf, req);
/* fall through */
case PM_QOS_ADD_REQ:
req->flags = val;
INIT_LIST_HEAD(&req->node);

View File

@@ -141,13 +141,15 @@ static void cpuidle_idle_call(void)
}
/*
* Tell the RCU framework we are entering an idle section,
* so no more rcu read side critical sections and one more
* The RCU framework needs to be told that we are entering an idle
* section, so no more rcu read side critical sections and one more
* step to the grace period
*/
rcu_idle_enter();
if (cpuidle_not_available(drv, dev)) {
tick_nohz_idle_stop_tick();
rcu_idle_enter();
default_idle_call();
goto exit_idle;
}
@@ -164,20 +166,37 @@ static void cpuidle_idle_call(void)
if (idle_should_enter_s2idle() || dev->use_deepest_state) {
if (idle_should_enter_s2idle()) {
rcu_idle_enter();
entered_state = cpuidle_enter_s2idle(drv, dev);
if (entered_state > 0) {
local_irq_enable();
goto exit_idle;
}
rcu_idle_exit();
}
tick_nohz_idle_stop_tick();
rcu_idle_enter();
next_state = cpuidle_find_deepest_state(drv, dev);
call_cpuidle(drv, dev, next_state);
} else {
bool stop_tick = true;
/*
* Ask the cpuidle framework to choose a convenient idle state.
*/
next_state = cpuidle_select(drv, dev);
next_state = cpuidle_select(drv, dev, &stop_tick);
if (stop_tick)
tick_nohz_idle_stop_tick();
else
tick_nohz_idle_retain_tick();
rcu_idle_enter();
entered_state = call_cpuidle(drv, dev, next_state);
/*
* Give the governor an opportunity to reflect on the outcome
@@ -222,6 +241,7 @@ static void do_idle(void)
rmb();
if (cpu_is_offline(cpu)) {
tick_nohz_idle_stop_tick_protected();
cpuhp_report_idle_dead();
arch_cpu_idle_dead();
}
@@ -235,10 +255,12 @@ static void do_idle(void)
* broadcast device expired for us, we don't want to go deep
* idle as we know that the IPI is going to arrive right away.
*/
if (cpu_idle_force_poll || tick_check_broadcast_expired())
if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
tick_nohz_idle_restart_tick();
cpu_idle_poll();
else
} else {
cpuidle_idle_call();
}
arch_cpu_idle_exit();
}

View File

@@ -480,6 +480,7 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
while ((base = __next_base((cpu_base), &(active))))
static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
const struct hrtimer *exclude,
unsigned int active,
ktime_t expires_next)
{
@@ -492,9 +493,22 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
next = timerqueue_getnext(&base->active);
timer = container_of(next, struct hrtimer, node);
if (timer == exclude) {
/* Get to the next timer in the queue. */
next = timerqueue_iterate_next(next);
if (!next)
continue;
timer = container_of(next, struct hrtimer, node);
}
expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
if (expires < expires_next) {
expires_next = expires;
/* Skip cpu_base update if a timer is being excluded. */
if (exclude)
continue;
if (timer->is_soft)
cpu_base->softirq_next_timer = timer;
else
@@ -538,7 +552,8 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
cpu_base->softirq_next_timer = NULL;
expires_next = __hrtimer_next_event_base(cpu_base, active, KTIME_MAX);
expires_next = __hrtimer_next_event_base(cpu_base, NULL,
active, KTIME_MAX);
next_timer = cpu_base->softirq_next_timer;
}
@@ -546,7 +561,8 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
if (active_mask & HRTIMER_ACTIVE_HARD) {
active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
cpu_base->next_timer = next_timer;
expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
expires_next = __hrtimer_next_event_base(cpu_base, NULL, active,
expires_next);
}
return expires_next;
@@ -1190,6 +1206,39 @@ u64 hrtimer_get_next_event(void)
return expires;
}
/**
* hrtimer_next_event_without - next expiry time, ignoring one timer
* @exclude: timer to exclude
*
* Scans the hrtimer bases of the current CPU while holding the cpu_base lock
* taken with raw_spin_lock_irqsave().
*
* Returns the next expiry time over all timers except for the @exclude one or
* KTIME_MAX if none of them is pending. KTIME_MAX is also returned when
* __hrtimer_hres_active() is false for this CPU, because no base is scanned
* in that case.
*/
u64 hrtimer_next_event_without(const struct hrtimer *exclude)
{
struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
u64 expires = KTIME_MAX;
unsigned long flags;
raw_spin_lock_irqsave(&cpu_base->lock, flags);
if (__hrtimer_hres_active(cpu_base)) {
unsigned int active;
/* The soft bases are only scanned while softirq_activated is clear. */
if (!cpu_base->softirq_activated) {
active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
expires = __hrtimer_next_event_base(cpu_base, exclude,
active, KTIME_MAX);
}
/*
* Seed the hard-base scan with the soft-base result so that the
* earlier of the two expiry times is what ends up in expires.
*/
active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
expires = __hrtimer_next_event_base(cpu_base, exclude, active,
expires);
}
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
return expires;
}
#endif
static inline int hrtimer_clockid_to_base(clockid_t clock_id)

View File

@@ -31,7 +31,7 @@
/* USER_HZ period (usecs): */
unsigned long tick_usec = TICK_USEC;
unsigned long tick_usec = USER_TICK_USEC;
/* SHIFTED_HZ period (nsecs): */
unsigned long tick_nsec;

View File

@@ -122,8 +122,7 @@ static ktime_t tick_init_jiffy_update(void)
return period;
}
static void tick_sched_do_timer(ktime_t now)
static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
{
int cpu = smp_processor_id();
@@ -143,6 +142,9 @@ static void tick_sched_do_timer(ktime_t now)
/* Check, if the jiffies need an update */
if (tick_do_timer_cpu == cpu)
tick_do_update_jiffies64(now);
if (ts->inidle)
ts->got_idle_tick = 1;
}
static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
@@ -474,7 +476,9 @@ __setup("nohz=", setup_tick_nohz);
bool tick_nohz_tick_stopped(void)
{
return __this_cpu_read(tick_cpu_sched.tick_stopped);
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
return ts->tick_stopped;
}
bool tick_nohz_tick_stopped_cpu(int cpu)
@@ -537,14 +541,11 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
sched_clock_idle_wakeup_event();
}
static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
static void tick_nohz_start_idle(struct tick_sched *ts)
{
ktime_t now = ktime_get();
ts->idle_entrytime = now;
ts->idle_entrytime = ktime_get();
ts->idle_active = 1;
sched_clock_idle_sleep_event();
return now;
}
/**
@@ -653,13 +654,10 @@ static inline bool local_timer_softirq_pending(void)
return local_softirq_pending() & TIMER_SOFTIRQ;
}
static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
ktime_t now, int cpu)
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
unsigned long seq, basejiff;
ktime_t tick;
/* Read jiffies and the time when jiffies were updated last */
do {
@@ -668,6 +666,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
basejiff = jiffies;
} while (read_seqretry(&jiffies_lock, seq));
ts->last_jiffies = basejiff;
ts->timer_expires_base = basemono;
/*
* Keep the periodic tick, when RCU, architecture or irq_work
@@ -712,32 +711,20 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
* next period, so no point in stopping it either, bail.
*/
if (!ts->tick_stopped) {
tick = 0;
ts->timer_expires = 0;
goto out;
}
}
/*
* If this CPU is the one which updates jiffies, then give up
* the assignment and let it be taken by the CPU which runs
* the tick timer next, which might be this CPU as well. If we
* don't drop this here the jiffies might be stale and
* do_timer() never invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last. If this CPU
* is the one which had the do_timer() duty last, we limit the
* sleep time to the timekeeping max_deferment value.
* If this CPU is the one which had the do_timer() duty last, we limit
* the sleep time to the timekeeping max_deferment value.
* Otherwise we can sleep as long as we want.
*/
delta = timekeeping_max_deferment();
if (cpu == tick_do_timer_cpu) {
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
ts->do_timer_last = 1;
} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
if (cpu != tick_do_timer_cpu &&
(tick_do_timer_cpu != TICK_DO_TIMER_NONE || !ts->do_timer_last))
delta = KTIME_MAX;
ts->do_timer_last = 0;
} else if (!ts->do_timer_last) {
delta = KTIME_MAX;
}
/* Calculate the next expiry time */
if (delta < (KTIME_MAX - basemono))
@@ -745,14 +732,42 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
else
expires = KTIME_MAX;
expires = min_t(u64, expires, next_tick);
tick = expires;
ts->timer_expires = min_t(u64, expires, next_tick);
out:
return ts->timer_expires;
}
static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
u64 basemono = ts->timer_expires_base;
u64 expires = ts->timer_expires;
ktime_t tick = expires;
/* Make sure we won't be trying to stop it twice in a row. */
ts->timer_expires_base = 0;
/*
* If this CPU is the one which updates jiffies, then give up
* the assignment and let it be taken by the CPU which runs
* the tick timer next, which might be this CPU as well. If we
* don't drop this here the jiffies might be stale and
* do_timer() never invoked. Keep track of the fact that it
* was the one which had the do_timer() duty last.
*/
if (cpu == tick_do_timer_cpu) {
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
ts->do_timer_last = 1;
} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
ts->do_timer_last = 0;
}
/* Skip reprogram of event if it's not changed */
if (ts->tick_stopped && (expires == ts->next_tick)) {
/* Sanity check: make sure clockevent is actually programmed */
if (tick == KTIME_MAX || ts->next_tick == hrtimer_get_expires(&ts->sched_timer))
goto out;
return;
WARN_ON_ONCE(1);
printk_once("basemono: %llu ts->next_tick: %llu dev->next_event: %llu timer->active: %d timer->expires: %llu\n",
@@ -786,7 +801,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
if (unlikely(expires == KTIME_MAX)) {
if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
hrtimer_cancel(&ts->sched_timer);
goto out;
return;
}
hrtimer_set_expires(&ts->sched_timer, tick);
@@ -795,15 +810,23 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
else
tick_program_event(tick, 1);
out:
/*
* Update the estimated sleep length until the next timer
* (not only the tick).
*/
ts->sleep_length = ktime_sub(dev->next_event, now);
return tick;
}
/*
* Leave the tick running and drop the cached next-event data: a zero
* timer_expires_base tells __tick_nohz_idle_stop_tick() that
* ts->timer_expires is not valid and has to be computed again.
*/
static void tick_nohz_retain_tick(struct tick_sched *ts)
{
ts->timer_expires_base = 0;
}
#ifdef CONFIG_NO_HZ_FULL
/*
* Compute the next event for @cpu and act on it in one step: stop the tick
* when tick_nohz_next_event() returns a nonzero expiry time, otherwise keep
* the tick and discard the cached next-event data.
*/
static void tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
{
if (tick_nohz_next_event(ts, cpu))
tick_nohz_stop_tick(ts, cpu);
else
tick_nohz_retain_tick(ts);
}
#endif /* CONFIG_NO_HZ_FULL */
static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
{
/* Update jiffies first */
@@ -839,7 +862,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
return;
if (can_stop_full_tick(cpu, ts))
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
tick_nohz_stop_sched_tick(ts, cpu);
else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, ktime_get());
#endif
@@ -865,10 +888,8 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return false;
}
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) {
ts->sleep_length = NSEC_PER_SEC / HZ;
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
return false;
}
if (need_resched())
return false;
@@ -903,42 +924,65 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return true;
}
static void __tick_nohz_idle_enter(struct tick_sched *ts)
static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
{
ktime_t now, expires;
ktime_t expires;
int cpu = smp_processor_id();
now = tick_nohz_start_idle(ts);
/*
* If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
* tick timer expiration time is known already.
*/
if (ts->timer_expires_base)
expires = ts->timer_expires;
else if (can_stop_idle_tick(cpu, ts))
expires = tick_nohz_next_event(ts, cpu);
else
return;
if (can_stop_idle_tick(cpu, ts)) {
ts->idle_calls++;
if (expires > 0LL) {
int was_stopped = ts->tick_stopped;
ts->idle_calls++;
tick_nohz_stop_tick(ts, cpu);
expires = tick_nohz_stop_sched_tick(ts, now, cpu);
if (expires > 0LL) {
ts->idle_sleeps++;
ts->idle_expires = expires;
}
ts->idle_sleeps++;
ts->idle_expires = expires;
if (!was_stopped && ts->tick_stopped) {
ts->idle_jiffies = ts->last_jiffies;
nohz_balance_enter_idle(cpu);
}
} else {
tick_nohz_retain_tick(ts);
}
}
/**
* tick_nohz_idle_enter - stop the idle tick from the idle task
* tick_nohz_idle_stop_tick - stop the idle tick from the idle task
*
* When the next event is more than a tick into the future, stop the idle tick
*/
void tick_nohz_idle_stop_tick(void)
{
/* Operate on the tick_sched instance of the CPU this code runs on. */
__tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
}
/**
* tick_nohz_idle_retain_tick - keep the idle tick running on the current CPU
*
* Discards the next-event data cached in this CPU's tick_sched and clears the
* timer idle state again.
*/
void tick_nohz_idle_retain_tick(void)
{
tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
/*
* Undo the effect of get_next_timer_interrupt() called from
* tick_nohz_next_event().
*/
timer_clear_idle();
}
/**
* tick_nohz_idle_enter - prepare for entering idle on the current CPU
*
* Called when we start the idle loop.
*
* The arch is responsible for calling:
*
* - rcu_idle_enter() after its last use of RCU before the CPU is put
* to sleep.
* - rcu_idle_exit() before the first use of RCU after the CPU is woken up.
*/
void tick_nohz_idle_enter(void)
{
@@ -949,8 +993,11 @@ void tick_nohz_idle_enter(void)
local_irq_disable();
ts = this_cpu_ptr(&tick_cpu_sched);
WARN_ON_ONCE(ts->timer_expires_base);
ts->inidle = 1;
__tick_nohz_idle_enter(ts);
tick_nohz_start_idle(ts);
local_irq_enable();
}
@@ -968,21 +1015,62 @@ void tick_nohz_irq_exit(void)
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
if (ts->inidle)
__tick_nohz_idle_enter(ts);
tick_nohz_start_idle(ts);
else
tick_nohz_full_update_tick(ts);
}
/**
* tick_nohz_get_sleep_length - return the length of the current sleep
*
* Called from power state control code with interrupts disabled
* tick_nohz_idle_got_tick - Check whether or not the tick handler has run
*/
ktime_t tick_nohz_get_sleep_length(void)
bool tick_nohz_idle_got_tick(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
return ts->sleep_length;
if (ts->got_idle_tick) {
ts->got_idle_tick = 0;
return true;
}
return false;
}
/**
* tick_nohz_get_sleep_length - return the expected length of the current sleep
* @delta_next: duration until the next event if the tick cannot be stopped
*
* Called from power state control code with interrupts disabled
*
* *@delta_next is always written. It is also the return value when
* can_stop_idle_tick() fails or tick_nohz_next_event() returns 0. Otherwise
* the return value is the time from the idle entry time to the earlier of the
* next event and the next hrtimer other than the tick timer itself.
*
* Calling tick_nohz_next_event() here updates ts->timer_expires and
* ts->timer_expires_base, which __tick_nohz_idle_stop_tick() reuses.
*/
ktime_t tick_nohz_get_sleep_length(ktime_t *delta_next)
{
struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
int cpu = smp_processor_id();
/*
* The idle entry time is expected to be a sufficient approximation of
* the current time at this point.
*/
ktime_t now = ts->idle_entrytime;
ktime_t next_event;
WARN_ON_ONCE(!ts->inidle);
/* Time from idle entry to the clock event device's next_event. */
*delta_next = ktime_sub(dev->next_event, now);
if (!can_stop_idle_tick(cpu, ts))
return *delta_next;
next_event = tick_nohz_next_event(ts, cpu);
/* A zero next_event is handled like the "cannot stop the tick" case. */
if (!next_event)
return *delta_next;
/*
* If the next highres timer to expire is earlier than next_event, the
* idle governor needs to know that.
*/
next_event = min_t(u64, next_event,
hrtimer_next_event_without(&ts->sched_timer));
return ktime_sub(next_event, now);
}
/**
@@ -1031,6 +1119,20 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
#endif
}
/*
* Restart the tick for @ts using @now as the current time, then account the
* idle ticks. Both callers visible here check ts->tick_stopped first.
*/
static void __tick_nohz_idle_restart_tick(struct tick_sched *ts, ktime_t now)
{
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_ticks(ts);
}
/**
* tick_nohz_idle_restart_tick - restart the tick on the current CPU if stopped
*
* Does nothing when the tick of the current CPU is not stopped.
*/
void tick_nohz_idle_restart_tick(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
/* Only read the clock when there is actually a stopped tick to restart. */
if (ts->tick_stopped)
__tick_nohz_idle_restart_tick(ts, ktime_get());
}
/**
* tick_nohz_idle_exit - restart the idle tick from the idle task
*
@@ -1041,24 +1143,26 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
void tick_nohz_idle_exit(void)
{
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
bool idle_active, tick_stopped;
ktime_t now;
local_irq_disable();
WARN_ON_ONCE(!ts->inidle);
WARN_ON_ONCE(ts->timer_expires_base);
ts->inidle = 0;
idle_active = ts->idle_active;
tick_stopped = ts->tick_stopped;
if (ts->idle_active || ts->tick_stopped)
if (idle_active || tick_stopped)
now = ktime_get();
if (ts->idle_active)
if (idle_active)
tick_nohz_stop_idle(ts, now);
if (ts->tick_stopped) {
tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_ticks(ts);
}
if (tick_stopped)
__tick_nohz_idle_restart_tick(ts, now);
local_irq_enable();
}
@@ -1074,7 +1178,7 @@ static void tick_nohz_handler(struct clock_event_device *dev)
dev->next_event = KTIME_MAX;
tick_sched_do_timer(now);
tick_sched_do_timer(ts, now);
tick_sched_handle(ts, regs);
/* No need to reprogram if we are running tickless */
@@ -1169,7 +1273,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
struct pt_regs *regs = get_irq_regs();
ktime_t now = ktime_get();
tick_sched_do_timer(now);
tick_sched_do_timer(ts, now);
/*
* Do not call, when we are not in irq context and have

View File

@@ -38,31 +38,37 @@ enum tick_nohz_mode {
* @idle_exittime: Time when the idle state was left
* @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
* @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding
* @sleep_length: Duration of the current idle sleep
* @timer_expires: Anticipated timer expiration time (in case sched tick is stopped)
* @timer_expires_base: Base time clock monotonic for @timer_expires
* @do_timer_last: CPU was the last one doing do_timer before going idle
* @got_idle_tick: Tick timer function has run with @inidle set
*/
struct tick_sched {
struct hrtimer sched_timer;
unsigned long check_clocks;
enum tick_nohz_mode nohz_mode;
unsigned int inidle : 1;
unsigned int tick_stopped : 1;
unsigned int idle_active : 1;
unsigned int do_timer_last : 1;
unsigned int got_idle_tick : 1;
ktime_t last_tick;
ktime_t next_tick;
int inidle;
int tick_stopped;
unsigned long idle_jiffies;
unsigned long idle_calls;
unsigned long idle_sleeps;
int idle_active;
ktime_t idle_entrytime;
ktime_t idle_waketime;
ktime_t idle_exittime;
ktime_t idle_sleeptime;
ktime_t iowait_sleeptime;
ktime_t sleep_length;
unsigned long last_jiffies;
u64 timer_expires;
u64 timer_expires_base;
u64 next_timer;
ktime_t idle_expires;
int do_timer_last;
atomic_t tick_dep_mask;
};