Revert: Unify CLOCK_MONOTONIC and CLOCK_BOOTTIME

Revert commits

92af4dcb4e ("tracing: Unify the "boot" and "mono" tracing clocks")
127bfa5f43 ("hrtimer: Unify MONOTONIC and BOOTTIME clock behavior")
7250a4047a ("posix-timers: Unify MONOTONIC and BOOTTIME clock behavior")
d6c7270e91 ("timekeeping: Remove boot time specific code")
f2d6fdbfd2 ("Input: Evdev - unify MONOTONIC and BOOTTIME clock behavior")
d6ed449afd ("timekeeping: Make the MONOTONIC clock behave like the BOOTTIME clock")
72199320d4 ("timekeeping: Add the new CLOCK_MONOTONIC_ACTIVE clock")

As stated in the pull request for the unification of CLOCK_MONOTONIC and
CLOCK_BOOTTIME, it was clear that we might have to revert the change.

As reported by several folks systemd and other applications rely on the
documented behaviour of CLOCK_MONOTONIC on Linux and break with the above
changes. After resume daemons time out and other timeout related issues are
observed. Rafael compiled this list:

* systemd kills daemons on resume, after >WatchdogSec seconds
  of suspending (Genki Sky).  [Verified that that's because systemd uses
  CLOCK_MONOTONIC and expects it to not include the suspend time.]

* systemd-journald misbehaves after resume:
  systemd-journald[7266]: File /var/log/journal/016627c3c4784cd4812d4b7e96a34226/system.journal
corrupted or uncleanly shut down, renaming and replacing.
  (Mike Galbraith).

* NetworkManager reports "networking disabled" and networking is broken
  after resume 50% of the time (Pavel).  [May be because of systemd.]

* MATE desktop dims the display and starts the screensaver right after
  system resume (Pavel).

* Full system hang during resume (me).  [May be due to systemd or NM or both.]

That happens on debian and open suse systems.

It's sad, that these problems were neither catched in -next nor by those
folks who expressed interest in this change.

Reported-by: Rafael J. Wysocki <rjw@rjwysocki.net>
Reported-by: Genki Sky <sky@genki.is>,
Reported-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kevin Easton <kevin@guarana.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Salyzyn <salyzyn@android.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
This commit is contained in:
Thomas Gleixner
2018-04-25 15:33:38 +02:00
parent 1f71addd34
commit a3ed0e4393
15 changed files with 114 additions and 104 deletions

View File

@@ -90,6 +90,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
.clockid = CLOCK_REALTIME,
.get_time = &ktime_get_real,
},
{
.index = HRTIMER_BASE_BOOTTIME,
.clockid = CLOCK_BOOTTIME,
.get_time = &ktime_get_boottime,
},
{
.index = HRTIMER_BASE_TAI,
.clockid = CLOCK_TAI,
@@ -105,6 +110,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
.clockid = CLOCK_REALTIME,
.get_time = &ktime_get_real,
},
{
.index = HRTIMER_BASE_BOOTTIME_SOFT,
.clockid = CLOCK_BOOTTIME,
.get_time = &ktime_get_boottime,
},
{
.index = HRTIMER_BASE_TAI_SOFT,
.clockid = CLOCK_TAI,
@@ -119,7 +129,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
[CLOCK_REALTIME] = HRTIMER_BASE_REALTIME,
[CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC,
[CLOCK_BOOTTIME] = HRTIMER_BASE_MONOTONIC,
[CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME,
[CLOCK_TAI] = HRTIMER_BASE_TAI,
};
@@ -571,12 +581,14 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
{
ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq,
offs_real, offs_tai);
offs_real, offs_boot, offs_tai);
base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;
return now;

View File

@@ -83,8 +83,6 @@ int do_clock_gettime(clockid_t which_clock, struct timespec64 *tp)
case CLOCK_BOOTTIME:
get_monotonic_boottime64(tp);
break;
case CLOCK_MONOTONIC_ACTIVE:
ktime_get_active_ts64(tp);
default:
return -EINVAL;
}

View File

@@ -252,16 +252,15 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *
return 0;
}
static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp)
static int posix_get_boottime(const clockid_t which_clock, struct timespec64 *tp)
{
timekeeping_clocktai64(tp);
get_monotonic_boottime64(tp);
return 0;
}
static int posix_get_monotonic_active(clockid_t which_clock,
struct timespec64 *tp)
static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_active_ts64(tp);
timekeeping_clocktai64(tp);
return 0;
}
@@ -1317,9 +1316,19 @@ static const struct k_clock clock_tai = {
.timer_arm = common_hrtimer_arm,
};
static const struct k_clock clock_monotonic_active = {
static const struct k_clock clock_boottime = {
.clock_getres = posix_get_hrtimer_res,
.clock_get = posix_get_monotonic_active,
.clock_get = posix_get_boottime,
.nsleep = common_nsleep,
.timer_create = common_timer_create,
.timer_set = common_timer_set,
.timer_get = common_timer_get,
.timer_del = common_timer_del,
.timer_rearm = common_hrtimer_rearm,
.timer_forward = common_hrtimer_forward,
.timer_remaining = common_hrtimer_remaining,
.timer_try_to_cancel = common_hrtimer_try_to_cancel,
.timer_arm = common_hrtimer_arm,
};
static const struct k_clock * const posix_clocks[] = {
@@ -1330,11 +1339,10 @@ static const struct k_clock * const posix_clocks[] = {
[CLOCK_MONOTONIC_RAW] = &clock_monotonic_raw,
[CLOCK_REALTIME_COARSE] = &clock_realtime_coarse,
[CLOCK_MONOTONIC_COARSE] = &clock_monotonic_coarse,
[CLOCK_BOOTTIME] = &clock_monotonic,
[CLOCK_BOOTTIME] = &clock_boottime,
[CLOCK_REALTIME_ALARM] = &alarm_clock,
[CLOCK_BOOTTIME_ALARM] = &alarm_clock,
[CLOCK_TAI] = &clock_tai,
[CLOCK_MONOTONIC_ACTIVE] = &clock_monotonic_active,
};
static const struct k_clock *clockid_to_kclock(const clockid_t id)

View File

@@ -419,19 +419,6 @@ void tick_suspend_local(void)
clockevents_shutdown(td->evtdev);
}
static void tick_forward_next_period(void)
{
ktime_t delta, now = ktime_get();
u64 n;
delta = ktime_sub(now, tick_next_period);
n = ktime_divns(delta, tick_period);
tick_next_period += n * tick_period;
if (tick_next_period < now)
tick_next_period += tick_period;
tick_sched_forward_next_period();
}
/**
* tick_resume_local - Resume the local tick device
*
@@ -444,8 +431,6 @@ void tick_resume_local(void)
struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
bool broadcast = tick_resume_check_broadcast();
tick_forward_next_period();
clockevents_tick_resume(td->evtdev);
if (!broadcast) {
if (td->mode == TICKDEV_MODE_PERIODIC)

View File

@@ -141,12 +141,6 @@ static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); }
#endif /* !(BROADCAST && ONESHOT) */
#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
extern void tick_sched_forward_next_period(void);
#else
static inline void tick_sched_forward_next_period(void) { }
#endif
/* NO_HZ_FULL internal */
#ifdef CONFIG_NO_HZ_FULL
extern void tick_nohz_init(void);

View File

@@ -51,15 +51,6 @@ struct tick_sched *tick_get_tick_sched(int cpu)
*/
static ktime_t last_jiffies_update;
/*
* Called after resume. Make sure that jiffies are not fast forwarded due to
* clock monotonic being forwarded by the suspended time.
*/
void tick_sched_forward_next_period(void)
{
last_jiffies_update = tick_next_period;
}
/*
* Must be called with interrupts disabled !
*/

View File

@@ -138,12 +138,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
{
/* Update both bases so mono and raw stay coupled. */
tk->tkr_mono.base += delta;
tk->tkr_raw.base += delta;
/* Accumulate time spent in suspend */
tk->time_suspended += delta;
tk->offs_boot = ktime_add(tk->offs_boot, delta);
}
/*
@@ -473,6 +468,36 @@ u64 ktime_get_raw_fast_ns(void)
}
EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
/**
* ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
*
* To keep it NMI safe since we're accessing from tracing, we're not using a
* separate timekeeper with updates to monotonic clock and boot offset
* protected with seqlocks. This has the following minor side effects:
*
* (1) Its possible that a timestamp be taken after the boot offset is updated
* but before the timekeeper is updated. If this happens, the new boot offset
* is added to the old timekeeping making the clock appear to update slightly
* earlier:
* CPU 0 CPU 1
* timekeeping_inject_sleeptime64()
* __timekeeping_inject_sleeptime(tk, delta);
* timestamp();
* timekeeping_update(tk, TK_CLEAR_NTP...);
*
* (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
* partially updated. Since the tk->offs_boot update is a rare event, this
* should be a rare occurrence which postprocessing should be able to handle.
*/
u64 notrace ktime_get_boot_fast_ns(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
}
EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
/*
* See comment for __ktime_get_fast_ns() vs. timestamp ordering
*/
@@ -764,6 +789,7 @@ EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);
static ktime_t *offsets[TK_OFFS_MAX] = {
[TK_OFFS_REAL] = &tk_core.timekeeper.offs_real,
[TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot,
[TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai,
};
@@ -860,39 +886,6 @@ void ktime_get_ts64(struct timespec64 *ts)
}
EXPORT_SYMBOL_GPL(ktime_get_ts64);
/**
* ktime_get_active_ts64 - Get the active non-suspended monotonic clock
* @ts: pointer to timespec variable
*
* The function calculates the monotonic clock from the realtime clock and
* the wall_to_monotonic offset, subtracts the accumulated suspend time and
* stores the result in normalized timespec64 format in the variable
* pointed to by @ts.
*/
void ktime_get_active_ts64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
struct timespec64 tomono, tsusp;
u64 nsec, nssusp;
unsigned int seq;
WARN_ON(timekeeping_suspended);
do {
seq = read_seqcount_begin(&tk_core.seq);
ts->tv_sec = tk->xtime_sec;
nsec = timekeeping_get_ns(&tk->tkr_mono);
tomono = tk->wall_to_monotonic;
nssusp = tk->time_suspended;
} while (read_seqcount_retry(&tk_core.seq, seq));
ts->tv_sec += tomono.tv_sec;
ts->tv_nsec = 0;
timespec64_add_ns(ts, nsec + tomono.tv_nsec);
tsusp = ns_to_timespec64(nssusp);
*ts = timespec64_sub(*ts, tsusp);
}
/**
* ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC
*
@@ -1593,6 +1586,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
return;
}
tk_xtime_add(tk, delta);
tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
tk_debug_account_sleep_time(delta);
}
@@ -2125,7 +2119,7 @@ out:
void getboottime64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
ktime_t t = ktime_sub(tk->offs_real, tk->time_suspended);
ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);
*ts = ktime_to_timespec64(t);
}
@@ -2188,6 +2182,7 @@ void do_timer(unsigned long ticks)
* ktime_get_update_offsets_now - hrtimer helper
* @cwsseq: pointer to check and store the clock was set sequence number
* @offs_real: pointer to storage for monotonic -> realtime offset
* @offs_boot: pointer to storage for monotonic -> boottime offset
* @offs_tai: pointer to storage for monotonic -> clock tai offset
*
* Returns current monotonic time and updates the offsets if the
@@ -2197,7 +2192,7 @@ void do_timer(unsigned long ticks)
* Called from hrtimer_interrupt() or retrigger_next_event()
*/
ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
ktime_t *offs_tai)
ktime_t *offs_boot, ktime_t *offs_tai)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
@@ -2214,6 +2209,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
if (*cwsseq != tk->clock_was_set_seq) {
*cwsseq = tk->clock_was_set_seq;
*offs_real = tk->offs_real;
*offs_boot = tk->offs_boot;
*offs_tai = tk->offs_tai;
}

View File

@@ -6,6 +6,7 @@
*/
extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq,
ktime_t *offs_real,
ktime_t *offs_boot,
ktime_t *offs_tai);
extern int timekeeping_valid_for_hres(void);

View File

@@ -1165,7 +1165,7 @@ static struct {
{ trace_clock, "perf", 1 },
{ ktime_get_mono_fast_ns, "mono", 1 },
{ ktime_get_raw_fast_ns, "mono_raw", 1 },
{ ktime_get_mono_fast_ns, "boot", 1 },
{ ktime_get_boot_fast_ns, "boot", 1 },
ARCH_TRACE_CLOCKS
};