time, signal: Protect resource use statistics with seqlock
Both times() and clock_gettime(CLOCK_PROCESS_CPUTIME_ID) have scalability issues on large systems, due to both functions being serialized with a lock. The lock protects against reporting a wrong value, due to a thread in the task group exiting, its statistics reporting up to the signal struct, and that exited task's statistics being counted twice (or not at all). Protecting that with a lock results in times() and clock_gettime() being completely serialized on large systems. This can be fixed by using a seqlock around the events that gather and propagate statistics. As an additional benefit, the protection code can be moved into thread_group_cputime(), slightly simplifying the calling functions. In the case of posix_cpu_clock_get_task() things can be simplified a lot, because the calling function already ensures that the task sticks around, and the rest is now taken care of in thread_group_cputime(). This way the statistics reporting code can run lockless. Signed-off-by: Rik van Riel <riel@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Alex Thorlton <athorlton@sgi.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Daeseok Youn <daeseok.youn@gmail.com> Cc: David Rientjes <rientjes@google.com> Cc: Dongsheng Yang <yangds.fnst@cn.fujitsu.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Guillaume Morin <guillaume@morinfr.org> Cc: Ionut Alexa <ionut.m.alexa@gmail.com> Cc: Kees Cook <keescook@chromium.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Li Zefan <lizefan@huawei.com> Cc: Michal Hocko <mhocko@suse.cz> Cc: Michal Schmidt <mschmidt@redhat.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Vladimir Davydov <vdavydov@parallels.com> Cc: umgwanakikbuti@gmail.com Cc: fweisbec@gmail.com Cc: srao@redhat.com Cc: lwoodman@redhat.com Cc: atheurer@redhat.com Link: http://lkml.kernel.org/r/20140816134010.26a9b572@annuminas.surriel.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:

committed by
Ingo Molnar

parent
90ed9cbe76
commit
e78c349679
@@ -288,18 +288,28 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
|
||||
struct signal_struct *sig = tsk->signal;
|
||||
cputime_t utime, stime;
|
||||
struct task_struct *t;
|
||||
|
||||
times->utime = sig->utime;
|
||||
times->stime = sig->stime;
|
||||
times->sum_exec_runtime = sig->sum_sched_runtime;
|
||||
unsigned int seq, nextseq;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_thread(tsk, t) {
|
||||
task_cputime(t, &utime, &stime);
|
||||
times->utime += utime;
|
||||
times->stime += stime;
|
||||
times->sum_exec_runtime += task_sched_runtime(t);
|
||||
}
|
||||
/* Attempt a lockless read on the first round. */
|
||||
nextseq = 0;
|
||||
do {
|
||||
seq = nextseq;
|
||||
read_seqbegin_or_lock(&sig->stats_lock, &seq);
|
||||
times->utime = sig->utime;
|
||||
times->stime = sig->stime;
|
||||
times->sum_exec_runtime = sig->sum_sched_runtime;
|
||||
|
||||
for_each_thread(tsk, t) {
|
||||
task_cputime(t, &utime, &stime);
|
||||
times->utime += utime;
|
||||
times->stime += stime;
|
||||
times->sum_exec_runtime += task_sched_runtime(t);
|
||||
}
|
||||
/* If lockless access failed, take the lock. */
|
||||
nextseq = 1;
|
||||
} while (need_seqretry(&sig->stats_lock, seq));
|
||||
done_seqretry(&sig->stats_lock, seq);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
@@ -611,9 +621,6 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
|
||||
cputime_adjust(&cputime, &p->prev_cputime, ut, st);
|
||||
}
|
||||
|
||||
/*
|
||||
* Must be called with siglock held.
|
||||
*/
|
||||
void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
|
||||
{
|
||||
struct task_cputime cputime;
|
||||
|
Reference in New Issue
Block a user