Merge branch 'sched/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into cputime-tip
Conflicts:
	drivers/cpufreq/cpufreq_conservative.c
	drivers/cpufreq/cpufreq_ondemand.c
	drivers/macintosh/rack-meter.c
	fs/proc/stat.c
	fs/proc/uptime.c
	kernel/sched/core.c
20  kernel/sched/Makefile  Normal file
@@ -0,0 +1,20 @@
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_clock.o = -pg
endif

ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
# needed for x86 only. Why this used to be enabled for all architectures is beyond
# me. I suspect most platforms don't need this, but until we know that for sure
# I turn this off for IA-64 only. Andreas Schwab says it's also needed on m68k
# to get a correct value for the wait-channel (WCHAN in ps). --davidm
CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
endif

obj-y += core.o clock.o idle_task.o fair.o rt.o stop_task.o
obj-$(CONFIG_SMP) += cpupri.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
obj-$(CONFIG_SCHED_DEBUG) += debug.o
258  kernel/sched/auto_group.c  Normal file
@@ -0,0 +1,258 @@
#ifdef CONFIG_SCHED_AUTOGROUP

#include "sched.h"

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <linux/utsname.h>
#include <linux/security.h>
#include <linux/export.h>

unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
static struct autogroup autogroup_default;
static atomic_t autogroup_seq_nr;

void __init autogroup_init(struct task_struct *init_task)
{
	autogroup_default.tg = &root_task_group;
	kref_init(&autogroup_default.kref);
	init_rwsem(&autogroup_default.lock);
	init_task->signal->autogroup = &autogroup_default;
}

void autogroup_free(struct task_group *tg)
{
	kfree(tg->autogroup);
}

static inline void autogroup_destroy(struct kref *kref)
{
	struct autogroup *ag = container_of(kref, struct autogroup, kref);

#ifdef CONFIG_RT_GROUP_SCHED
	/* We've redirected RT tasks to the root task group... */
	ag->tg->rt_se = NULL;
	ag->tg->rt_rq = NULL;
#endif
	sched_destroy_group(ag->tg);
}

static inline void autogroup_kref_put(struct autogroup *ag)
{
	kref_put(&ag->kref, autogroup_destroy);
}

static inline struct autogroup *autogroup_kref_get(struct autogroup *ag)
{
	kref_get(&ag->kref);
	return ag;
}

static inline struct autogroup *autogroup_task_get(struct task_struct *p)
{
	struct autogroup *ag;
	unsigned long flags;

	if (!lock_task_sighand(p, &flags))
		return autogroup_kref_get(&autogroup_default);

	ag = autogroup_kref_get(p->signal->autogroup);
	unlock_task_sighand(p, &flags);

	return ag;
}

static inline struct autogroup *autogroup_create(void)
{
	struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
	struct task_group *tg;

	if (!ag)
		goto out_fail;

	tg = sched_create_group(&root_task_group);

	if (IS_ERR(tg))
		goto out_free;

	kref_init(&ag->kref);
	init_rwsem(&ag->lock);
	ag->id = atomic_inc_return(&autogroup_seq_nr);
	ag->tg = tg;
#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * Autogroup RT tasks are redirected to the root task group
	 * so we don't have to move tasks around upon policy change,
	 * or flail around trying to allocate bandwidth on the fly.
	 * A bandwidth exception in __sched_setscheduler() allows
	 * the policy change to proceed. Thereafter, task_group()
	 * returns &root_task_group, so zero bandwidth is required.
	 */
	free_rt_sched_group(tg);
	tg->rt_se = root_task_group.rt_se;
	tg->rt_rq = root_task_group.rt_rq;
#endif
	tg->autogroup = ag;

	return ag;

out_free:
	kfree(ag);
out_fail:
	if (printk_ratelimit()) {
		printk(KERN_WARNING "autogroup_create: %s failure.\n",
			ag ? "sched_create_group()" : "kmalloc()");
	}

	return autogroup_kref_get(&autogroup_default);
}

bool task_wants_autogroup(struct task_struct *p, struct task_group *tg)
{
	if (tg != &root_task_group)
		return false;

	if (p->sched_class != &fair_sched_class)
		return false;

	/*
	 * We can only assume the task group can't go away on us if
	 * autogroup_move_group() can see us on ->thread_group list.
	 */
	if (p->flags & PF_EXITING)
		return false;

	return true;
}

static void
autogroup_move_group(struct task_struct *p, struct autogroup *ag)
{
	struct autogroup *prev;
	struct task_struct *t;
	unsigned long flags;

	BUG_ON(!lock_task_sighand(p, &flags));

	prev = p->signal->autogroup;
	if (prev == ag) {
		unlock_task_sighand(p, &flags);
		return;
	}

	p->signal->autogroup = autogroup_kref_get(ag);

	if (!ACCESS_ONCE(sysctl_sched_autogroup_enabled))
		goto out;

	t = p;
	do {
		sched_move_task(t);
	} while_each_thread(p, t);

out:
	unlock_task_sighand(p, &flags);
	autogroup_kref_put(prev);
}

/* Allocates GFP_KERNEL, cannot be called under any spinlock */
void sched_autogroup_create_attach(struct task_struct *p)
{
	struct autogroup *ag = autogroup_create();

	autogroup_move_group(p, ag);
	/* drop extra reference added by autogroup_create() */
	autogroup_kref_put(ag);
}
EXPORT_SYMBOL(sched_autogroup_create_attach);

/* Cannot be called under siglock. Currently has no users */
void sched_autogroup_detach(struct task_struct *p)
{
	autogroup_move_group(p, &autogroup_default);
}
EXPORT_SYMBOL(sched_autogroup_detach);

void sched_autogroup_fork(struct signal_struct *sig)
{
	sig->autogroup = autogroup_task_get(current);
}

void sched_autogroup_exit(struct signal_struct *sig)
{
	autogroup_kref_put(sig->autogroup);
}

static int __init setup_autogroup(char *str)
{
	sysctl_sched_autogroup_enabled = 0;

	return 1;
}

__setup("noautogroup", setup_autogroup);

#ifdef CONFIG_PROC_FS

int proc_sched_autogroup_set_nice(struct task_struct *p, int *nice)
{
	static unsigned long next = INITIAL_JIFFIES;
	struct autogroup *ag;
	int err;

	if (*nice < -20 || *nice > 19)
		return -EINVAL;

	err = security_task_setnice(current, *nice);
	if (err)
		return err;

	if (*nice < 0 && !can_nice(current, *nice))
		return -EPERM;

	/* this is a heavy operation taking global locks.. */
	if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
		return -EAGAIN;

	next = HZ / 10 + jiffies;
	ag = autogroup_task_get(p);

	down_write(&ag->lock);
	err = sched_group_set_shares(ag->tg, prio_to_weight[*nice + 20]);
	if (!err)
		ag->nice = *nice;
	up_write(&ag->lock);

	autogroup_kref_put(ag);

	return err;
}

void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
{
	struct autogroup *ag = autogroup_task_get(p);

	if (!task_group_is_autogroup(ag->tg))
		goto out;

	down_read(&ag->lock);
	seq_printf(m, "/autogroup-%ld nice %d\n", ag->id, ag->nice);
	up_read(&ag->lock);

out:
	autogroup_kref_put(ag);
}
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SCHED_DEBUG
int autogroup_path(struct task_group *tg, char *buf, int buflen)
{
	if (!task_group_is_autogroup(tg))
		return 0;

	return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
}
#endif /* CONFIG_SCHED_DEBUG */

#endif /* CONFIG_SCHED_AUTOGROUP */
64  kernel/sched/auto_group.h  Normal file
@@ -0,0 +1,64 @@
#ifdef CONFIG_SCHED_AUTOGROUP

#include <linux/kref.h>
#include <linux/rwsem.h>

struct autogroup {
	/*
	 * The reference count doesn't reflect how many threads are
	 * attached to this autogroup right now; it just stands for
	 * the number of tasks that could use this autogroup.
	 */
	struct kref		kref;
	struct task_group	*tg;
	struct rw_semaphore	lock;
	unsigned long		id;
	int			nice;
};

extern void autogroup_init(struct task_struct *init_task);
extern void autogroup_free(struct task_group *tg);

static inline bool task_group_is_autogroup(struct task_group *tg)
{
	return !!tg->autogroup;
}

extern bool task_wants_autogroup(struct task_struct *p, struct task_group *tg);

static inline struct task_group *
autogroup_task_group(struct task_struct *p, struct task_group *tg)
{
	int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);

	if (enabled && task_wants_autogroup(p, tg))
		return p->signal->autogroup->tg;

	return tg;
}

extern int autogroup_path(struct task_group *tg, char *buf, int buflen);

#else /* !CONFIG_SCHED_AUTOGROUP */

static inline void autogroup_init(struct task_struct *init_task) { }
static inline void autogroup_free(struct task_group *tg) { }
static inline bool task_group_is_autogroup(struct task_group *tg)
{
	return 0;
}

static inline struct task_group *
autogroup_task_group(struct task_struct *p, struct task_group *tg)
{
	return tg;
}

#ifdef CONFIG_SCHED_DEBUG
static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
{
	return 0;
}
#endif

#endif /* CONFIG_SCHED_AUTOGROUP */
350  kernel/sched/clock.c  Normal file
@@ -0,0 +1,350 @@
/*
 * sched_clock for unstable cpu clocks
 *
 *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 *  Updates and enhancements:
 *    Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com>
 *
 * Based on code by:
 *   Ingo Molnar <mingo@redhat.com>
 *   Guillaume Chazarain <guichaz@gmail.com>
 *
 *
 * What:
 *
 * cpu_clock(i) provides a fast (execution time) high resolution
 * clock with bounded drift between CPUs. The value of cpu_clock(i)
 * is monotonic for constant i. The timestamp returned is in nanoseconds.
 *
 * ######################### BIG FAT WARNING ##########################
 * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
 * # go backwards !!                                                  #
 * ####################################################################
 *
 * There is no strict promise about the base, although it tends to start
 * at 0 on boot (but people really shouldn't rely on that).
 *
 * cpu_clock(i)       -- can be used from any context, including NMI.
 * sched_clock_cpu(i) -- must be used with local IRQs disabled (implied by NMI)
 * local_clock()      -- is cpu_clock() on the current cpu.
 *
 * How:
 *
 * The implementation either uses sched_clock() when
 * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, which means in that case the
 * sched_clock() is assumed to provide these properties (mostly it means
 * the architecture provides a globally synchronized highres time source).
 *
 * Otherwise it tries to create a semi stable clock from a mixture of other
 * clocks, including:
 *
 *  - GTOD (clock monotonic)
 *  - sched_clock()
 *  - explicit idle events
 *
 * We use GTOD as base and use sched_clock() deltas to improve resolution. The
 * deltas are filtered to provide monotonicity and keeping it within an
 * expected window.
 *
 * Furthermore, explicit sleep and wakeup hooks allow us to account for time
 * that is otherwise invisible (TSC gets stopped).
 *
 *
 * Notes:
 *
 * The !IRQ-safety of sched_clock() and sched_clock_cpu() comes from things
 * like cpufreq interrupts that can change the base clock (TSC) multiplier
 * and cause funny jumps in time -- although the filtering provided by
 * sched_clock_cpu() should mitigate serious artifacts we cannot rely on it
 * in general since for !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK we fully rely on
 * sched_clock().
 */
#include <linux/spinlock.h>
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/*
 * Scheduler clock - returns current time in nanosec units.
 * This is default implementation.
 * Architectures and sub-architectures can override this.
 */
unsigned long long __attribute__((weak)) sched_clock(void)
{
	return (unsigned long long)(jiffies - INITIAL_JIFFIES)
					* (NSEC_PER_SEC / HZ);
}
EXPORT_SYMBOL_GPL(sched_clock);

__read_mostly int sched_clock_running;

#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__read_mostly int sched_clock_stable;

struct sched_clock_data {
	u64			tick_raw;
	u64			tick_gtod;
	u64			clock;
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);

static inline struct sched_clock_data *this_scd(void)
{
	return &__get_cpu_var(sched_clock_data);
}

static inline struct sched_clock_data *cpu_sdc(int cpu)
{
	return &per_cpu(sched_clock_data, cpu);
}

void sched_clock_init(void)
{
	u64 ktime_now = ktime_to_ns(ktime_get());
	int cpu;

	for_each_possible_cpu(cpu) {
		struct sched_clock_data *scd = cpu_sdc(cpu);

		scd->tick_raw = 0;
		scd->tick_gtod = ktime_now;
		scd->clock = ktime_now;
	}

	sched_clock_running = 1;
}

/*
 * min, max except they take wrapping into account
 */

static inline u64 wrap_min(u64 x, u64 y)
{
	return (s64)(x - y) < 0 ? x : y;
}

static inline u64 wrap_max(u64 x, u64 y)
{
	return (s64)(x - y) > 0 ? x : y;
}

/*
 * update the percpu scd from the raw @now value
 *
 *  - filter out backward motion
 *  - use the GTOD tick value to create a window to filter crazy TSC values
 */
static u64 sched_clock_local(struct sched_clock_data *scd)
{
	u64 now, clock, old_clock, min_clock, max_clock;
	s64 delta;

again:
	now = sched_clock();
	delta = now - scd->tick_raw;
	if (unlikely(delta < 0))
		delta = 0;

	old_clock = scd->clock;

	/*
	 * scd->clock = clamp(scd->tick_gtod + delta,
	 *		      max(scd->tick_gtod, scd->clock),
	 *		      scd->tick_gtod + TICK_NSEC);
	 */

	clock = scd->tick_gtod + delta;
	min_clock = wrap_max(scd->tick_gtod, old_clock);
	max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);

	clock = wrap_max(clock, min_clock);
	clock = wrap_min(clock, max_clock);

	if (cmpxchg64(&scd->clock, old_clock, clock) != old_clock)
		goto again;

	return clock;
}

static u64 sched_clock_remote(struct sched_clock_data *scd)
{
	struct sched_clock_data *my_scd = this_scd();
	u64 this_clock, remote_clock;
	u64 *ptr, old_val, val;

	sched_clock_local(my_scd);
again:
	this_clock = my_scd->clock;
	remote_clock = scd->clock;

	/*
	 * Use the opportunity that we have both locks
	 * taken to couple the two clocks: we take the
	 * larger time as the latest time for both
	 * runqueues. (this creates monotonic movement)
	 */
	if (likely((s64)(remote_clock - this_clock) < 0)) {
		ptr = &scd->clock;
		old_val = remote_clock;
		val = this_clock;
	} else {
		/*
		 * Should be rare, but possible:
		 */
		ptr = &my_scd->clock;
		old_val = this_clock;
		val = remote_clock;
	}

	if (cmpxchg64(ptr, old_val, val) != old_val)
		goto again;

	return val;
}

/*
 * Similar to cpu_clock(), but requires local IRQs to be disabled.
 *
 * See cpu_clock().
 */
u64 sched_clock_cpu(int cpu)
{
	struct sched_clock_data *scd;
	u64 clock;

	WARN_ON_ONCE(!irqs_disabled());

	if (sched_clock_stable)
		return sched_clock();

	if (unlikely(!sched_clock_running))
		return 0ull;

	scd = cpu_sdc(cpu);

	if (cpu != smp_processor_id())
		clock = sched_clock_remote(scd);
	else
		clock = sched_clock_local(scd);

	return clock;
}

void sched_clock_tick(void)
{
	struct sched_clock_data *scd;
	u64 now, now_gtod;

	if (sched_clock_stable)
		return;

	if (unlikely(!sched_clock_running))
		return;

	WARN_ON_ONCE(!irqs_disabled());

	scd = this_scd();
	now_gtod = ktime_to_ns(ktime_get());
	now = sched_clock();

	scd->tick_raw = now;
	scd->tick_gtod = now_gtod;
	sched_clock_local(scd);
}

/*
 * We are going deep-idle (irqs are disabled):
 */
void sched_clock_idle_sleep_event(void)
{
	sched_clock_cpu(smp_processor_id());
}
EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);

/*
 * We just idled delta nanoseconds (called with irqs disabled):
 */
void sched_clock_idle_wakeup_event(u64 delta_ns)
{
	if (timekeeping_suspended)
		return;

	sched_clock_tick();
	touch_softlockup_watchdog();
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);

/*
 * As outlined at the top, provides a fast, high resolution, nanosecond
 * time source that is monotonic per cpu argument and has bounded drift
 * between cpus.
 *
 * ######################### BIG FAT WARNING ##########################
 * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
 * # go backwards !!                                                  #
 * ####################################################################
 */
u64 cpu_clock(int cpu)
{
	u64 clock;
	unsigned long flags;

	local_irq_save(flags);
	clock = sched_clock_cpu(cpu);
	local_irq_restore(flags);

	return clock;
}

/*
 * Similar to cpu_clock() for the current cpu. Time will only be observed
 * to be monotonic if care is taken to only compare timestamps taken on the
 * same CPU.
 *
 * See cpu_clock().
 */
u64 local_clock(void)
{
	u64 clock;
	unsigned long flags;

	local_irq_save(flags);
	clock = sched_clock_cpu(smp_processor_id());
	local_irq_restore(flags);

	return clock;
}

#else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */

void sched_clock_init(void)
{
	sched_clock_running = 1;
}

u64 sched_clock_cpu(int cpu)
{
	if (unlikely(!sched_clock_running))
		return 0;

	return sched_clock();
}

u64 cpu_clock(int cpu)
{
	return sched_clock_cpu(cpu);
}

u64 local_clock(void)
{
	return sched_clock_cpu(0);
}

#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */

EXPORT_SYMBOL_GPL(cpu_clock);
EXPORT_SYMBOL_GPL(local_clock);
8119  kernel/sched/core.c  Normal file
(file diff too large to display)
241  kernel/sched/cpupri.c  Normal file
@@ -0,0 +1,241 @@
/*
 *  kernel/sched/cpupri.c
 *
 *  CPU priority management
 *
 *  Copyright (C) 2007-2008 Novell
 *
 *  Author: Gregory Haskins <ghaskins@novell.com>
 *
 *  This code tracks the priority of each CPU so that global migration
 *  decisions are easy to calculate.  Each CPU can be in a state as follows:
 *
 *                 (INVALID), IDLE, NORMAL, RT1, ... RT99
 *
 *  going from the lowest priority to the highest.  CPUs in the INVALID state
 *  are not eligible for routing.  The system maintains this state with
 *  a 2 dimensional bitmap (the first for priority class, the second for cpus
 *  in that class).  Therefore a typical application without affinity
 *  restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
 *  searches).  For tasks with affinity restrictions, the algorithm has a
 *  worst case complexity of O(min(102, nr_domcpus)), though the scenario that
 *  yields the worst case search is fairly contrived.
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; version 2
 *  of the License.
 */

#include <linux/gfp.h>
#include "cpupri.h"

/* Convert between a 140 based task->prio, and our 102 based cpupri */
static int convert_prio(int prio)
{
	int cpupri;

	if (prio == CPUPRI_INVALID)
		cpupri = CPUPRI_INVALID;
	else if (prio == MAX_PRIO)
		cpupri = CPUPRI_IDLE;
	else if (prio >= MAX_RT_PRIO)
		cpupri = CPUPRI_NORMAL;
	else
		cpupri = MAX_RT_PRIO - prio + 1;

	return cpupri;
}

/**
 * cpupri_find - find the best (lowest-pri) CPU in the system
 * @cp: The cpupri context
 * @p: The task
 * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
 *
 * Note: This function returns the recommended CPUs as calculated during the
 * current invocation.  By the time the call returns, the CPUs may have in
 * fact changed priorities any number of times.  While not ideal, it is not
 * an issue of correctness since the normal rebalancer logic will correct
 * any discrepancies created by racing against the uncertainty of the current
 * priority configuration.
 *
 * Returns: (int)bool - CPUs were found
 */
int cpupri_find(struct cpupri *cp, struct task_struct *p,
		struct cpumask *lowest_mask)
{
	int idx = 0;
	int task_pri = convert_prio(p->prio);

	if (task_pri >= MAX_RT_PRIO)
		return 0;

	for (idx = 0; idx < task_pri; idx++) {
		struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
		int skip = 0;

		if (!atomic_read(&(vec)->count))
			skip = 1;
		/*
		 * When looking at the vector, we need to read the counter,
		 * do a memory barrier, then read the mask.
		 *
		 * Note: This is still all racey, but we can deal with it.
		 *  Ideally, we only want to look at masks that are set.
		 *
		 *  If a mask is not set, then the only thing wrong is that we
		 *  did a little more work than necessary.
		 *
		 *  If we read a zero count but the mask is set, because of the
		 *  memory barriers, that can only happen when the highest prio
		 *  task for a run queue has left the run queue, in which case,
		 *  it will be followed by a pull. If the task we are processing
		 *  fails to find a proper place to go, that pull request will
		 *  pull this task if the run queue is running at a lower
		 *  priority.
		 */
		smp_rmb();

		/* Need to do the rmb for every iteration */
		if (skip)
			continue;

		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
			continue;

		if (lowest_mask) {
			cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);

			/*
			 * We have to ensure that we have at least one bit
			 * still set in the array, since the map could have
			 * been concurrently emptied between the first and
			 * second reads of vec->mask.  If we hit this
			 * condition, simply act as though we never hit this
			 * priority level and continue on.
			 */
			if (cpumask_any(lowest_mask) >= nr_cpu_ids)
				continue;
		}

		return 1;
	}

	return 0;
}

/**
 * cpupri_set - update the cpu priority setting
 * @cp: The cpupri context
 * @cpu: The target cpu
 * @newpri: The priority (INVALID-RT99) to assign to this CPU
 *
 * Note: Assumes cpu_rq(cpu)->lock is locked
 *
 * Returns: (void)
 */
void cpupri_set(struct cpupri *cp, int cpu, int newpri)
{
	int *currpri = &cp->cpu_to_pri[cpu];
	int oldpri = *currpri;
	int do_mb = 0;

	newpri = convert_prio(newpri);

	BUG_ON(newpri >= CPUPRI_NR_PRIORITIES);

	if (newpri == oldpri)
		return;

	/*
	 * If the cpu was currently mapped to a different value, we
	 * need to map it to the new value then remove the old value.
	 * Note, we must add the new value first, otherwise we risk the
	 * cpu being missed by the priority loop in cpupri_find.
	 */
	if (likely(newpri != CPUPRI_INVALID)) {
		struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];

		cpumask_set_cpu(cpu, vec->mask);
		/*
		 * When adding a new vector, we update the mask first,
		 * do a write memory barrier, and then update the count, to
		 * make sure the vector is visible when count is set.
		 */
		smp_mb__before_atomic_inc();
		atomic_inc(&(vec)->count);
		do_mb = 1;
	}
	if (likely(oldpri != CPUPRI_INVALID)) {
		struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];

		/*
		 * Because the order of modification of the vec->count
		 * is important, we must make sure that the update
		 * of the new prio is seen before we decrement the
		 * old prio. This makes sure that the loop sees
		 * one or the other when we raise the priority of
		 * the run queue. We don't care about when we lower the
		 * priority, as that will trigger an rt pull anyway.
		 *
		 * We only need to do a memory barrier if we updated
		 * the new priority vec.
		 */
		if (do_mb)
			smp_mb__after_atomic_inc();

		/*
		 * When removing from the vector, we decrement the counter first
		 * do a memory barrier and then clear the mask.
		 */
		atomic_dec(&(vec)->count);
		smp_mb__after_atomic_inc();
		cpumask_clear_cpu(cpu, vec->mask);
	}

	*currpri = newpri;
}

/**
 * cpupri_init - initialize the cpupri structure
 * @cp: The cpupri context
 *
 * Returns: -ENOMEM if memory fails.
 */
int cpupri_init(struct cpupri *cp)
{
	int i;

	memset(cp, 0, sizeof(*cp));

	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
		struct cpupri_vec *vec = &cp->pri_to_cpu[i];

		atomic_set(&vec->count, 0);
		if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
			goto cleanup;
	}

	for_each_possible_cpu(i)
		cp->cpu_to_pri[i] = CPUPRI_INVALID;
	return 0;

cleanup:
	for (i--; i >= 0; i--)
		free_cpumask_var(cp->pri_to_cpu[i].mask);
	return -ENOMEM;
}

/**
 * cpupri_cleanup - clean up the cpupri structure
 * @cp: The cpupri context
 */
void cpupri_cleanup(struct cpupri *cp)
{
	int i;

	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
		free_cpumask_var(cp->pri_to_cpu[i].mask);
}
34  kernel/sched/cpupri.h  Normal file
@@ -0,0 +1,34 @@
#ifndef _LINUX_CPUPRI_H
#define _LINUX_CPUPRI_H

#include <linux/sched.h>

#define CPUPRI_NR_PRIORITIES	(MAX_RT_PRIO + 2)

#define CPUPRI_INVALID -1
#define CPUPRI_IDLE     0
#define CPUPRI_NORMAL   1
/* values 2-101 are RT priorities 0-99 */

struct cpupri_vec {
	atomic_t	count;
	cpumask_var_t	mask;
};

struct cpupri {
	struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
	int               cpu_to_pri[NR_CPUS];
};

#ifdef CONFIG_SMP
int  cpupri_find(struct cpupri *cp,
		 struct task_struct *p, struct cpumask *lowest_mask);
void cpupri_set(struct cpupri *cp, int cpu, int pri);
int  cpupri_init(struct cpupri *cp);
void cpupri_cleanup(struct cpupri *cp);
#else
#define cpupri_set(cp, cpu, pri) do { } while (0)
#define cpupri_init() do { } while (0)
#endif

#endif /* _LINUX_CPUPRI_H */
510  kernel/sched/debug.c  Normal file
@@ -0,0 +1,510 @@
/*
 * kernel/sched/debug.c
 *
 * Print the CFS rbtree
 *
 * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
#include <linux/utsname.h>

#include "sched.h"

static DEFINE_SPINLOCK(sched_debug_lock);

/*
 * This allows printing both to /proc/sched_debug and
 * to the console
 */
#define SEQ_printf(m, x...)			\
 do {						\
	if (m)					\
		seq_printf(m, x);		\
	else					\
		printk(x);			\
 } while (0)

/*
 * Ease the printing of nsec fields:
 */
static long long nsec_high(unsigned long long nsec)
{
	if ((long long)nsec < 0) {
		nsec = -nsec;
		do_div(nsec, 1000000);
		return -nsec;
	}
	do_div(nsec, 1000000);

	return nsec;
}

static unsigned long nsec_low(unsigned long long nsec)
{
	if ((long long)nsec < 0)
		nsec = -nsec;

	return do_div(nsec, 1000000);
}

#define SPLIT_NS(x) nsec_high(x), nsec_low(x)

#ifdef CONFIG_FAIR_GROUP_SCHED
static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
{
	struct sched_entity *se = tg->se[cpu];
	if (!se)
		return;

#define P(F) \
	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)F)
#define PN(F) \
	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))

	PN(se->exec_start);
	PN(se->vruntime);
	PN(se->sum_exec_runtime);
#ifdef CONFIG_SCHEDSTATS
	PN(se->statistics.wait_start);
	PN(se->statistics.sleep_start);
	PN(se->statistics.block_start);
	PN(se->statistics.sleep_max);
	PN(se->statistics.block_max);
	PN(se->statistics.exec_max);
	PN(se->statistics.slice_max);
	PN(se->statistics.wait_max);
	PN(se->statistics.wait_sum);
	P(se->statistics.wait_count);
#endif
	P(se->load.weight);
#undef PN
#undef P
}
#endif

#ifdef CONFIG_CGROUP_SCHED
static char group_path[PATH_MAX];

static char *task_group_path(struct task_group *tg)
{
	if (autogroup_path(tg, group_path, PATH_MAX))
		return group_path;

	/*
	 * May be NULL if the underlying cgroup isn't fully-created yet
	 */
	if (!tg->css.cgroup) {
		group_path[0] = '\0';
		return group_path;
	}
	cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
	return group_path;
}
#endif

static void
print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
{
	if (rq->curr == p)
		SEQ_printf(m, "R");
	else
		SEQ_printf(m, " ");

	SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ",
		p->comm, p->pid,
		SPLIT_NS(p->se.vruntime),
		(long long)(p->nvcsw + p->nivcsw),
		p->prio);
#ifdef CONFIG_SCHEDSTATS
	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
		SPLIT_NS(p->se.vruntime),
		SPLIT_NS(p->se.sum_exec_runtime),
		SPLIT_NS(p->se.statistics.sum_sleep_runtime));
#else
	SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
		0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
#endif
#ifdef CONFIG_CGROUP_SCHED
	SEQ_printf(m, " %s", task_group_path(task_group(p)));
#endif

	SEQ_printf(m, "\n");
}

static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
{
	struct task_struct *g, *p;
	unsigned long flags;

	SEQ_printf(m,
	"\nrunnable tasks:\n"
	"            task   PID         tree-key  switches  prio"
	"     exec-runtime         sum-exec        sum-sleep\n"
	"------------------------------------------------------"
	"----------------------------------------------------\n");

	read_lock_irqsave(&tasklist_lock, flags);

	do_each_thread(g, p) {
		if (!p->on_rq || task_cpu(p) != rq_cpu)
			continue;

		print_task(m, rq, p);
	} while_each_thread(g, p);

	read_unlock_irqrestore(&tasklist_lock, flags);
}

void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
	s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
		spread, rq0_min_vruntime, spread0;
	struct rq *rq = cpu_rq(cpu);
	struct sched_entity *last;
	unsigned long flags;

#ifdef CONFIG_FAIR_GROUP_SCHED
	SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg));
#else
	SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
#endif
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
			SPLIT_NS(cfs_rq->exec_clock));

	raw_spin_lock_irqsave(&rq->lock, flags);
	if (cfs_rq->rb_leftmost)
		MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
	last = __pick_last_entity(cfs_rq);
	if (last)
		max_vruntime = last->vruntime;
	min_vruntime = cfs_rq->min_vruntime;
	rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
	raw_spin_unlock_irqrestore(&rq->lock, flags);
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
			SPLIT_NS(MIN_vruntime));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
			SPLIT_NS(min_vruntime));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
			SPLIT_NS(max_vruntime));
	spread = max_vruntime - MIN_vruntime;
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
			SPLIT_NS(spread));
	spread0 = min_vruntime - rq0_min_vruntime;
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
			SPLIT_NS(spread0));
	SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
			cfs_rq->nr_spread_over);
	SEQ_printf(m, "  .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SMP
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "load_avg",
			SPLIT_NS(cfs_rq->load_avg));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "load_period",
			SPLIT_NS(cfs_rq->load_period));
	SEQ_printf(m, "  .%-30s: %ld\n", "load_contrib",
			cfs_rq->load_contribution);
	SEQ_printf(m, "  .%-30s: %d\n", "load_tg",
			atomic_read(&cfs_rq->tg->load_weight));
#endif

	print_cfs_group_stats(m, cpu, cfs_rq->tg);
#endif
}

void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#ifdef CONFIG_RT_GROUP_SCHED
	SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg));
#else
	SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
#endif

#define P(x) \
	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
#define PN(x) \
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))

	P(rt_nr_running);
	P(rt_throttled);
	PN(rt_time);
	PN(rt_runtime);

#undef PN
#undef P
}

extern __read_mostly int sched_clock_running;

static void print_cpu(struct seq_file *m, int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	unsigned long flags;

#ifdef CONFIG_X86
	{
		unsigned int freq = cpu_khz ? : 1;

		SEQ_printf(m, "\ncpu#%d, %u.%03u MHz\n",
			   cpu, freq / 1000, (freq % 1000));
	}
#else
	SEQ_printf(m, "\ncpu#%d\n", cpu);
#endif

#define P(x) \
	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x))
#define PN(x) \
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))

	P(nr_running);
	SEQ_printf(m, "  .%-30s: %lu\n", "load",
		   rq->load.weight);
	P(nr_switches);
	P(nr_load_updates);
	P(nr_uninterruptible);
	PN(next_balance);
	P(curr->pid);
	PN(clock);
	P(cpu_load[0]);
	P(cpu_load[1]);
	P(cpu_load[2]);
	P(cpu_load[3]);
	P(cpu_load[4]);
#undef P
#undef PN

#ifdef CONFIG_SCHEDSTATS
#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);

	P(yld_count);

	P(sched_switch);
	P(sched_count);
	P(sched_goidle);
#ifdef CONFIG_SMP
	P64(avg_idle);
#endif

	P(ttwu_count);
	P(ttwu_local);

#undef P
#undef P64
#endif
	spin_lock_irqsave(&sched_debug_lock, flags);
	print_cfs_stats(m, cpu);
	print_rt_stats(m, cpu);

	rcu_read_lock();
	print_rq(m, rq, cpu);
	rcu_read_unlock();
	spin_unlock_irqrestore(&sched_debug_lock, flags);
}

static const char *sched_tunable_scaling_names[] = {
	"none",
	"logarithmic",
	"linear"
};

static int sched_debug_show(struct seq_file *m, void *v)
{
	u64 ktime, sched_clk, cpu_clk;
	unsigned long flags;
	int cpu;

	local_irq_save(flags);
	ktime = ktime_to_ns(ktime_get());
	sched_clk = sched_clock();
	cpu_clk = local_clock();
	local_irq_restore(flags);

	SEQ_printf(m, "Sched Debug Version: v0.10, %s %.*s\n",
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);

#define P(x) \
	SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
	SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
	PN(ktime);
	PN(sched_clk);
	PN(cpu_clk);
	P(jiffies);
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
	P(sched_clock_stable);
#endif
#undef PN
#undef P

	SEQ_printf(m, "\n");
	SEQ_printf(m, "sysctl_sched\n");

#define P(x) \
	SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
	SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
	PN(sysctl_sched_latency);
	PN(sysctl_sched_min_granularity);
	PN(sysctl_sched_wakeup_granularity);
	P(sysctl_sched_child_runs_first);
	P(sysctl_sched_features);
#undef PN
#undef P

	SEQ_printf(m, "  .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling",
		sysctl_sched_tunable_scaling,
		sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);

	for_each_online_cpu(cpu)
		print_cpu(m, cpu);

	SEQ_printf(m, "\n");

	return 0;
}

void sysrq_sched_debug_show(void)
{
	sched_debug_show(NULL, NULL);
}

static int sched_debug_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_debug_show, NULL);
}

static const struct file_operations sched_debug_fops = {
	.open		= sched_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init init_sched_debug_procfs(void)
{
	struct proc_dir_entry *pe;

	pe = proc_create("sched_debug", 0444, NULL, &sched_debug_fops);
	if (!pe)
		return -ENOMEM;
	return 0;
}

__initcall(init_sched_debug_procfs);

void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
{
	unsigned long nr_switches;

	SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid,
						get_nr_threads(p));
	SEQ_printf(m,
		"---------------------------------------------------------\n");
#define __P(F) \
	SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)F)
#define P(F) \
	SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)p->F)
#define __PN(F) \
	SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
#define PN(F) \
	SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))

	PN(se.exec_start);
	PN(se.vruntime);
	PN(se.sum_exec_runtime);

	nr_switches = p->nvcsw + p->nivcsw;

#ifdef CONFIG_SCHEDSTATS
	PN(se.statistics.wait_start);
	PN(se.statistics.sleep_start);
	PN(se.statistics.block_start);
	PN(se.statistics.sleep_max);
	PN(se.statistics.block_max);
	PN(se.statistics.exec_max);
	PN(se.statistics.slice_max);
	PN(se.statistics.wait_max);
	PN(se.statistics.wait_sum);
	P(se.statistics.wait_count);
	PN(se.statistics.iowait_sum);
	P(se.statistics.iowait_count);
	P(se.nr_migrations);
	P(se.statistics.nr_migrations_cold);
	P(se.statistics.nr_failed_migrations_affine);
	P(se.statistics.nr_failed_migrations_running);
	P(se.statistics.nr_failed_migrations_hot);
	P(se.statistics.nr_forced_migrations);
	P(se.statistics.nr_wakeups);
	P(se.statistics.nr_wakeups_sync);
	P(se.statistics.nr_wakeups_migrate);
	P(se.statistics.nr_wakeups_local);
	P(se.statistics.nr_wakeups_remote);
	P(se.statistics.nr_wakeups_affine);
	P(se.statistics.nr_wakeups_affine_attempts);
	P(se.statistics.nr_wakeups_passive);
	P(se.statistics.nr_wakeups_idle);

	{
		u64 avg_atom, avg_per_cpu;

		avg_atom = p->se.sum_exec_runtime;
		if (nr_switches)
			do_div(avg_atom, nr_switches);
		else
			avg_atom = -1LL;

		avg_per_cpu = p->se.sum_exec_runtime;
		if (p->se.nr_migrations) {
			avg_per_cpu = div64_u64(avg_per_cpu,
						p->se.nr_migrations);
		} else {
			avg_per_cpu = -1LL;
		}

		__PN(avg_atom);
		__PN(avg_per_cpu);
	}
#endif
	__P(nr_switches);
	SEQ_printf(m, "%-35s:%21Ld\n",
		   "nr_voluntary_switches", (long long)p->nvcsw);
	SEQ_printf(m, "%-35s:%21Ld\n",
		   "nr_involuntary_switches", (long long)p->nivcsw);

	P(se.load.weight);
	P(policy);
	P(prio);
#undef PN
#undef __PN
#undef P
#undef __P

	{
		unsigned int this_cpu = raw_smp_processor_id();
		u64 t0, t1;

		t0 = cpu_clock(this_cpu);
		t1 = cpu_clock(this_cpu);
		SEQ_printf(m, "%-35s:%21Ld\n",
			   "clock-delta", (long long)(t1-t0));
	}
}

void proc_sched_set_task(struct task_struct *p)
{
#ifdef CONFIG_SCHEDSTATS
	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
#endif
}
5577  kernel/sched/fair.c  Normal file
(file diff too large to display)
70  kernel/sched/features.h  Normal file
@@ -0,0 +1,70 @@
/*
 * Only give sleepers 50% of their service deficit. This allows
 * them to run sooner, but does not allow tons of sleepers to
 * rip the spread apart.
 */
SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)

/*
 * Place new tasks ahead so that they do not starve already running
 * tasks
 */
SCHED_FEAT(START_DEBIT, true)

/*
 * Based on load and program behaviour, see if it makes sense to place
 * a newly woken task on the same cpu as the task that woke it --
 * improve cache locality. Typically used with SYNC wakeups as
 * generated by pipes and the like, see also SYNC_WAKEUPS.
 */
SCHED_FEAT(AFFINE_WAKEUPS, true)

/*
 * Prefer to schedule the task we woke last (assuming it failed
 * wakeup-preemption), since it's likely going to consume data we
 * touched, increases cache locality.
 */
SCHED_FEAT(NEXT_BUDDY, false)

/*
 * Prefer to schedule the task that ran last (when we did
 * wake-preempt) as that likely will touch the same data, increases
 * cache locality.
 */
SCHED_FEAT(LAST_BUDDY, true)

/*
 * Consider buddies to be cache hot, decreases the likelihood of a
 * cache buddy being migrated away, increases cache locality.
 */
SCHED_FEAT(CACHE_HOT_BUDDY, true)

/*
 * Use arch dependent cpu power functions
 */
SCHED_FEAT(ARCH_POWER, false)

SCHED_FEAT(HRTICK, false)
SCHED_FEAT(DOUBLE_TICK, false)
SCHED_FEAT(LB_BIAS, true)

/*
 * Spin-wait on mutex acquisition when the mutex owner is running on
 * another cpu -- assumes that when the owner is running, it will soon
 * release the lock. Decreases scheduling overhead.
 */
SCHED_FEAT(OWNER_SPIN, true)

/*
 * Decrement CPU power based on time not spent running tasks
 */
SCHED_FEAT(NONTASK_POWER, true)

/*
 * Queue remote wakeups on the target CPU and process them
 * using the scheduler IPI. Reduces rq->lock contention/bounces.
 */
SCHED_FEAT(TTWU_QUEUE, true)

SCHED_FEAT(FORCE_SD_OVERLAP, false)
SCHED_FEAT(RT_RUNTIME_SHARE, true)
99  kernel/sched/idle_task.c  Normal file
@@ -0,0 +1,99 @@
#include "sched.h"

/*
 * idle-task scheduling class.
 *
 * (NOTE: these are not related to SCHED_IDLE tasks which are
 *  handled in sched_fair.c)
 */

#ifdef CONFIG_SMP
static int
select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
{
	return task_cpu(p); /* IDLE tasks are never migrated */
}
#endif /* CONFIG_SMP */
/*
 * Idle tasks are unconditionally rescheduled:
 */
static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags)
{
	resched_task(rq->idle);
}

static struct task_struct *pick_next_task_idle(struct rq *rq)
{
	schedstat_inc(rq, sched_goidle);
	calc_load_account_idle(rq);
	return rq->idle;
}

/*
 * It is not legal to sleep in the idle task - print a warning
 * message if some code attempts to do it:
 */
static void
dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
{
	raw_spin_unlock_irq(&rq->lock);
	printk(KERN_ERR "bad: scheduling from the idle thread!\n");
	dump_stack();
	raw_spin_lock_irq(&rq->lock);
}

static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
}

static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
{
}

static void set_curr_task_idle(struct rq *rq)
{
}

static void switched_to_idle(struct rq *rq, struct task_struct *p)
{
	BUG();
}

static void
prio_changed_idle(struct rq *rq, struct task_struct *p, int oldprio)
{
	BUG();
}

static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
{
	return 0;
}

/*
 * Simple, special scheduling class for the per-CPU idle tasks:
 */
const struct sched_class idle_sched_class = {
	/* .next is NULL */
	/* no enqueue/yield_task for idle tasks */

	/* dequeue is not valid, we print a debug message there: */
	.dequeue_task		= dequeue_task_idle,

	.check_preempt_curr	= check_preempt_curr_idle,

	.pick_next_task		= pick_next_task_idle,
	.put_prev_task		= put_prev_task_idle,

#ifdef CONFIG_SMP
	.select_task_rq		= select_task_rq_idle,
#endif

	.set_curr_task		= set_curr_task_idle,
	.task_tick		= task_tick_idle,

	.get_rr_interval	= get_rr_interval_idle,

	.prio_changed		= prio_changed_idle,
	.switched_to		= switched_to_idle,
};
2048  kernel/sched/rt.c  Normal file
(file diff too large to display)
1136  kernel/sched/sched.h  Normal file
(file diff too large to display)
111  kernel/sched/stats.c  Normal file
@@ -0,0 +1,111 @@

#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>

#include "sched.h"

/*
 * bump this up when changing the output format or the meaning of an existing
 * format, so that tools can adapt (or abort)
 */
#define SCHEDSTAT_VERSION 15

static int show_schedstat(struct seq_file *seq, void *v)
{
	int cpu;
	int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9;
	char *mask_str = kmalloc(mask_len, GFP_KERNEL);

	if (mask_str == NULL)
		return -ENOMEM;

	seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION);
	seq_printf(seq, "timestamp %lu\n", jiffies);
	for_each_online_cpu(cpu) {
		struct rq *rq = cpu_rq(cpu);
#ifdef CONFIG_SMP
		struct sched_domain *sd;
		int dcount = 0;
#endif

		/* runqueue-specific stats */
		seq_printf(seq,
		    "cpu%d %u %u %u %u %u %u %llu %llu %lu",
		    cpu, rq->yld_count,
		    rq->sched_switch, rq->sched_count, rq->sched_goidle,
		    rq->ttwu_count, rq->ttwu_local,
		    rq->rq_cpu_time,
		    rq->rq_sched_info.run_delay, rq->rq_sched_info.pcount);

		seq_printf(seq, "\n");

#ifdef CONFIG_SMP
		/* domain-specific stats */
		rcu_read_lock();
		for_each_domain(cpu, sd) {
			enum cpu_idle_type itype;

			cpumask_scnprintf(mask_str, mask_len,
					  sched_domain_span(sd));
			seq_printf(seq, "domain%d %s", dcount++, mask_str);
			for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
					itype++) {
				seq_printf(seq, " %u %u %u %u %u %u %u %u",
				    sd->lb_count[itype],
				    sd->lb_balanced[itype],
				    sd->lb_failed[itype],
				    sd->lb_imbalance[itype],
				    sd->lb_gained[itype],
				    sd->lb_hot_gained[itype],
				    sd->lb_nobusyq[itype],
				    sd->lb_nobusyg[itype]);
			}
			seq_printf(seq,
				   " %u %u %u %u %u %u %u %u %u %u %u %u\n",
			    sd->alb_count, sd->alb_failed, sd->alb_pushed,
			    sd->sbe_count, sd->sbe_balanced, sd->sbe_pushed,
			    sd->sbf_count, sd->sbf_balanced, sd->sbf_pushed,
			    sd->ttwu_wake_remote, sd->ttwu_move_affine,
			    sd->ttwu_move_balance);
		}
		rcu_read_unlock();
#endif
	}
	kfree(mask_str);
	return 0;
}

static int schedstat_open(struct inode *inode, struct file *file)
{
	unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32);
	char *buf = kmalloc(size, GFP_KERNEL);
	struct seq_file *m;
	int res;

	if (!buf)
		return -ENOMEM;
	res = single_open(file, show_schedstat, NULL);
	if (!res) {
		m = file->private_data;
		m->buf = buf;
		m->size = size;
	} else
		kfree(buf);
	return res;
}

static const struct file_operations proc_schedstat_operations = {
	.open    = schedstat_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static int __init proc_schedstat_init(void)
{
	proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
	return 0;
}
module_init(proc_schedstat_init);
231
kernel/sched/stats.h
Normal file
231
kernel/sched/stats.h
Normal file
@@ -0,0 +1,231 @@
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
|
||||
/*
|
||||
* Expects runqueue lock to be held for atomicity of update
|
||||
*/
|
||||
static inline void
|
||||
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
|
||||
{
|
||||
if (rq) {
|
||||
rq->rq_sched_info.run_delay += delta;
|
||||
rq->rq_sched_info.pcount++;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Expects runqueue lock to be held for atomicity of update
|
||||
*/
|
||||
static inline void
|
||||
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
|
||||
{
|
||||
if (rq)
|
||||
rq->rq_cpu_time += delta;
|
||||
}
|
||||
|
||||
static inline void
|
||||
rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
|
||||
{
|
||||
if (rq)
|
||||
rq->rq_sched_info.run_delay += delta;
|
||||
}
|
||||
# define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
|
||||
# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
|
||||
# define schedstat_set(var, val) do { var = (val); } while (0)
|
||||
#else /* !CONFIG_SCHEDSTATS */
|
||||
static inline void
|
||||
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
|
||||
{}
|
||||
static inline void
|
||||
rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
|
||||
{}
|
||||
static inline void
|
||||
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
|
||||
{}
|
||||
# define schedstat_inc(rq, field) do { } while (0)
|
||||
# define schedstat_add(rq, field, amt) do { } while (0)
|
||||
# define schedstat_set(var, val) do { } while (0)
|
||||
#endif

#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
static inline void sched_info_reset_dequeued(struct task_struct *t)
{
        t->sched_info.last_queued = 0;
}

/*
 * We are interested in knowing how long it was from the *first* time a
 * task was queued to the time that it finally hit a cpu, we call this routine
 * from dequeue_task() to account for possible rq->clock skew across cpus. The
 * delta taken on each cpu would annul the skew.
 */
static inline void sched_info_dequeued(struct task_struct *t)
{
        unsigned long long now = task_rq(t)->clock, delta = 0;

        if (unlikely(sched_info_on()))
                if (t->sched_info.last_queued)
                        delta = now - t->sched_info.last_queued;
        sched_info_reset_dequeued(t);
        t->sched_info.run_delay += delta;

        rq_sched_info_dequeued(task_rq(t), delta);
}

/*
 * Called when a task finally hits the cpu. We can now calculate how
 * long it was waiting to run. We also note when it began so that we
 * can keep stats on how long its timeslice is.
 */
static void sched_info_arrive(struct task_struct *t)
{
        unsigned long long now = task_rq(t)->clock, delta = 0;

        if (t->sched_info.last_queued)
                delta = now - t->sched_info.last_queued;
        sched_info_reset_dequeued(t);
        t->sched_info.run_delay += delta;
        t->sched_info.last_arrival = now;
        t->sched_info.pcount++;

        rq_sched_info_arrive(task_rq(t), delta);
}

/*
 * This function is only called from enqueue_task(), but also only updates
 * the timestamp if it is already not set. It's assumed that
 * sched_info_dequeued() will clear that stamp when appropriate.
 */
static inline void sched_info_queued(struct task_struct *t)
{
        if (unlikely(sched_info_on()))
                if (!t->sched_info.last_queued)
                        t->sched_info.last_queued = task_rq(t)->clock;
}

/*
 * Called when a process ceases being the active-running process, either
 * voluntarily or involuntarily. Now we can calculate how long we ran.
 * Also, if the process is still in the TASK_RUNNING state, call
 * sched_info_queued() to mark that it has now again started waiting on
 * the runqueue.
 */
static inline void sched_info_depart(struct task_struct *t)
{
        unsigned long long delta = task_rq(t)->clock -
                                        t->sched_info.last_arrival;

        rq_sched_info_depart(task_rq(t), delta);

        if (t->state == TASK_RUNNING)
                sched_info_queued(t);
}

/*
 * Called when tasks are switched involuntarily due, typically, to expiring
 * their time slice. (This may also be called when switching to or from
 * the idle task.) We are only called when prev != next.
 */
static inline void
__sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
        struct rq *rq = task_rq(prev);

        /*
         * prev now departs the cpu. It's not interesting to record
         * stats about how efficient we were at scheduling the idle
         * process, however.
         */
        if (prev != rq->idle)
                sched_info_depart(prev);

        if (next != rq->idle)
                sched_info_arrive(next);
}
static inline void
sched_info_switch(struct task_struct *prev, struct task_struct *next)
{
        if (unlikely(sched_info_on()))
                __sched_info_switch(prev, next);
}
#else
#define sched_info_queued(t)                    do { } while (0)
#define sched_info_reset_dequeued(t)            do { } while (0)
#define sched_info_dequeued(t)                  do { } while (0)
#define sched_info_switch(t, next)              do { } while (0)
#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */
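Aside (not part of the diff): the helpers above implement one simple rule: stamp last_queued on the first enqueue, and when the task finally gets a CPU, charge now - last_queued to run_delay and clear the stamp. A standalone userspace model of that bookkeeping, with invented names (sched_info_model, model_queued, model_arrive) and CLOCK_MONOTONIC standing in for rq->clock:

/*
 * Userspace model (not kernel code) of the run_delay bookkeeping:
 * record when the "task" became runnable, charge the wait when it
 * "arrives" on a CPU.
 */
#include <stdio.h>
#include <time.h>
#include <unistd.h>

struct sched_info_model {
        unsigned long long last_queued;         /* 0 means "not waiting" */
        unsigned long long last_arrival;
        unsigned long long run_delay;           /* total time runnable but not running */
};

static unsigned long long now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

static void model_queued(struct sched_info_model *si)
{
        if (!si->last_queued)                   /* only the *first* enqueue stamps */
                si->last_queued = now_ns();
}

static void model_arrive(struct sched_info_model *si)
{
        unsigned long long now = now_ns();

        if (si->last_queued)
                si->run_delay += now - si->last_queued;
        si->last_queued = 0;
        si->last_arrival = now;
}

int main(void)
{
        struct sched_info_model si = { 0 };

        model_queued(&si);
        usleep(10000);                          /* pretend the task waited ~10ms */
        model_arrive(&si);
        printf("run_delay ~= %llu ns\n", si.run_delay);
        return 0;
}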

/*
 * The following are functions that support scheduler-internal time accounting.
 * These functions are generally called at the timer tick. None of this depends
 * on CONFIG_SCHEDSTATS.
 */

/**
 * account_group_user_time - Maintain utime for a thread group.
 *
 * @tsk:        Pointer to task structure.
 * @cputime:    Time value by which to increment the utime field of the
 *              thread_group_cputime structure.
 *
 * If thread group time is being maintained, get the structure for the
 * running CPU and update the utime field there.
 */
static inline void account_group_user_time(struct task_struct *tsk,
                                           cputime_t cputime)
{
        struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;

        if (!cputimer->running)
                return;

        raw_spin_lock(&cputimer->lock);
        cputimer->cputime.utime += cputime;
        raw_spin_unlock(&cputimer->lock);
}

/**
 * account_group_system_time - Maintain stime for a thread group.
 *
 * @tsk:        Pointer to task structure.
 * @cputime:    Time value by which to increment the stime field of the
 *              thread_group_cputime structure.
 *
 * If thread group time is being maintained, get the structure for the
 * running CPU and update the stime field there.
 */
static inline void account_group_system_time(struct task_struct *tsk,
                                             cputime_t cputime)
{
        struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;

        if (!cputimer->running)
                return;

        raw_spin_lock(&cputimer->lock);
        cputimer->cputime.stime += cputime;
        raw_spin_unlock(&cputimer->lock);
}

/**
 * account_group_exec_runtime - Maintain exec runtime for a thread group.
 *
 * @tsk:        Pointer to task structure.
 * @ns:         Time value by which to increment the sum_exec_runtime field
 *              of the thread_group_cputime structure.
 *
 * If thread group time is being maintained, get the structure for the
 * running CPU and update the sum_exec_runtime field there.
 */
static inline void account_group_exec_runtime(struct task_struct *tsk,
                                              unsigned long long ns)
{
        struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;

        if (!cputimer->running)
                return;

        raw_spin_lock(&cputimer->lock);
        cputimer->cputime.sum_exec_runtime += ns;
        raw_spin_unlock(&cputimer->lock);
}
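Aside (not part of the diff): all three account_group_*() helpers share one shape: bail out cheaply while cputimer->running is false (nobody is consuming the group totals), otherwise take the lock and bump the shared counter. A standalone sketch of that guarded-accumulation pattern, with invented names (group_cputime_model, account_user_model) and a pthread mutex standing in for the raw spinlock:

/*
 * Userspace sketch (not kernel code) of guarded accumulation: updates
 * are skipped unless a "running" flag says someone cares, and shared
 * totals are modified only under the lock.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct group_cputime_model {
        bool running;                   /* is anyone consuming these totals? */
        pthread_mutex_t lock;
        unsigned long long utime;
        unsigned long long stime;
};

static void account_user_model(struct group_cputime_model *g,
                               unsigned long long delta)
{
        if (!g->running)                /* cheap early-out, mirrors the kernel helper */
                return;

        pthread_mutex_lock(&g->lock);
        g->utime += delta;
        pthread_mutex_unlock(&g->lock);
}

int main(void)
{
        struct group_cputime_model g = { .running = true };

        pthread_mutex_init(&g.lock, NULL);
        account_user_model(&g, 1000);   /* e.g. one tick worth of user time */
        account_user_model(&g, 1000);
        printf("utime = %llu\n", g.utime);
        return 0;
}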

108
kernel/sched/stop_task.c
Normal file
@@ -0,0 +1,108 @@
#include "sched.h"
|
||||
|
||||
/*
|
||||
* stop-task scheduling class.
|
||||
*
|
||||
* The stop task is the highest priority task in the system, it preempts
|
||||
* everything and will be preempted by nothing.
|
||||
*
|
||||
* See kernel/stop_machine.c
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int
|
||||
select_task_rq_stop(struct task_struct *p, int sd_flag, int flags)
|
||||
{
|
||||
return task_cpu(p); /* stop tasks as never migrate */
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
static void
|
||||
check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
/* we're never preempted */
|
||||
}
|
||||
|
||||
static struct task_struct *pick_next_task_stop(struct rq *rq)
|
||||
{
|
||||
struct task_struct *stop = rq->stop;
|
||||
|
||||
if (stop && stop->on_rq)
|
||||
return stop;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
inc_nr_running(rq);
|
||||
}
|
||||
|
||||
static void
|
||||
dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
|
||||
{
|
||||
dec_nr_running(rq);
|
||||
}
|
||||
|
||||
static void yield_task_stop(struct rq *rq)
|
||||
{
|
||||
BUG(); /* the stop task should never yield, its pointless. */
|
||||
}
|
||||
|
||||
static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
|
||||
{
|
||||
}
|
||||
|
||||
static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
|
||||
{
|
||||
}
|
||||
|
||||
static void set_curr_task_stop(struct rq *rq)
|
||||
{
|
||||
}
|
||||
|
||||
static void switched_to_stop(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
BUG(); /* its impossible to change to this class */
|
||||
}
|
||||
|
||||
static void
|
||||
prio_changed_stop(struct rq *rq, struct task_struct *p, int oldprio)
|
||||
{
|
||||
BUG(); /* how!?, what priority? */
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
get_rr_interval_stop(struct rq *rq, struct task_struct *task)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Simple, special scheduling class for the per-CPU stop tasks:
|
||||
*/
|
||||
const struct sched_class stop_sched_class = {
|
||||
.next = &rt_sched_class,
|
||||
|
||||
.enqueue_task = enqueue_task_stop,
|
||||
.dequeue_task = dequeue_task_stop,
|
||||
.yield_task = yield_task_stop,
|
||||
|
||||
.check_preempt_curr = check_preempt_curr_stop,
|
||||
|
||||
.pick_next_task = pick_next_task_stop,
|
||||
.put_prev_task = put_prev_task_stop,
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
.select_task_rq = select_task_rq_stop,
|
||||
#endif
|
||||
|
||||
.set_curr_task = set_curr_task_stop,
|
||||
.task_tick = task_tick_stop,
|
||||
|
||||
.get_rr_interval = get_rr_interval_stop,
|
||||
|
||||
.prio_changed = prio_changed_stop,
|
||||
.switched_to = switched_to_stop,
|
||||
};
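Aside (not part of the diff): .next = &rt_sched_class is what slots the stop class above the RT class; the core scheduler walks the class list from the highest-priority class downward and takes the first task a class offers, which is why pick_next_task_stop() returning rq->stop wins over everything else. A standalone sketch of that walk, with invented names; the real iteration lives in kernel/sched/core.c:

/*
 * Standalone sketch (not kernel code) of dispatch through a chain of
 * scheduling classes linked by ->next. All names here are invented.
 */
#include <stddef.h>
#include <stdio.h>

struct task { const char *name; };

struct sched_class_model {
        const char *name;
        const struct sched_class_model *next;
        struct task *(*pick_next_task)(void);
};

static struct task stop_task = { "migration/0" };

static struct task *pick_stop(void) { return &stop_task; }      /* stop class has work */
static struct task *pick_rt(void)   { return NULL; }            /* nothing runnable */
static struct task *pick_fair(void) { return NULL; }

static const struct sched_class_model fair_class = { "fair", NULL,        pick_fair };
static const struct sched_class_model rt_class   = { "rt",   &fair_class, pick_rt   };
static const struct sched_class_model stop_class = { "stop", &rt_class,   pick_stop };

int main(void)
{
        const struct sched_class_model *class;
        struct task *next = NULL;

        /* walk from the highest-priority class down, take the first offer */
        for (class = &stop_class; class; class = class->next) {
                next = class->pick_next_task();
                if (next)
                        break;
        }
        printf("picked %s from the %s class\n", next->name, class->name);
        return 0;
}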