Merge commit 'v2.6.27-rc6' into x86/unify-cpu-detect
Conflicts: arch/x86/kernel/cpu/amd.c arch/x86/kernel/cpu/common.c arch/x86/kernel/cpu/common_64.c arch/x86/kernel/cpu/feature_names.c include/asm-x86/cpufeature.h Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
318
kernel/cpuset.c
318
kernel/cpuset.c
@@ -14,6 +14,8 @@
|
||||
* 2003-10-22 Updates by Stephen Hemminger.
|
||||
* 2004 May-July Rework by Paul Jackson.
|
||||
* 2006 Rework by Paul Menage to use generic cgroups
|
||||
* 2008 Rework of the scheduler domains and CPU hotplug handling
|
||||
* by Max Krasnyansky
|
||||
*
|
||||
* This file is subject to the terms and conditions of the GNU General Public
|
||||
* License. See the file COPYING in the main directory of the Linux
|
||||
@@ -236,9 +238,11 @@ static struct cpuset top_cpuset = {
|
||||
|
||||
static DEFINE_MUTEX(callback_mutex);
|
||||
|
||||
/* This is ugly, but preserves the userspace API for existing cpuset
|
||||
/*
|
||||
* This is ugly, but preserves the userspace API for existing cpuset
|
||||
* users. If someone tries to mount the "cpuset" filesystem, we
|
||||
* silently switch it to mount "cgroup" instead */
|
||||
* silently switch it to mount "cgroup" instead
|
||||
*/
|
||||
static int cpuset_get_sb(struct file_system_type *fs_type,
|
||||
int flags, const char *unused_dev_name,
|
||||
void *data, struct vfsmount *mnt)
|
||||
@@ -473,10 +477,9 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper routine for rebuild_sched_domains().
|
||||
* Helper routine for generate_sched_domains().
|
||||
* Do cpusets a, b have overlapping cpus_allowed masks?
|
||||
*/
|
||||
|
||||
static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
|
||||
{
|
||||
return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
|
||||
@@ -518,26 +521,15 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
|
||||
}
|
||||
|
||||
/*
|
||||
* rebuild_sched_domains()
|
||||
* generate_sched_domains()
|
||||
*
|
||||
* This routine will be called to rebuild the scheduler's dynamic
|
||||
* sched domains:
|
||||
* - if the flag 'sched_load_balance' of any cpuset with non-empty
|
||||
* 'cpus' changes,
|
||||
* - or if the 'cpus' allowed changes in any cpuset which has that
|
||||
* flag enabled,
|
||||
* - or if the 'sched_relax_domain_level' of any cpuset which has
|
||||
* that flag enabled and with non-empty 'cpus' changes,
|
||||
* - or if any cpuset with non-empty 'cpus' is removed,
|
||||
* - or if a cpu gets offlined.
|
||||
*
|
||||
* This routine builds a partial partition of the systems CPUs
|
||||
* (the set of non-overlappping cpumask_t's in the array 'part'
|
||||
* below), and passes that partial partition to the kernel/sched.c
|
||||
* partition_sched_domains() routine, which will rebuild the
|
||||
* schedulers load balancing domains (sched domains) as specified
|
||||
* by that partial partition. A 'partial partition' is a set of
|
||||
* non-overlapping subsets whose union is a subset of that set.
|
||||
* This function builds a partial partition of the systems CPUs
|
||||
* A 'partial partition' is a set of non-overlapping subsets whose
|
||||
* union is a subset of that set.
|
||||
* The output of this function needs to be passed to kernel/sched.c
|
||||
* partition_sched_domains() routine, which will rebuild the scheduler's
|
||||
* load balancing domains (sched domains) as specified by that partial
|
||||
* partition.
|
||||
*
|
||||
* See "What is sched_load_balance" in Documentation/cpusets.txt
|
||||
* for a background explanation of this.
|
||||
@@ -547,13 +539,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
|
||||
* domains when operating in the severe memory shortage situations
|
||||
* that could cause allocation failures below.
|
||||
*
|
||||
* Call with cgroup_mutex held. May take callback_mutex during
|
||||
* call due to the kfifo_alloc() and kmalloc() calls. May nest
|
||||
* a call to the get_online_cpus()/put_online_cpus() pair.
|
||||
* Must not be called holding callback_mutex, because we must not
|
||||
* call get_online_cpus() while holding callback_mutex. Elsewhere
|
||||
* the kernel nests callback_mutex inside get_online_cpus() calls.
|
||||
* So the reverse nesting would risk an ABBA deadlock.
|
||||
* Must be called with cgroup_lock held.
|
||||
*
|
||||
* The three key local variables below are:
|
||||
* q - a linked-list queue of cpuset pointers, used to implement a
|
||||
@@ -588,10 +574,10 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
|
||||
* element of the partition (one sched domain) to be passed to
|
||||
* partition_sched_domains().
|
||||
*/
|
||||
|
||||
void rebuild_sched_domains(void)
|
||||
static int generate_sched_domains(cpumask_t **domains,
|
||||
struct sched_domain_attr **attributes)
|
||||
{
|
||||
LIST_HEAD(q); /* queue of cpusets to be scanned*/
|
||||
LIST_HEAD(q); /* queue of cpusets to be scanned */
|
||||
struct cpuset *cp; /* scans q */
|
||||
struct cpuset **csa; /* array of all cpuset ptrs */
|
||||
int csn; /* how many cpuset ptrs in csa so far */
|
||||
@@ -601,23 +587,26 @@ void rebuild_sched_domains(void)
|
||||
int ndoms; /* number of sched domains in result */
|
||||
int nslot; /* next empty doms[] cpumask_t slot */
|
||||
|
||||
csa = NULL;
|
||||
ndoms = 0;
|
||||
doms = NULL;
|
||||
dattr = NULL;
|
||||
csa = NULL;
|
||||
|
||||
/* Special case for the 99% of systems with one, full, sched domain */
|
||||
if (is_sched_load_balance(&top_cpuset)) {
|
||||
ndoms = 1;
|
||||
doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
|
||||
if (!doms)
|
||||
goto rebuild;
|
||||
goto done;
|
||||
|
||||
dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
|
||||
if (dattr) {
|
||||
*dattr = SD_ATTR_INIT;
|
||||
update_domain_attr_tree(dattr, &top_cpuset);
|
||||
}
|
||||
*doms = top_cpuset.cpus_allowed;
|
||||
goto rebuild;
|
||||
|
||||
ndoms = 1;
|
||||
goto done;
|
||||
}
|
||||
|
||||
csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
|
||||
@@ -680,61 +669,141 @@ restart:
|
||||
}
|
||||
}
|
||||
|
||||
/* Convert <csn, csa> to <ndoms, doms> */
|
||||
/*
|
||||
* Now we know how many domains to create.
|
||||
* Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
|
||||
*/
|
||||
doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
|
||||
if (!doms)
|
||||
goto rebuild;
|
||||
if (!doms) {
|
||||
ndoms = 0;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* The rest of the code, including the scheduler, can deal with
|
||||
* dattr==NULL case. No need to abort if alloc fails.
|
||||
*/
|
||||
dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL);
|
||||
|
||||
for (nslot = 0, i = 0; i < csn; i++) {
|
||||
struct cpuset *a = csa[i];
|
||||
cpumask_t *dp;
|
||||
int apn = a->pn;
|
||||
|
||||
if (apn >= 0) {
|
||||
cpumask_t *dp = doms + nslot;
|
||||
|
||||
if (nslot == ndoms) {
|
||||
static int warnings = 10;
|
||||
if (warnings) {
|
||||
printk(KERN_WARNING
|
||||
"rebuild_sched_domains confused:"
|
||||
" nslot %d, ndoms %d, csn %d, i %d,"
|
||||
" apn %d\n",
|
||||
nslot, ndoms, csn, i, apn);
|
||||
warnings--;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
cpus_clear(*dp);
|
||||
if (dattr)
|
||||
*(dattr + nslot) = SD_ATTR_INIT;
|
||||
for (j = i; j < csn; j++) {
|
||||
struct cpuset *b = csa[j];
|
||||
|
||||
if (apn == b->pn) {
|
||||
cpus_or(*dp, *dp, b->cpus_allowed);
|
||||
b->pn = -1;
|
||||
if (dattr)
|
||||
update_domain_attr_tree(dattr
|
||||
+ nslot, b);
|
||||
}
|
||||
}
|
||||
nslot++;
|
||||
if (apn < 0) {
|
||||
/* Skip completed partitions */
|
||||
continue;
|
||||
}
|
||||
|
||||
dp = doms + nslot;
|
||||
|
||||
if (nslot == ndoms) {
|
||||
static int warnings = 10;
|
||||
if (warnings) {
|
||||
printk(KERN_WARNING
|
||||
"rebuild_sched_domains confused:"
|
||||
" nslot %d, ndoms %d, csn %d, i %d,"
|
||||
" apn %d\n",
|
||||
nslot, ndoms, csn, i, apn);
|
||||
warnings--;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
cpus_clear(*dp);
|
||||
if (dattr)
|
||||
*(dattr + nslot) = SD_ATTR_INIT;
|
||||
for (j = i; j < csn; j++) {
|
||||
struct cpuset *b = csa[j];
|
||||
|
||||
if (apn == b->pn) {
|
||||
cpus_or(*dp, *dp, b->cpus_allowed);
|
||||
if (dattr)
|
||||
update_domain_attr_tree(dattr + nslot, b);
|
||||
|
||||
/* Done with this partition */
|
||||
b->pn = -1;
|
||||
}
|
||||
}
|
||||
nslot++;
|
||||
}
|
||||
BUG_ON(nslot != ndoms);
|
||||
|
||||
rebuild:
|
||||
/* Have scheduler rebuild sched domains */
|
||||
get_online_cpus();
|
||||
partition_sched_domains(ndoms, doms, dattr);
|
||||
put_online_cpus();
|
||||
|
||||
done:
|
||||
kfree(csa);
|
||||
/* Don't kfree(doms) -- partition_sched_domains() does that. */
|
||||
/* Don't kfree(dattr) -- partition_sched_domains() does that. */
|
||||
|
||||
*domains = doms;
|
||||
*attributes = dattr;
|
||||
return ndoms;
|
||||
}
|
||||
|
||||
/*
|
||||
* Rebuild scheduler domains.
|
||||
*
|
||||
* Call with neither cgroup_mutex held nor within get_online_cpus().
|
||||
* Takes both cgroup_mutex and get_online_cpus().
|
||||
*
|
||||
* Cannot be directly called from cpuset code handling changes
|
||||
* to the cpuset pseudo-filesystem, because it cannot be called
|
||||
* from code that already holds cgroup_mutex.
|
||||
*/
|
||||
static void do_rebuild_sched_domains(struct work_struct *unused)
|
||||
{
|
||||
struct sched_domain_attr *attr;
|
||||
cpumask_t *doms;
|
||||
int ndoms;
|
||||
|
||||
get_online_cpus();
|
||||
|
||||
/* Generate domain masks and attrs */
|
||||
cgroup_lock();
|
||||
ndoms = generate_sched_domains(&doms, &attr);
|
||||
cgroup_unlock();
|
||||
|
||||
/* Have scheduler rebuild the domains */
|
||||
partition_sched_domains(ndoms, doms, attr);
|
||||
|
||||
put_online_cpus();
|
||||
}
|
||||
|
||||
static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
|
||||
|
||||
/*
|
||||
* Rebuild scheduler domains, asynchronously via workqueue.
|
||||
*
|
||||
* If the flag 'sched_load_balance' of any cpuset with non-empty
|
||||
* 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
|
||||
* which has that flag enabled, or if any cpuset with a non-empty
|
||||
* 'cpus' is removed, then call this routine to rebuild the
|
||||
* scheduler's dynamic sched domains.
|
||||
*
|
||||
* The rebuild_sched_domains() and partition_sched_domains()
|
||||
* routines must nest cgroup_lock() inside get_online_cpus(),
|
||||
* but such cpuset changes as these must nest that locking the
|
||||
* other way, holding cgroup_lock() for much of the code.
|
||||
*
|
||||
* So in order to avoid an ABBA deadlock, the cpuset code handling
|
||||
* these user changes delegates the actual sched domain rebuilding
|
||||
* to a separate workqueue thread, which ends up processing the
|
||||
* above do_rebuild_sched_domains() function.
|
||||
*/
|
||||
static void async_rebuild_sched_domains(void)
|
||||
{
|
||||
schedule_work(&rebuild_sched_domains_work);
|
||||
}
|
||||
|
||||
/*
|
||||
* Accomplishes the same scheduler domain rebuild as the above
|
||||
* async_rebuild_sched_domains(), however it directly calls the
|
||||
* rebuild routine synchronously rather than calling it via an
|
||||
* asynchronous work thread.
|
||||
*
|
||||
* This can only be called from code that is not holding
|
||||
* cgroup_mutex (not nested in a cgroup_lock() call.)
|
||||
*/
|
||||
void rebuild_sched_domains(void)
|
||||
{
|
||||
do_rebuild_sched_domains(NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -863,7 +932,7 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
|
||||
return retval;
|
||||
|
||||
if (is_load_balanced)
|
||||
rebuild_sched_domains();
|
||||
async_rebuild_sched_domains();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1090,7 +1159,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
|
||||
if (val != cs->relax_domain_level) {
|
||||
cs->relax_domain_level = val;
|
||||
if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
|
||||
rebuild_sched_domains();
|
||||
async_rebuild_sched_domains();
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -1131,7 +1200,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
|
||||
mutex_unlock(&callback_mutex);
|
||||
|
||||
if (cpus_nonempty && balance_flag_changed)
|
||||
rebuild_sched_domains();
|
||||
async_rebuild_sched_domains();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1492,6 +1561,9 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
/* Unreachable but makes gcc happy */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
|
||||
@@ -1504,6 +1576,9 @@ static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
/* Unreachable but makes gcc happy */
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1692,15 +1767,9 @@ static struct cgroup_subsys_state *cpuset_create(
|
||||
}
|
||||
|
||||
/*
|
||||
* Locking note on the strange update_flag() call below:
|
||||
*
|
||||
* If the cpuset being removed has its flag 'sched_load_balance'
|
||||
* enabled, then simulate turning sched_load_balance off, which
|
||||
* will call rebuild_sched_domains(). The get_online_cpus()
|
||||
* call in rebuild_sched_domains() must not be made while holding
|
||||
* callback_mutex. Elsewhere the kernel nests callback_mutex inside
|
||||
* get_online_cpus() calls. So the reverse nesting would risk an
|
||||
* ABBA deadlock.
|
||||
* will call async_rebuild_sched_domains().
|
||||
*/
|
||||
|
||||
static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
|
||||
@@ -1719,7 +1788,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
|
||||
struct cgroup_subsys cpuset_subsys = {
|
||||
.name = "cpuset",
|
||||
.create = cpuset_create,
|
||||
.destroy = cpuset_destroy,
|
||||
.destroy = cpuset_destroy,
|
||||
.can_attach = cpuset_can_attach,
|
||||
.attach = cpuset_attach,
|
||||
.populate = cpuset_populate,
|
||||
@@ -1811,7 +1880,7 @@ static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
|
||||
}
|
||||
|
||||
/*
|
||||
* If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs
|
||||
* If CPU and/or memory hotplug handlers, below, unplug any CPUs
|
||||
* or memory nodes, we need to walk over the cpuset hierarchy,
|
||||
* removing that CPU or node from all cpusets. If this removes the
|
||||
* last CPU or node from a cpuset, then move the tasks in the empty
|
||||
@@ -1902,35 +1971,6 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
|
||||
* cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to
|
||||
* track what's online after any CPU or memory node hotplug or unplug event.
|
||||
*
|
||||
* Since there are two callers of this routine, one for CPU hotplug
|
||||
* events and one for memory node hotplug events, we could have coded
|
||||
* two separate routines here. We code it as a single common routine
|
||||
* in order to minimize text size.
|
||||
*/
|
||||
|
||||
static void common_cpu_mem_hotplug_unplug(int rebuild_sd)
|
||||
{
|
||||
cgroup_lock();
|
||||
|
||||
top_cpuset.cpus_allowed = cpu_online_map;
|
||||
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
|
||||
scan_for_empty_cpusets(&top_cpuset);
|
||||
|
||||
/*
|
||||
* Scheduler destroys domains on hotplug events.
|
||||
* Rebuild them based on the current settings.
|
||||
*/
|
||||
if (rebuild_sd)
|
||||
rebuild_sched_domains();
|
||||
|
||||
cgroup_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
* The top_cpuset tracks what CPUs and Memory Nodes are online,
|
||||
* period. This is necessary in order to make cpusets transparent
|
||||
@@ -1939,40 +1979,52 @@ static void common_cpu_mem_hotplug_unplug(int rebuild_sd)
|
||||
*
|
||||
* This routine ensures that top_cpuset.cpus_allowed tracks
|
||||
* cpu_online_map on each CPU hotplug (cpuhp) event.
|
||||
*
|
||||
* Called within get_online_cpus(). Needs to call cgroup_lock()
|
||||
* before calling generate_sched_domains().
|
||||
*/
|
||||
|
||||
static int cpuset_handle_cpuhp(struct notifier_block *unused_nb,
|
||||
static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
|
||||
unsigned long phase, void *unused_cpu)
|
||||
{
|
||||
struct sched_domain_attr *attr;
|
||||
cpumask_t *doms;
|
||||
int ndoms;
|
||||
|
||||
switch (phase) {
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_UP_CANCELED_FROZEN:
|
||||
case CPU_DOWN_FAILED:
|
||||
case CPU_DOWN_FAILED_FROZEN:
|
||||
case CPU_ONLINE:
|
||||
case CPU_ONLINE_FROZEN:
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
common_cpu_mem_hotplug_unplug(1);
|
||||
break;
|
||||
|
||||
default:
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
cgroup_lock();
|
||||
top_cpuset.cpus_allowed = cpu_online_map;
|
||||
scan_for_empty_cpusets(&top_cpuset);
|
||||
ndoms = generate_sched_domains(&doms, &attr);
|
||||
cgroup_unlock();
|
||||
|
||||
/* Have scheduler rebuild the domains */
|
||||
partition_sched_domains(ndoms, doms, attr);
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
/*
|
||||
* Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
|
||||
* Call this routine anytime after you change
|
||||
* node_states[N_HIGH_MEMORY].
|
||||
* See also the previous routine cpuset_handle_cpuhp().
|
||||
* Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
|
||||
* See also the previous routine cpuset_track_online_cpus().
|
||||
*/
|
||||
|
||||
void cpuset_track_online_nodes(void)
|
||||
{
|
||||
common_cpu_mem_hotplug_unplug(0);
|
||||
cgroup_lock();
|
||||
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
|
||||
scan_for_empty_cpusets(&top_cpuset);
|
||||
cgroup_unlock();
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1987,7 +2039,7 @@ void __init cpuset_init_smp(void)
|
||||
top_cpuset.cpus_allowed = cpu_online_map;
|
||||
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
|
||||
|
||||
hotcpu_notifier(cpuset_handle_cpuhp, 0);
|
||||
hotcpu_notifier(cpuset_track_online_cpus, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -112,9 +112,9 @@ static void __exit_signal(struct task_struct *tsk)
|
||||
* We won't ever get here for the group leader, since it
|
||||
* will have been the last reference on the signal_struct.
|
||||
*/
|
||||
sig->utime = cputime_add(sig->utime, tsk->utime);
|
||||
sig->stime = cputime_add(sig->stime, tsk->stime);
|
||||
sig->gtime = cputime_add(sig->gtime, tsk->gtime);
|
||||
sig->utime = cputime_add(sig->utime, task_utime(tsk));
|
||||
sig->stime = cputime_add(sig->stime, task_stime(tsk));
|
||||
sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
|
||||
sig->min_flt += tsk->min_flt;
|
||||
sig->maj_flt += tsk->maj_flt;
|
||||
sig->nvcsw += tsk->nvcsw;
|
||||
|
@@ -4178,6 +4178,65 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
|
||||
cpustat->steal = cputime64_add(cpustat->steal, tmp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Use precise platform statistics if available:
|
||||
*/
|
||||
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
|
||||
cputime_t task_utime(struct task_struct *p)
|
||||
{
|
||||
return p->utime;
|
||||
}
|
||||
|
||||
cputime_t task_stime(struct task_struct *p)
|
||||
{
|
||||
return p->stime;
|
||||
}
|
||||
#else
|
||||
cputime_t task_utime(struct task_struct *p)
|
||||
{
|
||||
clock_t utime = cputime_to_clock_t(p->utime),
|
||||
total = utime + cputime_to_clock_t(p->stime);
|
||||
u64 temp;
|
||||
|
||||
/*
|
||||
* Use CFS's precise accounting:
|
||||
*/
|
||||
temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
|
||||
|
||||
if (total) {
|
||||
temp *= utime;
|
||||
do_div(temp, total);
|
||||
}
|
||||
utime = (clock_t)temp;
|
||||
|
||||
p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
|
||||
return p->prev_utime;
|
||||
}
|
||||
|
||||
cputime_t task_stime(struct task_struct *p)
|
||||
{
|
||||
clock_t stime;
|
||||
|
||||
/*
|
||||
* Use CFS's precise accounting. (we subtract utime from
|
||||
* the total, to make sure the total observed by userspace
|
||||
* grows monotonically - apps rely on that):
|
||||
*/
|
||||
stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
|
||||
cputime_to_clock_t(task_utime(p));
|
||||
|
||||
if (stime >= 0)
|
||||
p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
|
||||
|
||||
return p->prev_stime;
|
||||
}
|
||||
#endif
|
||||
|
||||
inline cputime_t task_gtime(struct task_struct *p)
|
||||
{
|
||||
return p->gtime;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function gets called by the timer code, with HZ frequency.
|
||||
* We call it with interrupts disabled.
|
||||
@@ -7637,24 +7696,27 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
|
||||
* and partition_sched_domains() will fallback to the single partition
|
||||
* 'fallback_doms', it also forces the domains to be rebuilt.
|
||||
*
|
||||
* If doms_new==NULL it will be replaced with cpu_online_map.
|
||||
* ndoms_new==0 is a special case for destroying existing domains.
|
||||
* It will not create the default domain.
|
||||
*
|
||||
* Call with hotplug lock held
|
||||
*/
|
||||
void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
|
||||
struct sched_domain_attr *dattr_new)
|
||||
{
|
||||
int i, j;
|
||||
int i, j, n;
|
||||
|
||||
mutex_lock(&sched_domains_mutex);
|
||||
|
||||
/* always unregister in case we don't destroy any domains */
|
||||
unregister_sched_domain_sysctl();
|
||||
|
||||
if (doms_new == NULL)
|
||||
ndoms_new = 0;
|
||||
n = doms_new ? ndoms_new : 0;
|
||||
|
||||
/* Destroy deleted domains */
|
||||
for (i = 0; i < ndoms_cur; i++) {
|
||||
for (j = 0; j < ndoms_new; j++) {
|
||||
for (j = 0; j < n; j++) {
|
||||
if (cpus_equal(doms_cur[i], doms_new[j])
|
||||
&& dattrs_equal(dattr_cur, i, dattr_new, j))
|
||||
goto match1;
|
||||
@@ -7667,7 +7729,6 @@ match1:
|
||||
|
||||
if (doms_new == NULL) {
|
||||
ndoms_cur = 0;
|
||||
ndoms_new = 1;
|
||||
doms_new = &fallback_doms;
|
||||
cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
|
||||
dattr_new = NULL;
|
||||
@@ -7704,8 +7765,13 @@ match2:
|
||||
int arch_reinit_sched_domains(void)
|
||||
{
|
||||
get_online_cpus();
|
||||
|
||||
/* Destroy domains first to force the rebuild */
|
||||
partition_sched_domains(0, NULL, NULL);
|
||||
|
||||
rebuild_sched_domains();
|
||||
put_online_cpus();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -7789,7 +7855,7 @@ static int update_sched_domains(struct notifier_block *nfb,
|
||||
case CPU_ONLINE_FROZEN:
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
partition_sched_domains(0, NULL, NULL);
|
||||
partition_sched_domains(1, NULL, NULL);
|
||||
return NOTIFY_OK;
|
||||
|
||||
default:
|
||||
|
@@ -177,7 +177,7 @@ void clockevents_register_device(struct clock_event_device *dev)
|
||||
/*
|
||||
* Noop handler when we shut down an event device
|
||||
*/
|
||||
static void clockevents_handle_noop(struct clock_event_device *dev)
|
||||
void clockevents_handle_noop(struct clock_event_device *dev)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -199,7 +199,6 @@ void clockevents_exchange_device(struct clock_event_device *old,
|
||||
* released list and do a notify add later.
|
||||
*/
|
||||
if (old) {
|
||||
old->event_handler = clockevents_handle_noop;
|
||||
clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
|
||||
list_del(&old->list);
|
||||
list_add(&old->list, &clockevents_released);
|
||||
|
@@ -245,7 +245,7 @@ static void sync_cmos_clock(unsigned long dummy)
|
||||
if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2)
|
||||
fail = update_persistent_clock(now);
|
||||
|
||||
next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec;
|
||||
next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2);
|
||||
if (next.tv_nsec <= 0)
|
||||
next.tv_nsec += NSEC_PER_SEC;
|
||||
|
||||
|
@@ -175,6 +175,8 @@ static void tick_do_periodic_broadcast(void)
|
||||
*/
|
||||
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
|
||||
{
|
||||
ktime_t next;
|
||||
|
||||
tick_do_periodic_broadcast();
|
||||
|
||||
/*
|
||||
@@ -185,10 +187,13 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
|
||||
|
||||
/*
|
||||
* Setup the next period for devices, which do not have
|
||||
* periodic mode:
|
||||
* periodic mode. We read dev->next_event first and add to it
|
||||
* when the event already expired. clockevents_program_event()
|
||||
* sets dev->next_event only when the event is really
|
||||
* programmed to the device.
|
||||
*/
|
||||
for (;;) {
|
||||
ktime_t next = ktime_add(dev->next_event, tick_period);
|
||||
for (next = dev->next_event; ;) {
|
||||
next = ktime_add(next, tick_period);
|
||||
|
||||
if (!clockevents_program_event(dev, next, ktime_get()))
|
||||
return;
|
||||
@@ -205,7 +210,7 @@ static void tick_do_broadcast_on_off(void *why)
|
||||
struct clock_event_device *bc, *dev;
|
||||
struct tick_device *td;
|
||||
unsigned long flags, *reason = why;
|
||||
int cpu;
|
||||
int cpu, bc_stopped;
|
||||
|
||||
spin_lock_irqsave(&tick_broadcast_lock, flags);
|
||||
|
||||
@@ -223,6 +228,8 @@ static void tick_do_broadcast_on_off(void *why)
|
||||
if (!tick_device_is_functional(dev))
|
||||
goto out;
|
||||
|
||||
bc_stopped = cpus_empty(tick_broadcast_mask);
|
||||
|
||||
switch (*reason) {
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_ON:
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
|
||||
@@ -245,9 +252,10 @@ static void tick_do_broadcast_on_off(void *why)
|
||||
break;
|
||||
}
|
||||
|
||||
if (cpus_empty(tick_broadcast_mask))
|
||||
clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
|
||||
else {
|
||||
if (cpus_empty(tick_broadcast_mask)) {
|
||||
if (!bc_stopped)
|
||||
clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
|
||||
} else if (bc_stopped) {
|
||||
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
|
||||
tick_broadcast_start_periodic(bc);
|
||||
else
|
||||
@@ -364,16 +372,8 @@ cpumask_t *tick_get_broadcast_oneshot_mask(void)
|
||||
static int tick_broadcast_set_event(ktime_t expires, int force)
|
||||
{
|
||||
struct clock_event_device *bc = tick_broadcast_device.evtdev;
|
||||
ktime_t now = ktime_get();
|
||||
int res;
|
||||
|
||||
for(;;) {
|
||||
res = clockevents_program_event(bc, expires, now);
|
||||
if (!res || !force)
|
||||
return res;
|
||||
now = ktime_get();
|
||||
expires = ktime_add(now, ktime_set(0, bc->min_delta_ns));
|
||||
}
|
||||
return tick_dev_program_event(bc, expires, force);
|
||||
}
|
||||
|
||||
int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
|
||||
@@ -491,14 +491,52 @@ static void tick_broadcast_clear_oneshot(int cpu)
|
||||
cpu_clear(cpu, tick_broadcast_oneshot_mask);
|
||||
}
|
||||
|
||||
/*
 * Set next_event to @expires on the per-cpu tick device of every CPU
 * in @mask. CPUs whose tick device has no event device are skipped.
 */
static void tick_broadcast_init_next_event(cpumask_t *mask, ktime_t expires)
{
	int cpu;

	for_each_cpu_mask_nr(cpu, *mask) {
		struct clock_event_device *evt;

		evt = per_cpu(tick_cpu_device, cpu).evtdev;
		if (!evt)
			continue;

		evt->next_event = expires;
	}
}
|
||||
|
||||
/**
|
||||
* tick_broadcast_setup_oneshot - setup the broadcast device
|
||||
*/
|
||||
void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
|
||||
{
|
||||
bc->event_handler = tick_handle_oneshot_broadcast;
|
||||
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
|
||||
bc->next_event.tv64 = KTIME_MAX;
|
||||
/* Set it up only once ! */
|
||||
if (bc->event_handler != tick_handle_oneshot_broadcast) {
|
||||
int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
|
||||
int cpu = smp_processor_id();
|
||||
cpumask_t mask;
|
||||
|
||||
bc->event_handler = tick_handle_oneshot_broadcast;
|
||||
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
|
||||
|
||||
/* Take the do_timer update */
|
||||
tick_do_timer_cpu = cpu;
|
||||
|
||||
/*
|
||||
* We must be careful here. There might be other CPUs
|
||||
* waiting for periodic broadcast. We need to set the
|
||||
* oneshot_mask bits for those and program the
|
||||
* broadcast device to fire.
|
||||
*/
|
||||
mask = tick_broadcast_mask;
|
||||
cpu_clear(cpu, mask);
|
||||
cpus_or(tick_broadcast_oneshot_mask,
|
||||
tick_broadcast_oneshot_mask, mask);
|
||||
|
||||
if (was_periodic && !cpus_empty(mask)) {
|
||||
tick_broadcast_init_next_event(&mask, tick_next_period);
|
||||
tick_broadcast_set_event(tick_next_period, 1);
|
||||
} else
|
||||
bc->next_event.tv64 = KTIME_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -161,6 +161,7 @@ static void tick_setup_device(struct tick_device *td,
|
||||
} else {
|
||||
handler = td->evtdev->event_handler;
|
||||
next_event = td->evtdev->next_event;
|
||||
td->evtdev->event_handler = clockevents_handle_noop;
|
||||
}
|
||||
|
||||
td->evtdev = newdev;
|
||||
|
@@ -17,6 +17,8 @@ extern void tick_handle_periodic(struct clock_event_device *dev);
|
||||
extern void tick_setup_oneshot(struct clock_event_device *newdev,
|
||||
void (*handler)(struct clock_event_device *),
|
||||
ktime_t nextevt);
|
||||
extern int tick_dev_program_event(struct clock_event_device *dev,
|
||||
ktime_t expires, int force);
|
||||
extern int tick_program_event(ktime_t expires, int force);
|
||||
extern void tick_oneshot_notify(void);
|
||||
extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
|
||||
|
@@ -22,22 +22,54 @@
|
||||
|
||||
#include "tick-internal.h"
|
||||
|
||||
/**
|
||||
* tick_program_event internal worker function
|
||||
*/
|
||||
int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
|
||||
int force)
|
||||
{
|
||||
ktime_t now = ktime_get();
|
||||
int i;
|
||||
|
||||
for (i = 0;;) {
|
||||
int ret = clockevents_program_event(dev, expires, now);
|
||||
|
||||
if (!ret || !force)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* We tried 2 times to program the device with the given
|
||||
* min_delta_ns. If that's not working then we double it
|
||||
* and emit a warning.
|
||||
*/
|
||||
if (++i > 2) {
|
||||
/* Increase the min. delta and try again */
|
||||
if (!dev->min_delta_ns)
|
||||
dev->min_delta_ns = 5000;
|
||||
else
|
||||
dev->min_delta_ns += dev->min_delta_ns >> 1;
|
||||
|
||||
printk(KERN_WARNING
|
||||
"CE: %s increasing min_delta_ns to %lu nsec\n",
|
||||
dev->name ? dev->name : "?",
|
||||
dev->min_delta_ns << 1);
|
||||
|
||||
i = 0;
|
||||
}
|
||||
|
||||
now = ktime_get();
|
||||
expires = ktime_add_ns(now, dev->min_delta_ns);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* tick_program_event
|
||||
*/
|
||||
int tick_program_event(ktime_t expires, int force)
|
||||
{
|
||||
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
|
||||
ktime_t now = ktime_get();
|
||||
|
||||
while (1) {
|
||||
int ret = clockevents_program_event(dev, expires, now);
|
||||
|
||||
if (!ret || !force)
|
||||
return ret;
|
||||
now = ktime_get();
|
||||
expires = ktime_add(now, ktime_set(0, dev->min_delta_ns));
|
||||
}
|
||||
return tick_dev_program_event(dev, expires, force);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -61,7 +93,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev,
|
||||
{
|
||||
newdev->event_handler = handler;
|
||||
clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
|
||||
clockevents_program_event(newdev, next_event, ktime_get());
|
||||
tick_dev_program_event(newdev, next_event, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -162,6 +162,8 @@ void tick_nohz_stop_idle(int cpu)
|
||||
ts->idle_lastupdate = now;
|
||||
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
|
||||
ts->idle_active = 0;
|
||||
|
||||
sched_clock_idle_wakeup_event(0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -177,6 +179,7 @@ static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
|
||||
}
|
||||
ts->idle_entrytime = now;
|
||||
ts->idle_active = 1;
|
||||
sched_clock_idle_sleep_event();
|
||||
return now;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user