Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6

Conflicts:

	arch/sparc64/kernel/pci_psycho.c
This commit is contained in:
David S. Miller
2008-09-16 14:11:43 -07:00
1002 changed files with 34796 additions and 10680 deletions

View File

@@ -243,10 +243,11 @@ static inline int open_arg(int flags, int mask)
static int audit_match_perm(struct audit_context *ctx, int mask)
{
unsigned n;
if (unlikely(!ctx))
return 0;
unsigned n = ctx->major;
n = ctx->major;
switch (audit_classify_syscall(ctx->arch, n)) {
case 0: /* native */
if ((mask & AUDIT_PERM_WRITE) &&

View File

@@ -14,6 +14,8 @@
* 2003-10-22 Updates by Stephen Hemminger.
* 2004 May-July Rework by Paul Jackson.
* 2006 Rework by Paul Menage to use generic cgroups
* 2008 Rework of the scheduler domains and CPU hotplug handling
* by Max Krasnyansky
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of the Linux
@@ -236,9 +238,11 @@ static struct cpuset top_cpuset = {
static DEFINE_MUTEX(callback_mutex);
/* This is ugly, but preserves the userspace API for existing cpuset
/*
* This is ugly, but preserves the userspace API for existing cpuset
* users. If someone tries to mount the "cpuset" filesystem, we
* silently switch it to mount "cgroup" instead */
* silently switch it to mount "cgroup" instead
*/
static int cpuset_get_sb(struct file_system_type *fs_type,
int flags, const char *unused_dev_name,
void *data, struct vfsmount *mnt)
@@ -473,10 +477,9 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
}
/*
* Helper routine for rebuild_sched_domains().
* Helper routine for generate_sched_domains().
* Do cpusets a, b have overlapping cpus_allowed masks?
*/
static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
{
return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
@@ -518,26 +521,15 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
}
/*
* rebuild_sched_domains()
* generate_sched_domains()
*
* This routine will be called to rebuild the scheduler's dynamic
* sched domains:
* - if the flag 'sched_load_balance' of any cpuset with non-empty
* 'cpus' changes,
* - or if the 'cpus' allowed changes in any cpuset which has that
* flag enabled,
* - or if the 'sched_relax_domain_level' of any cpuset which has
* that flag enabled and with non-empty 'cpus' changes,
* - or if any cpuset with non-empty 'cpus' is removed,
* - or if a cpu gets offlined.
*
* This routine builds a partial partition of the systems CPUs
* (the set of non-overlappping cpumask_t's in the array 'part'
* below), and passes that partial partition to the kernel/sched.c
* partition_sched_domains() routine, which will rebuild the
* schedulers load balancing domains (sched domains) as specified
* by that partial partition. A 'partial partition' is a set of
* non-overlapping subsets whose union is a subset of that set.
* This function builds a partial partition of the systems CPUs
* A 'partial partition' is a set of non-overlapping subsets whose
* union is a subset of that set.
* The output of this function needs to be passed to kernel/sched.c
* partition_sched_domains() routine, which will rebuild the scheduler's
* load balancing domains (sched domains) as specified by that partial
* partition.
*
* See "What is sched_load_balance" in Documentation/cpusets.txt
* for a background explanation of this.
@@ -547,13 +539,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
* domains when operating in the severe memory shortage situations
* that could cause allocation failures below.
*
* Call with cgroup_mutex held. May take callback_mutex during
* call due to the kfifo_alloc() and kmalloc() calls. May nest
* a call to the get_online_cpus()/put_online_cpus() pair.
* Must not be called holding callback_mutex, because we must not
* call get_online_cpus() while holding callback_mutex. Elsewhere
* the kernel nests callback_mutex inside get_online_cpus() calls.
* So the reverse nesting would risk an ABBA deadlock.
* Must be called with cgroup_lock held.
*
* The three key local variables below are:
* q - a linked-list queue of cpuset pointers, used to implement a
@@ -588,10 +574,10 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
* element of the partition (one sched domain) to be passed to
* partition_sched_domains().
*/
void rebuild_sched_domains(void)
static int generate_sched_domains(cpumask_t **domains,
struct sched_domain_attr **attributes)
{
LIST_HEAD(q); /* queue of cpusets to be scanned*/
LIST_HEAD(q); /* queue of cpusets to be scanned */
struct cpuset *cp; /* scans q */
struct cpuset **csa; /* array of all cpuset ptrs */
int csn; /* how many cpuset ptrs in csa so far */
@@ -601,23 +587,26 @@ void rebuild_sched_domains(void)
int ndoms; /* number of sched domains in result */
int nslot; /* next empty doms[] cpumask_t slot */
csa = NULL;
ndoms = 0;
doms = NULL;
dattr = NULL;
csa = NULL;
/* Special case for the 99% of systems with one, full, sched domain */
if (is_sched_load_balance(&top_cpuset)) {
ndoms = 1;
doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
if (!doms)
goto rebuild;
goto done;
dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
if (dattr) {
*dattr = SD_ATTR_INIT;
update_domain_attr_tree(dattr, &top_cpuset);
}
*doms = top_cpuset.cpus_allowed;
goto rebuild;
ndoms = 1;
goto done;
}
csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
@@ -680,61 +669,141 @@ restart:
}
}
/* Convert <csn, csa> to <ndoms, doms> */
/*
* Now we know how many domains to create.
* Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
*/
doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
if (!doms)
goto rebuild;
if (!doms) {
ndoms = 0;
goto done;
}
/*
* The rest of the code, including the scheduler, can deal with
* dattr==NULL case. No need to abort if alloc fails.
*/
dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL);
for (nslot = 0, i = 0; i < csn; i++) {
struct cpuset *a = csa[i];
cpumask_t *dp;
int apn = a->pn;
if (apn >= 0) {
cpumask_t *dp = doms + nslot;
if (nslot == ndoms) {
static int warnings = 10;
if (warnings) {
printk(KERN_WARNING
"rebuild_sched_domains confused:"
" nslot %d, ndoms %d, csn %d, i %d,"
" apn %d\n",
nslot, ndoms, csn, i, apn);
warnings--;
}
continue;
}
cpus_clear(*dp);
if (dattr)
*(dattr + nslot) = SD_ATTR_INIT;
for (j = i; j < csn; j++) {
struct cpuset *b = csa[j];
if (apn == b->pn) {
cpus_or(*dp, *dp, b->cpus_allowed);
b->pn = -1;
if (dattr)
update_domain_attr_tree(dattr
+ nslot, b);
}
}
nslot++;
if (apn < 0) {
/* Skip completed partitions */
continue;
}
dp = doms + nslot;
if (nslot == ndoms) {
static int warnings = 10;
if (warnings) {
printk(KERN_WARNING
"rebuild_sched_domains confused:"
" nslot %d, ndoms %d, csn %d, i %d,"
" apn %d\n",
nslot, ndoms, csn, i, apn);
warnings--;
}
continue;
}
cpus_clear(*dp);
if (dattr)
*(dattr + nslot) = SD_ATTR_INIT;
for (j = i; j < csn; j++) {
struct cpuset *b = csa[j];
if (apn == b->pn) {
cpus_or(*dp, *dp, b->cpus_allowed);
if (dattr)
update_domain_attr_tree(dattr + nslot, b);
/* Done with this partition */
b->pn = -1;
}
}
nslot++;
}
BUG_ON(nslot != ndoms);
rebuild:
/* Have scheduler rebuild sched domains */
get_online_cpus();
partition_sched_domains(ndoms, doms, dattr);
put_online_cpus();
done:
kfree(csa);
/* Don't kfree(doms) -- partition_sched_domains() does that. */
/* Don't kfree(dattr) -- partition_sched_domains() does that. */
*domains = doms;
*attributes = dattr;
return ndoms;
}
/*
* Rebuild scheduler domains.
*
* Call with neither cgroup_mutex held nor within get_online_cpus().
* Takes both cgroup_mutex and get_online_cpus().
*
* Cannot be directly called from cpuset code handling changes
* to the cpuset pseudo-filesystem, because it cannot be called
* from code that already holds cgroup_mutex.
*/
static void do_rebuild_sched_domains(struct work_struct *unused)
{
struct sched_domain_attr *attr;
cpumask_t *doms;
int ndoms;
get_online_cpus();
/* Generate domain masks and attrs */
cgroup_lock();
ndoms = generate_sched_domains(&doms, &attr);
cgroup_unlock();
/* Have scheduler rebuild the domains */
partition_sched_domains(ndoms, doms, attr);
put_online_cpus();
}
static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
/*
* Rebuild scheduler domains, asynchronously via workqueue.
*
* If the flag 'sched_load_balance' of any cpuset with non-empty
* 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
* which has that flag enabled, or if any cpuset with a non-empty
* 'cpus' is removed, then call this routine to rebuild the
* scheduler's dynamic sched domains.
*
* The rebuild_sched_domains() and partition_sched_domains()
* routines must nest cgroup_lock() inside get_online_cpus(),
* but such cpuset changes as these must nest that locking the
* other way, holding cgroup_lock() for much of the code.
*
* So in order to avoid an ABBA deadlock, the cpuset code handling
* these user changes delegates the actual sched domain rebuilding
* to a separate workqueue thread, which ends up processing the
* above do_rebuild_sched_domains() function.
*/
static void async_rebuild_sched_domains(void)
{
schedule_work(&rebuild_sched_domains_work);
}
/*
* Accomplishes the same scheduler domain rebuild as the above
* async_rebuild_sched_domains(), however it directly calls the
* rebuild routine synchronously rather than calling it via an
* asynchronous work thread.
*
* This can only be called from code that is not holding
* cgroup_mutex (not nested in a cgroup_lock() call.)
*/
void rebuild_sched_domains(void)
{
do_rebuild_sched_domains(NULL);
}
/**
@@ -774,37 +843,25 @@ static void cpuset_change_cpumask(struct task_struct *tsk,
/**
* update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
* @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
* @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
*
* Called with cgroup_mutex held
*
* The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
* calling callback functions for each.
*
* Return 0 if successful, -errno if not.
* No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
* if @heap != NULL.
*/
static int update_tasks_cpumask(struct cpuset *cs)
static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
{
struct cgroup_scanner scan;
struct ptr_heap heap;
int retval;
/*
* cgroup_scan_tasks() will initialize heap->gt for us.
* heap_init() is still needed here for we should not change
* cs->cpus_allowed when heap_init() fails.
*/
retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
if (retval)
return retval;
scan.cg = cs->css.cgroup;
scan.test_task = cpuset_test_cpumask;
scan.process_task = cpuset_change_cpumask;
scan.heap = &heap;
retval = cgroup_scan_tasks(&scan);
heap_free(&heap);
return retval;
scan.heap = heap;
cgroup_scan_tasks(&scan);
}
/**
@@ -814,6 +871,7 @@ static int update_tasks_cpumask(struct cpuset *cs)
*/
static int update_cpumask(struct cpuset *cs, const char *buf)
{
struct ptr_heap heap;
struct cpuset trialcs;
int retval;
int is_load_balanced;
@@ -848,6 +906,10 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
return 0;
retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
if (retval)
return retval;
is_load_balanced = is_sched_load_balance(&trialcs);
mutex_lock(&callback_mutex);
@@ -858,12 +920,12 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
* Scan tasks in the cpuset, and update the cpumasks of any
* that need an update.
*/
retval = update_tasks_cpumask(cs);
if (retval < 0)
return retval;
update_tasks_cpumask(cs, &heap);
heap_free(&heap);
if (is_load_balanced)
rebuild_sched_domains();
async_rebuild_sched_domains();
return 0;
}
@@ -1090,7 +1152,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
if (val != cs->relax_domain_level) {
cs->relax_domain_level = val;
if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
rebuild_sched_domains();
async_rebuild_sched_domains();
}
return 0;
@@ -1131,7 +1193,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
mutex_unlock(&callback_mutex);
if (cpus_nonempty && balance_flag_changed)
rebuild_sched_domains();
async_rebuild_sched_domains();
return 0;
}
@@ -1492,6 +1554,9 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
default:
BUG();
}
/* Unreachable but makes gcc happy */
return 0;
}
static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
@@ -1504,6 +1569,9 @@ static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
default:
BUG();
}
/* Unrechable but makes gcc happy */
return 0;
}
@@ -1692,15 +1760,9 @@ static struct cgroup_subsys_state *cpuset_create(
}
/*
* Locking note on the strange update_flag() call below:
*
* If the cpuset being removed has its flag 'sched_load_balance'
* enabled, then simulate turning sched_load_balance off, which
* will call rebuild_sched_domains(). The get_online_cpus()
* call in rebuild_sched_domains() must not be made while holding
* callback_mutex. Elsewhere the kernel nests callback_mutex inside
* get_online_cpus() calls. So the reverse nesting would risk an
* ABBA deadlock.
* will call async_rebuild_sched_domains().
*/
static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
@@ -1719,7 +1781,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
struct cgroup_subsys cpuset_subsys = {
.name = "cpuset",
.create = cpuset_create,
.destroy = cpuset_destroy,
.destroy = cpuset_destroy,
.can_attach = cpuset_can_attach,
.attach = cpuset_attach,
.populate = cpuset_populate,
@@ -1811,7 +1873,7 @@ static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
}
/*
* If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs
* If CPU and/or memory hotplug handlers, below, unplug any CPUs
* or memory nodes, we need to walk over the cpuset hierarchy,
* removing that CPU or node from all cpusets. If this removes the
* last CPU or node from a cpuset, then move the tasks in the empty
@@ -1896,41 +1958,12 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
nodes_empty(cp->mems_allowed))
remove_tasks_in_empty_cpuset(cp);
else {
update_tasks_cpumask(cp);
update_tasks_cpumask(cp, NULL);
update_tasks_nodemask(cp, &oldmems);
}
}
}
/*
* The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
* cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to
* track what's online after any CPU or memory node hotplug or unplug event.
*
* Since there are two callers of this routine, one for CPU hotplug
* events and one for memory node hotplug events, we could have coded
* two separate routines here. We code it as a single common routine
* in order to minimize text size.
*/
static void common_cpu_mem_hotplug_unplug(int rebuild_sd)
{
cgroup_lock();
top_cpuset.cpus_allowed = cpu_online_map;
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
scan_for_empty_cpusets(&top_cpuset);
/*
* Scheduler destroys domains on hotplug events.
* Rebuild them based on the current settings.
*/
if (rebuild_sd)
rebuild_sched_domains();
cgroup_unlock();
}
/*
* The top_cpuset tracks what CPUs and Memory Nodes are online,
* period. This is necessary in order to make cpusets transparent
@@ -1939,40 +1972,52 @@ static void common_cpu_mem_hotplug_unplug(int rebuild_sd)
*
* This routine ensures that top_cpuset.cpus_allowed tracks
* cpu_online_map on each CPU hotplug (cpuhp) event.
*
* Called within get_online_cpus(). Needs to call cgroup_lock()
* before calling generate_sched_domains().
*/
static int cpuset_handle_cpuhp(struct notifier_block *unused_nb,
static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
unsigned long phase, void *unused_cpu)
{
struct sched_domain_attr *attr;
cpumask_t *doms;
int ndoms;
switch (phase) {
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
common_cpu_mem_hotplug_unplug(1);
break;
default:
return NOTIFY_DONE;
}
cgroup_lock();
top_cpuset.cpus_allowed = cpu_online_map;
scan_for_empty_cpusets(&top_cpuset);
ndoms = generate_sched_domains(&doms, &attr);
cgroup_unlock();
/* Have scheduler rebuild the domains */
partition_sched_domains(ndoms, doms, attr);
return NOTIFY_OK;
}
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
* Call this routine anytime after you change
* node_states[N_HIGH_MEMORY].
* See also the previous routine cpuset_handle_cpuhp().
* Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
* See also the previous routine cpuset_track_online_cpus().
*/
void cpuset_track_online_nodes(void)
{
common_cpu_mem_hotplug_unplug(0);
cgroup_lock();
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
scan_for_empty_cpusets(&top_cpuset);
cgroup_unlock();
}
#endif
@@ -1987,7 +2032,7 @@ void __init cpuset_init_smp(void)
top_cpuset.cpus_allowed = cpu_online_map;
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
hotcpu_notifier(cpuset_handle_cpuhp, 0);
hotcpu_notifier(cpuset_track_online_cpus, 0);
}
/**

View File

@@ -112,9 +112,9 @@ static void __exit_signal(struct task_struct *tsk)
* We won't ever get here for the group leader, since it
* will have been the last reference on the signal_struct.
*/
sig->utime = cputime_add(sig->utime, tsk->utime);
sig->stime = cputime_add(sig->stime, tsk->stime);
sig->gtime = cputime_add(sig->gtime, tsk->gtime);
sig->utime = cputime_add(sig->utime, task_utime(tsk));
sig->stime = cputime_add(sig->stime, task_stime(tsk));
sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
sig->nvcsw += tsk->nvcsw;
@@ -831,26 +831,50 @@ static void reparent_thread(struct task_struct *p, struct task_struct *father)
* the child reaper process (ie "init") in our pid
* space.
*/
static struct task_struct *find_new_reaper(struct task_struct *father)
{
struct pid_namespace *pid_ns = task_active_pid_ns(father);
struct task_struct *thread;
thread = father;
while_each_thread(father, thread) {
if (thread->flags & PF_EXITING)
continue;
if (unlikely(pid_ns->child_reaper == father))
pid_ns->child_reaper = thread;
return thread;
}
if (unlikely(pid_ns->child_reaper == father)) {
write_unlock_irq(&tasklist_lock);
if (unlikely(pid_ns == &init_pid_ns))
panic("Attempted to kill init!");
zap_pid_ns_processes(pid_ns);
write_lock_irq(&tasklist_lock);
/*
* We can not clear ->child_reaper or leave it alone.
* There may by stealth EXIT_DEAD tasks on ->children,
* forget_original_parent() must move them somewhere.
*/
pid_ns->child_reaper = init_pid_ns.child_reaper;
}
return pid_ns->child_reaper;
}
static void forget_original_parent(struct task_struct *father)
{
struct task_struct *p, *n, *reaper = father;
struct task_struct *p, *n, *reaper;
LIST_HEAD(ptrace_dead);
write_lock_irq(&tasklist_lock);
reaper = find_new_reaper(father);
/*
* First clean up ptrace if we were using it.
*/
ptrace_exit(father, &ptrace_dead);
do {
reaper = next_thread(reaper);
if (reaper == father) {
reaper = task_child_reaper(father);
break;
}
} while (reaper->flags & PF_EXITING);
list_for_each_entry_safe(p, n, &father->children, sibling) {
p->real_parent = reaper;
if (p->parent == father) {
@@ -918,8 +942,8 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
/* mt-exec, de_thread() is waiting for us */
if (thread_group_leader(tsk) &&
tsk->signal->notify_count < 0 &&
tsk->signal->group_exit_task)
tsk->signal->group_exit_task &&
tsk->signal->notify_count < 0)
wake_up_process(tsk->signal->group_exit_task);
write_unlock_irq(&tasklist_lock);
@@ -959,39 +983,6 @@ static void check_stack_usage(void)
static inline void check_stack_usage(void) {}
#endif
static inline void exit_child_reaper(struct task_struct *tsk)
{
if (likely(tsk->group_leader != task_child_reaper(tsk)))
return;
if (tsk->nsproxy->pid_ns == &init_pid_ns)
panic("Attempted to kill init!");
/*
* @tsk is the last thread in the 'cgroup-init' and is exiting.
* Terminate all remaining processes in the namespace and reap them
* before exiting @tsk.
*
* Note that @tsk (last thread of cgroup-init) may not necessarily
* be the child-reaper (i.e main thread of cgroup-init) of the
* namespace i.e the child_reaper may have already exited.
*
* Even after a child_reaper exits, we let it inherit orphaned children,
* because, pid_ns->child_reaper remains valid as long as there is
* at least one living sub-thread in the cgroup init.
* This living sub-thread of the cgroup-init will be notified when
* a child inherited by the 'child-reaper' exits (do_notify_parent()
* uses __group_send_sig_info()). Further, when reaping child processes,
* do_wait() iterates over children of all living sub threads.
* i.e even though 'child_reaper' thread is listed as the parent of the
* orphaned children, any living sub-thread in the cgroup-init can
* perform the role of the child_reaper.
*/
zap_pid_ns_processes(tsk->nsproxy->pid_ns);
}
NORET_TYPE void do_exit(long code)
{
struct task_struct *tsk = current;
@@ -1051,7 +1042,6 @@ NORET_TYPE void do_exit(long code)
}
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
exit_child_reaper(tsk);
hrtimer_cancel(&tsk->signal->real_timer);
exit_itimers(tsk->signal);
}

View File

@@ -875,11 +875,11 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
if (!entry)
return 0;
entry->class = this;
entry->distance = distance;
if (!save_trace(&entry->trace))
return 0;
entry->class = this;
entry->distance = distance;
/*
* Since we never remove from the dependency list, the list can
* be walked lockless by other CPUs, it's only allocation
@@ -3029,7 +3029,7 @@ found_it:
stats = get_lock_stats(hlock_class(hlock));
if (point < ARRAY_SIZE(stats->contention_point))
stats->contention_point[i]++;
stats->contention_point[point]++;
if (lock->cpu != smp_processor_id())
stats->bounces[bounce_contended + !!hlock->read]++;
put_lock_stats(stats);

View File

@@ -472,8 +472,9 @@ static void snprint_time(char *buf, size_t bufsiz, s64 nr)
{
unsigned long rem;
nr += 5; /* for display rounding */
rem = do_div(nr, 1000); /* XXX: do_div_signed */
snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, ((int)rem+5)/10);
snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, (int)rem/10);
}
static void seq_time(struct seq_file *m, s64 time)

View File

@@ -1799,7 +1799,7 @@ static void *module_alloc_update_bounds(unsigned long size)
/* Allocate and load the module: note that size of section 0 is always
zero, and we rely on this for optional sections. */
static struct module *load_module(void __user *umod,
static noinline struct module *load_module(void __user *umod,
unsigned long len,
const char __user *uargs)
{

View File

@@ -179,9 +179,6 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
rc = sys_wait4(-1, NULL, __WALL, NULL);
} while (rc != -ECHILD);
/* Child reaper for the pid namespace is going away */
pid_ns->child_reaper = NULL;
acct_exit_ns(pid_ns);
return;
}

View File

@@ -43,7 +43,7 @@
#include <linux/uaccess.h>
/*
* locking rule: all changes to target_value or requirements or notifiers lists
* locking rule: all changes to requirements or notifiers lists
* or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
* held, taken with _irqsave. One lock to rule them all
*/
@@ -66,7 +66,7 @@ struct pm_qos_object {
struct miscdevice pm_qos_power_miscdev;
char *name;
s32 default_value;
s32 target_value;
atomic_t target_value;
s32 (*comparitor)(s32, s32);
};
@@ -77,7 +77,7 @@ static struct pm_qos_object cpu_dma_pm_qos = {
.notifiers = &cpu_dma_lat_notifier,
.name = "cpu_dma_latency",
.default_value = 2000 * USEC_PER_SEC,
.target_value = 2000 * USEC_PER_SEC,
.target_value = ATOMIC_INIT(2000 * USEC_PER_SEC),
.comparitor = min_compare
};
@@ -87,7 +87,7 @@ static struct pm_qos_object network_lat_pm_qos = {
.notifiers = &network_lat_notifier,
.name = "network_latency",
.default_value = 2000 * USEC_PER_SEC,
.target_value = 2000 * USEC_PER_SEC,
.target_value = ATOMIC_INIT(2000 * USEC_PER_SEC),
.comparitor = min_compare
};
@@ -99,7 +99,7 @@ static struct pm_qos_object network_throughput_pm_qos = {
.notifiers = &network_throughput_notifier,
.name = "network_throughput",
.default_value = 0,
.target_value = 0,
.target_value = ATOMIC_INIT(0),
.comparitor = max_compare
};
@@ -150,11 +150,11 @@ static void update_target(int target)
extreme_value = pm_qos_array[target]->comparitor(
extreme_value, node->value);
}
if (pm_qos_array[target]->target_value != extreme_value) {
if (atomic_read(&pm_qos_array[target]->target_value) != extreme_value) {
call_notifier = 1;
pm_qos_array[target]->target_value = extreme_value;
atomic_set(&pm_qos_array[target]->target_value, extreme_value);
pr_debug(KERN_ERR "new target for qos %d is %d\n", target,
pm_qos_array[target]->target_value);
atomic_read(&pm_qos_array[target]->target_value));
}
spin_unlock_irqrestore(&pm_qos_lock, flags);
@@ -193,14 +193,7 @@ static int find_pm_qos_object_by_minor(int minor)
*/
int pm_qos_requirement(int pm_qos_class)
{
int ret_val;
unsigned long flags;
spin_lock_irqsave(&pm_qos_lock, flags);
ret_val = pm_qos_array[pm_qos_class]->target_value;
spin_unlock_irqrestore(&pm_qos_lock, flags);
return ret_val;
return atomic_read(&pm_qos_array[pm_qos_class]->target_value);
}
EXPORT_SYMBOL_GPL(pm_qos_requirement);

View File

@@ -21,6 +21,7 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/ftrace.h>
#include "power.h"
@@ -255,7 +256,7 @@ static int create_image(int platform_mode)
int hibernation_snapshot(int platform_mode)
{
int error;
int error, ftrace_save;
/* Free memory before shutting down devices. */
error = swsusp_shrink_memory();
@@ -267,6 +268,7 @@ int hibernation_snapshot(int platform_mode)
goto Close;
suspend_console();
ftrace_save = __ftrace_enabled_save();
error = device_suspend(PMSG_FREEZE);
if (error)
goto Recover_platform;
@@ -296,6 +298,7 @@ int hibernation_snapshot(int platform_mode)
Resume_devices:
device_resume(in_suspend ?
(error ? PMSG_RECOVER : PMSG_THAW) : PMSG_RESTORE);
__ftrace_enabled_restore(ftrace_save);
resume_console();
Close:
platform_end(platform_mode);
@@ -366,10 +369,11 @@ static int resume_target_kernel(void)
int hibernation_restore(int platform_mode)
{
int error;
int error, ftrace_save;
pm_prepare_console();
suspend_console();
ftrace_save = __ftrace_enabled_save();
error = device_suspend(PMSG_QUIESCE);
if (error)
goto Finish;
@@ -384,6 +388,7 @@ int hibernation_restore(int platform_mode)
platform_restore_cleanup(platform_mode);
device_resume(PMSG_RECOVER);
Finish:
__ftrace_enabled_restore(ftrace_save);
resume_console();
pm_restore_console();
return error;
@@ -396,7 +401,7 @@ int hibernation_restore(int platform_mode)
int hibernation_platform_enter(void)
{
int error;
int error, ftrace_save;
if (!hibernation_ops)
return -ENOSYS;
@@ -411,6 +416,7 @@ int hibernation_platform_enter(void)
goto Close;
suspend_console();
ftrace_save = __ftrace_enabled_save();
error = device_suspend(PMSG_HIBERNATE);
if (error) {
if (hibernation_ops->recover)
@@ -445,6 +451,7 @@ int hibernation_platform_enter(void)
hibernation_ops->finish();
Resume_devices:
device_resume(PMSG_RESTORE);
__ftrace_enabled_restore(ftrace_save);
resume_console();
Close:
hibernation_ops->end();

View File

@@ -21,6 +21,7 @@
#include <linux/freezer.h>
#include <linux/vmstat.h>
#include <linux/syscalls.h>
#include <linux/ftrace.h>
#include "power.h"
@@ -310,7 +311,7 @@ static int suspend_enter(suspend_state_t state)
*/
int suspend_devices_and_enter(suspend_state_t state)
{
int error;
int error, ftrace_save;
if (!suspend_ops)
return -ENOSYS;
@@ -321,6 +322,7 @@ int suspend_devices_and_enter(suspend_state_t state)
goto Close;
}
suspend_console();
ftrace_save = __ftrace_enabled_save();
suspend_test_start();
error = device_suspend(PMSG_SUSPEND);
if (error) {
@@ -352,6 +354,7 @@ int suspend_devices_and_enter(suspend_state_t state)
suspend_test_start();
device_resume(PMSG_RESUME);
suspend_test_finish("resume devices");
__ftrace_enabled_restore(ftrace_save);
resume_console();
Close:
if (suspend_ops->end)

View File

@@ -362,35 +362,21 @@ int allocate_resource(struct resource *root, struct resource *new,
EXPORT_SYMBOL(allocate_resource);
/**
* insert_resource - Inserts a resource in the resource tree
* @parent: parent of the new resource
* @new: new resource to insert
*
* Returns 0 on success, -EBUSY if the resource can't be inserted.
*
* This function is equivalent to request_resource when no conflict
* happens. If a conflict happens, and the conflicting resources
* entirely fit within the range of the new resource, then the new
* resource is inserted and the conflicting resources become children of
* the new resource.
/*
* Insert a resource into the resource tree. If successful, return NULL,
* otherwise return the conflicting resource (compare to __request_resource())
*/
int insert_resource(struct resource *parent, struct resource *new)
static struct resource * __insert_resource(struct resource *parent, struct resource *new)
{
int result;
struct resource *first, *next;
write_lock(&resource_lock);
for (;; parent = first) {
result = 0;
first = __request_resource(parent, new);
if (!first)
goto out;
return first;
result = -EBUSY;
if (first == parent)
goto out;
return first;
if ((first->start > new->start) || (first->end < new->end))
break;
@@ -401,15 +387,13 @@ int insert_resource(struct resource *parent, struct resource *new)
for (next = first; ; next = next->sibling) {
/* Partial overlap? Bad, and unfixable */
if (next->start < new->start || next->end > new->end)
goto out;
return next;
if (!next->sibling)
break;
if (next->sibling->start > new->end)
break;
}
result = 0;
new->parent = parent;
new->sibling = next->sibling;
new->child = first;
@@ -426,10 +410,64 @@ int insert_resource(struct resource *parent, struct resource *new)
next = next->sibling;
next->sibling = new;
}
return NULL;
}
out:
/**
* insert_resource - Inserts a resource in the resource tree
* @parent: parent of the new resource
* @new: new resource to insert
*
* Returns 0 on success, -EBUSY if the resource can't be inserted.
*
* This function is equivalent to request_resource when no conflict
* happens. If a conflict happens, and the conflicting resources
* entirely fit within the range of the new resource, then the new
* resource is inserted and the conflicting resources become children of
* the new resource.
*/
int insert_resource(struct resource *parent, struct resource *new)
{
struct resource *conflict;
write_lock(&resource_lock);
conflict = __insert_resource(parent, new);
write_unlock(&resource_lock);
return conflict ? -EBUSY : 0;
}
/**
* insert_resource_expand_to_fit - Insert a resource into the resource tree
* @root: root resource descriptor
* @new: new resource to insert
*
* Insert a resource into the resource tree, possibly expanding it in order
* to make it encompass any conflicting resources.
*/
void insert_resource_expand_to_fit(struct resource *root, struct resource *new)
{
if (new->parent)
return;
write_lock(&resource_lock);
for (;;) {
struct resource *conflict;
conflict = __insert_resource(root, new);
if (!conflict)
break;
if (conflict == root)
break;
/* Ok, expand resource to cover the conflict, then try again .. */
if (conflict->start < new->start)
new->start = conflict->start;
if (conflict->end > new->end)
new->end = conflict->end;
printk("Expanded resource %s due to conflict with %s\n", new->name, conflict->name);
}
write_unlock(&resource_lock);
return result;
}
/**

View File

@@ -4178,6 +4178,65 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
cpustat->steal = cputime64_add(cpustat->steal, tmp);
}
/*
* Use precise platform statistics if available:
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
cputime_t task_utime(struct task_struct *p)
{
return p->utime;
}
cputime_t task_stime(struct task_struct *p)
{
return p->stime;
}
#else
cputime_t task_utime(struct task_struct *p)
{
clock_t utime = cputime_to_clock_t(p->utime),
total = utime + cputime_to_clock_t(p->stime);
u64 temp;
/*
* Use CFS's precise accounting:
*/
temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
if (total) {
temp *= utime;
do_div(temp, total);
}
utime = (clock_t)temp;
p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
return p->prev_utime;
}
cputime_t task_stime(struct task_struct *p)
{
clock_t stime;
/*
* Use CFS's precise accounting. (we subtract utime from
* the total, to make sure the total observed by userspace
* grows monotonically - apps rely on that):
*/
stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
cputime_to_clock_t(task_utime(p));
if (stime >= 0)
p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
return p->prev_stime;
}
#endif
inline cputime_t task_gtime(struct task_struct *p)
{
return p->gtime;
}
/*
* This function gets called by the timer code, with HZ frequency.
* We call it with interrupts disabled.
@@ -7637,24 +7696,27 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
* and partition_sched_domains() will fallback to the single partition
* 'fallback_doms', it also forces the domains to be rebuilt.
*
* If doms_new==NULL it will be replaced with cpu_online_map.
* ndoms_new==0 is a special case for destroying existing domains.
* It will not create the default domain.
*
* Call with hotplug lock held
*/
void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
struct sched_domain_attr *dattr_new)
{
int i, j;
int i, j, n;
mutex_lock(&sched_domains_mutex);
/* always unregister in case we don't destroy any domains */
unregister_sched_domain_sysctl();
if (doms_new == NULL)
ndoms_new = 0;
n = doms_new ? ndoms_new : 0;
/* Destroy deleted domains */
for (i = 0; i < ndoms_cur; i++) {
for (j = 0; j < ndoms_new; j++) {
for (j = 0; j < n; j++) {
if (cpus_equal(doms_cur[i], doms_new[j])
&& dattrs_equal(dattr_cur, i, dattr_new, j))
goto match1;
@@ -7667,7 +7729,6 @@ match1:
if (doms_new == NULL) {
ndoms_cur = 0;
ndoms_new = 1;
doms_new = &fallback_doms;
cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
dattr_new = NULL;
@@ -7704,8 +7765,13 @@ match2:
int arch_reinit_sched_domains(void)
{
get_online_cpus();
/* Destroy domains first to force the rebuild */
partition_sched_domains(0, NULL, NULL);
rebuild_sched_domains();
put_online_cpus();
return 0;
}
@@ -7789,7 +7855,7 @@ static int update_sched_domains(struct notifier_block *nfb,
case CPU_ONLINE_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
partition_sched_domains(0, NULL, NULL);
partition_sched_domains(1, NULL, NULL);
return NOTIFY_OK;
default:

View File

@@ -12,19 +12,17 @@
*
* Create a semi stable clock from a mixture of other events, including:
* - gtod
* - jiffies
* - sched_clock()
* - explicit idle events
*
* We use gtod as base and the unstable clock deltas. The deltas are filtered,
* making it monotonic and keeping it within an expected window. This window
* is set up using jiffies.
* making it monotonic and keeping it within an expected window.
*
* Furthermore, explicit sleep and wakeup hooks allow us to account for time
* that is otherwise invisible (TSC gets stopped).
*
* The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
* consistent between cpus (never more than 1 jiffies difference).
* consistent between cpus (never more than 2 jiffies difference).
*/
#include <linux/sched.h>
#include <linux/percpu.h>
@@ -54,7 +52,6 @@ struct sched_clock_data {
*/
raw_spinlock_t lock;
unsigned long tick_jiffies;
u64 tick_raw;
u64 tick_gtod;
u64 clock;
@@ -75,14 +72,12 @@ static inline struct sched_clock_data *cpu_sdc(int cpu)
void sched_clock_init(void)
{
u64 ktime_now = ktime_to_ns(ktime_get());
unsigned long now_jiffies = jiffies;
int cpu;
for_each_possible_cpu(cpu) {
struct sched_clock_data *scd = cpu_sdc(cpu);
scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
scd->tick_jiffies = now_jiffies;
scd->tick_raw = 0;
scd->tick_gtod = ktime_now;
scd->clock = ktime_now;
@@ -91,47 +86,52 @@ void sched_clock_init(void)
sched_clock_running = 1;
}
/*
* min,max except they take wrapping into account
*/
static inline u64 wrap_min(u64 x, u64 y)
{
return (s64)(x - y) < 0 ? x : y;
}
static inline u64 wrap_max(u64 x, u64 y)
{
return (s64)(x - y) > 0 ? x : y;
}
/*
* update the percpu scd from the raw @now value
*
* - filter out backward motion
* - use jiffies to generate a min,max window to clip the raw values
* - use the GTOD tick value to create a window to filter crazy TSC values
*/
static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
{
unsigned long now_jiffies = jiffies;
long delta_jiffies = now_jiffies - scd->tick_jiffies;
u64 clock = scd->clock;
u64 min_clock, max_clock;
s64 delta = now - scd->tick_raw;
u64 clock, min_clock, max_clock;
WARN_ON_ONCE(!irqs_disabled());
min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
if (unlikely(delta < 0)) {
clock++;
goto out;
}
if (unlikely(delta < 0))
delta = 0;
max_clock = min_clock + TICK_NSEC;
/*
* scd->clock = clamp(scd->tick_gtod + delta,
* max(scd->tick_gtod, scd->clock),
* scd->tick_gtod + TICK_NSEC);
*/
if (unlikely(clock + delta > max_clock)) {
if (clock < max_clock)
clock = max_clock;
else
clock++;
} else {
clock += delta;
}
clock = scd->tick_gtod + delta;
min_clock = wrap_max(scd->tick_gtod, scd->clock);
max_clock = scd->tick_gtod + TICK_NSEC;
out:
if (unlikely(clock < min_clock))
clock = min_clock;
clock = wrap_max(clock, min_clock);
clock = wrap_min(clock, max_clock);
scd->tick_jiffies = now_jiffies;
scd->clock = clock;
return clock;
return scd->clock;
}
static void lock_double_clock(struct sched_clock_data *data1,
@@ -171,7 +171,7 @@ u64 sched_clock_cpu(int cpu)
* larger time as the latest time for both
* runqueues. (this creates monotonic movement)
*/
if (likely(remote_clock < this_clock)) {
if (likely((s64)(remote_clock - this_clock) < 0)) {
clock = this_clock;
scd->clock = clock;
} else {
@@ -207,14 +207,9 @@ void sched_clock_tick(void)
now = sched_clock();
__raw_spin_lock(&scd->lock);
__update_sched_clock(scd, now);
/*
* update tick_gtod after __update_sched_clock() because that will
* already observe 1 new jiffy; adding a new tick_gtod to that would
* increase the clock 2 jiffies.
*/
scd->tick_raw = now;
scd->tick_gtod = now_gtod;
__update_sched_clock(scd, now);
__raw_spin_unlock(&scd->lock);
}
@@ -232,18 +227,7 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
*/
void sched_clock_idle_wakeup_event(u64 delta_ns)
{
struct sched_clock_data *scd = this_scd();
/*
* Override the previous timestamp and ignore all
* sched_clock() deltas that occured while we idled,
* and use the PM-provided delta_ns to advance the
* rq clock:
*/
__raw_spin_lock(&scd->lock);
scd->clock += delta_ns;
__raw_spin_unlock(&scd->lock);
sched_clock_tick();
touch_softlockup_watchdog();
}
EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);

View File

@@ -199,6 +199,8 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
if (rt_rq->rt_nr_running)
resched_task(rq_of_rt_rq(rt_rq)->curr);
}
static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
@@ -438,9 +440,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
u64 runtime = sched_rt_runtime(rt_rq);
if (runtime == RUNTIME_INF)
return 0;
if (rt_rq->rt_throttled)
return rt_rq_throttled(rt_rq);
@@ -491,9 +490,11 @@ static void update_curr_rt(struct rq *rq)
rt_rq = rt_rq_of_se(rt_se);
spin_lock(&rt_rq->rt_runtime_lock);
rt_rq->rt_time += delta_exec;
if (sched_rt_runtime_exceeded(rt_rq))
resched_task(curr);
if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
rt_rq->rt_time += delta_exec;
if (sched_rt_runtime_exceeded(rt_rq))
resched_task(curr);
}
spin_unlock(&rt_rq->rt_runtime_lock);
}
}

View File

@@ -210,8 +210,10 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
{
struct call_single_data d;
unsigned long flags;
/* prevent preemption and reschedule on another processor */
/* prevent preemption and reschedule on another processor,
as well as CPU removal */
int me = get_cpu();
int err = 0;
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
@@ -220,7 +222,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
local_irq_save(flags);
func(info);
local_irq_restore(flags);
} else {
} else if ((unsigned)cpu < NR_CPUS && cpu_online(cpu)) {
struct call_single_data *data = NULL;
if (!wait) {
@@ -236,10 +238,12 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
data->func = func;
data->info = info;
generic_exec_single(cpu, data);
} else {
err = -ENXIO; /* CPU not online */
}
put_cpu();
return 0;
return err;
}
EXPORT_SYMBOL(smp_call_function_single);

View File

@@ -233,7 +233,8 @@ static void check_hung_uninterruptible_tasks(int this_cpu)
do_each_thread(g, t) {
if (!--max_count)
goto unlock;
if (t->state & TASK_UNINTERRUPTIBLE)
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
if (t->state == TASK_UNINTERRUPTIBLE)
check_hung_task(t, now);
} while_each_thread(g, t);
unlock:

View File

@@ -157,6 +157,7 @@ static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *
static struct ctl_table root_table[];
static struct ctl_table_root sysctl_table_root;
static struct ctl_table_header root_table_header = {
.count = 1,
.ctl_table = root_table,
.ctl_entry = LIST_HEAD_INIT(sysctl_table_root.default_set.list),
.root = &sysctl_table_root,

View File

@@ -177,7 +177,7 @@ void clockevents_register_device(struct clock_event_device *dev)
/*
* Noop handler when we shut down an event device
*/
static void clockevents_handle_noop(struct clock_event_device *dev)
void clockevents_handle_noop(struct clock_event_device *dev)
{
}
@@ -199,7 +199,6 @@ void clockevents_exchange_device(struct clock_event_device *old,
* released list and do a notify add later.
*/
if (old) {
old->event_handler = clockevents_handle_noop;
clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
list_del(&old->list);
list_add(&old->list, &clockevents_released);

View File

@@ -245,7 +245,7 @@ static void sync_cmos_clock(unsigned long dummy)
if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2)
fail = update_persistent_clock(now);
next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec;
next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2);
if (next.tv_nsec <= 0)
next.tv_nsec += NSEC_PER_SEC;

View File

@@ -175,6 +175,8 @@ static void tick_do_periodic_broadcast(void)
*/
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
ktime_t next;
tick_do_periodic_broadcast();
/*
@@ -185,10 +187,13 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
/*
* Setup the next period for devices, which do not have
* periodic mode:
* periodic mode. We read dev->next_event first and add to it
* when the event alrady expired. clockevents_program_event()
* sets dev->next_event only when the event is really
* programmed to the device.
*/
for (;;) {
ktime_t next = ktime_add(dev->next_event, tick_period);
for (next = dev->next_event; ;) {
next = ktime_add(next, tick_period);
if (!clockevents_program_event(dev, next, ktime_get()))
return;
@@ -205,7 +210,7 @@ static void tick_do_broadcast_on_off(void *why)
struct clock_event_device *bc, *dev;
struct tick_device *td;
unsigned long flags, *reason = why;
int cpu;
int cpu, bc_stopped;
spin_lock_irqsave(&tick_broadcast_lock, flags);
@@ -223,6 +228,8 @@ static void tick_do_broadcast_on_off(void *why)
if (!tick_device_is_functional(dev))
goto out;
bc_stopped = cpus_empty(tick_broadcast_mask);
switch (*reason) {
case CLOCK_EVT_NOTIFY_BROADCAST_ON:
case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
@@ -245,9 +252,10 @@ static void tick_do_broadcast_on_off(void *why)
break;
}
if (cpus_empty(tick_broadcast_mask))
clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
else {
if (cpus_empty(tick_broadcast_mask)) {
if (!bc_stopped)
clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
} else if (bc_stopped) {
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
tick_broadcast_start_periodic(bc);
else
@@ -364,16 +372,8 @@ cpumask_t *tick_get_broadcast_oneshot_mask(void)
static int tick_broadcast_set_event(ktime_t expires, int force)
{
struct clock_event_device *bc = tick_broadcast_device.evtdev;
ktime_t now = ktime_get();
int res;
for(;;) {
res = clockevents_program_event(bc, expires, now);
if (!res || !force)
return res;
now = ktime_get();
expires = ktime_add(now, ktime_set(0, bc->min_delta_ns));
}
return tick_dev_program_event(bc, expires, force);
}
int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
@@ -491,14 +491,52 @@ static void tick_broadcast_clear_oneshot(int cpu)
cpu_clear(cpu, tick_broadcast_oneshot_mask);
}
static void tick_broadcast_init_next_event(cpumask_t *mask, ktime_t expires)
{
struct tick_device *td;
int cpu;
for_each_cpu_mask_nr(cpu, *mask) {
td = &per_cpu(tick_cpu_device, cpu);
if (td->evtdev)
td->evtdev->next_event = expires;
}
}
/**
* tick_broadcast_setup_oneshot - setup the broadcast device
*/
void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
bc->event_handler = tick_handle_oneshot_broadcast;
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
bc->next_event.tv64 = KTIME_MAX;
/* Set it up only once ! */
if (bc->event_handler != tick_handle_oneshot_broadcast) {
int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
int cpu = smp_processor_id();
cpumask_t mask;
bc->event_handler = tick_handle_oneshot_broadcast;
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
/* Take the do_timer update */
tick_do_timer_cpu = cpu;
/*
* We must be careful here. There might be other CPUs
* waiting for periodic broadcast. We need to set the
* oneshot_mask bits for those and program the
* broadcast device to fire.
*/
mask = tick_broadcast_mask;
cpu_clear(cpu, mask);
cpus_or(tick_broadcast_oneshot_mask,
tick_broadcast_oneshot_mask, mask);
if (was_periodic && !cpus_empty(mask)) {
tick_broadcast_init_next_event(&mask, tick_next_period);
tick_broadcast_set_event(tick_next_period, 1);
} else
bc->next_event.tv64 = KTIME_MAX;
}
}
/*

View File

@@ -161,6 +161,7 @@ static void tick_setup_device(struct tick_device *td,
} else {
handler = td->evtdev->event_handler;
next_event = td->evtdev->next_event;
td->evtdev->event_handler = clockevents_handle_noop;
}
td->evtdev = newdev;

View File

@@ -17,6 +17,8 @@ extern void tick_handle_periodic(struct clock_event_device *dev);
extern void tick_setup_oneshot(struct clock_event_device *newdev,
void (*handler)(struct clock_event_device *),
ktime_t nextevt);
extern int tick_dev_program_event(struct clock_event_device *dev,
ktime_t expires, int force);
extern int tick_program_event(ktime_t expires, int force);
extern void tick_oneshot_notify(void);
extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));

View File

@@ -22,22 +22,54 @@
#include "tick-internal.h"
/**
* tick_program_event internal worker function
*/
int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
int force)
{
ktime_t now = ktime_get();
int i;
for (i = 0;;) {
int ret = clockevents_program_event(dev, expires, now);
if (!ret || !force)
return ret;
/*
* We tried 2 times to program the device with the given
* min_delta_ns. If that's not working then we double it
* and emit a warning.
*/
if (++i > 2) {
/* Increase the min. delta and try again */
if (!dev->min_delta_ns)
dev->min_delta_ns = 5000;
else
dev->min_delta_ns += dev->min_delta_ns >> 1;
printk(KERN_WARNING
"CE: %s increasing min_delta_ns to %lu nsec\n",
dev->name ? dev->name : "?",
dev->min_delta_ns << 1);
i = 0;
}
now = ktime_get();
expires = ktime_add_ns(now, dev->min_delta_ns);
}
}
/**
* tick_program_event
*/
int tick_program_event(ktime_t expires, int force)
{
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
ktime_t now = ktime_get();
while (1) {
int ret = clockevents_program_event(dev, expires, now);
if (!ret || !force)
return ret;
now = ktime_get();
expires = ktime_add(now, ktime_set(0, dev->min_delta_ns));
}
return tick_dev_program_event(dev, expires, force);
}
/**
@@ -61,7 +93,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev,
{
newdev->event_handler = handler;
clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
clockevents_program_event(newdev, next_event, ktime_get());
tick_dev_program_event(newdev, next_event, 1);
}
/**

View File

@@ -162,6 +162,8 @@ void tick_nohz_stop_idle(int cpu)
ts->idle_lastupdate = now;
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
ts->idle_active = 0;
sched_clock_idle_wakeup_event(0);
}
}
@@ -177,6 +179,7 @@ static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
}
ts->idle_entrytime = now;
ts->idle_active = 1;
sched_clock_idle_sleep_event();
return now;
}