Merge branch 'percpu-for-linus' into percpu-for-next

Conflicts:
	arch/sparc/kernel/smp_64.c
	arch/x86/kernel/cpu/perf_counter.c
	arch/x86/kernel/setup_percpu.c
	drivers/cpufreq/cpufreq_ondemand.c
	mm/percpu.c

Conflicts in the core and arch percpu code are mostly from commit
ed78e1e078dd44249f88b1dd8c76dafb39567161, which replaced many uses of
num_possible_cpus() with nr_cpu_ids.  As the for-next branch has moved
all the first-chunk allocators into mm/percpu.c, those changes are
moved from the arch code to mm/percpu.c.

Signed-off-by: Tejun Heo <tj@kernel.org>
Tejun Heo
2009-08-14 14:41:02 +09:00
1985 changed files with 49463 additions and 28163 deletions
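To make the nr_cpu_ids vs. num_possible_cpus() distinction behind those conflicts concrete, here is a minimal, hypothetical sketch (not taken from this merge; example_per_cpu_data and example_alloc are made-up names). Possible CPU ids may be sparse, so an array indexed by cpu id must cover 0..nr_cpu_ids-1, whereas num_possible_cpus() only counts the bits set in cpu_possible_mask:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>

/* Hypothetical example, not part of this diff. */
static int *example_per_cpu_data;

static int __init example_alloc(void)
{
        int cpu;

        /*
         * One slot per possible cpu id, including holes in the id space;
         * num_possible_cpus() would undercount if the ids are sparse.
         */
        example_per_cpu_data = kcalloc(nr_cpu_ids,
                                       sizeof(*example_per_cpu_data),
                                       GFP_KERNEL);
        if (!example_per_cpu_data)
                return -ENOMEM;

        for_each_possible_cpu(cpu)
                example_per_cpu_data[cpu] = cpu;

        return 0;
}

The substitutions that actually caused the conflicts live in mm/percpu.c and the arch files listed above.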


@@ -47,6 +47,7 @@
#include <linux/hash.h>
#include <linux/namei.h>
#include <linux/smp_lock.h>
#include <linux/pid_namespace.h>
#include <asm/atomic.h>
@@ -734,16 +735,28 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
* reference to css->refcnt. In general, this refcnt is expected to go down
* to zero, soon.
*
* CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex;
* CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
*/
DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp)
static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
{
if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
wake_up_all(&cgroup_rmdir_waitq);
}
void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
{
css_get(css);
}
void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
{
cgroup_wakeup_rmdir_waiter(css->cgroup);
css_put(css);
}
static int rebind_subsystems(struct cgroupfs_root *root,
unsigned long final_bits)
{
@@ -960,6 +973,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
INIT_LIST_HEAD(&cgrp->children);
INIT_LIST_HEAD(&cgrp->css_sets);
INIT_LIST_HEAD(&cgrp->release_list);
INIT_LIST_HEAD(&cgrp->pids_list);
init_rwsem(&cgrp->pids_mutex);
}
static void init_cgroup_root(struct cgroupfs_root *root)
@@ -1357,7 +1371,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
* wake up rmdir() waiter. the rmdir should fail since the cgroup
* is no longer empty.
*/
cgroup_wakeup_rmdir_waiters(cgrp);
cgroup_wakeup_rmdir_waiter(cgrp);
return 0;
}
@@ -2201,12 +2215,30 @@ err:
return ret;
}
/*
* Cache pids for all threads in the same pid namespace that are
* opening the same "tasks" file.
*/
struct cgroup_pids {
/* The node in cgrp->pids_list */
struct list_head list;
/* The cgroup those pids belong to */
struct cgroup *cgrp;
/* The namespace those pids belong to */
struct pid_namespace *ns;
/* Array of process ids in the cgroup */
pid_t *tasks_pids;
/* How many files are using this tasks_pids array */
int use_count;
/* Length of the current tasks_pids array */
int length;
};
static int cmppid(const void *a, const void *b)
{
return *(pid_t *)a - *(pid_t *)b;
}
/*
* seq_file methods for the "tasks" file. The seq_file position is the
* next pid to display; the seq_file iterator is a pointer to the pid
@@ -2221,45 +2253,47 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)
* after a seek to the start). Use a binary-search to find the
* next pid to display, if any
*/
struct cgroup *cgrp = s->private;
struct cgroup_pids *cp = s->private;
struct cgroup *cgrp = cp->cgrp;
int index = 0, pid = *pos;
int *iter;
down_read(&cgrp->pids_mutex);
if (pid) {
int end = cgrp->pids_length;
int end = cp->length;
while (index < end) {
int mid = (index + end) / 2;
if (cgrp->tasks_pids[mid] == pid) {
if (cp->tasks_pids[mid] == pid) {
index = mid;
break;
} else if (cgrp->tasks_pids[mid] <= pid)
} else if (cp->tasks_pids[mid] <= pid)
index = mid + 1;
else
end = mid;
}
}
/* If we're off the end of the array, we're done */
if (index >= cgrp->pids_length)
if (index >= cp->length)
return NULL;
/* Update the abstract position to be the actual pid that we found */
iter = cgrp->tasks_pids + index;
iter = cp->tasks_pids + index;
*pos = *iter;
return iter;
}
static void cgroup_tasks_stop(struct seq_file *s, void *v)
{
struct cgroup *cgrp = s->private;
struct cgroup_pids *cp = s->private;
struct cgroup *cgrp = cp->cgrp;
up_read(&cgrp->pids_mutex);
}
static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)
{
struct cgroup *cgrp = s->private;
struct cgroup_pids *cp = s->private;
int *p = v;
int *end = cgrp->tasks_pids + cgrp->pids_length;
int *end = cp->tasks_pids + cp->length;
/*
* Advance to the next pid in the array. If this goes off the
@@ -2286,26 +2320,33 @@ static struct seq_operations cgroup_tasks_seq_operations = {
.show = cgroup_tasks_show,
};
static void release_cgroup_pid_array(struct cgroup *cgrp)
static void release_cgroup_pid_array(struct cgroup_pids *cp)
{
struct cgroup *cgrp = cp->cgrp;
down_write(&cgrp->pids_mutex);
BUG_ON(!cgrp->pids_use_count);
if (!--cgrp->pids_use_count) {
kfree(cgrp->tasks_pids);
cgrp->tasks_pids = NULL;
cgrp->pids_length = 0;
BUG_ON(!cp->use_count);
if (!--cp->use_count) {
list_del(&cp->list);
put_pid_ns(cp->ns);
kfree(cp->tasks_pids);
kfree(cp);
}
up_write(&cgrp->pids_mutex);
}
static int cgroup_tasks_release(struct inode *inode, struct file *file)
{
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
struct seq_file *seq;
struct cgroup_pids *cp;
if (!(file->f_mode & FMODE_READ))
return 0;
release_cgroup_pid_array(cgrp);
seq = file->private_data;
cp = seq->private;
release_cgroup_pid_array(cp);
return seq_release(inode, file);
}
@@ -2324,6 +2365,8 @@ static struct file_operations cgroup_tasks_operations = {
static int cgroup_tasks_open(struct inode *unused, struct file *file)
{
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
struct pid_namespace *ns = current->nsproxy->pid_ns;
struct cgroup_pids *cp;
pid_t *pidarray;
int npids;
int retval;
@@ -2350,20 +2393,37 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)
* array if necessary
*/
down_write(&cgrp->pids_mutex);
kfree(cgrp->tasks_pids);
cgrp->tasks_pids = pidarray;
cgrp->pids_length = npids;
cgrp->pids_use_count++;
list_for_each_entry(cp, &cgrp->pids_list, list) {
if (ns == cp->ns)
goto found;
}
cp = kzalloc(sizeof(*cp), GFP_KERNEL);
if (!cp) {
up_write(&cgrp->pids_mutex);
kfree(pidarray);
return -ENOMEM;
}
cp->cgrp = cgrp;
cp->ns = ns;
get_pid_ns(ns);
list_add(&cp->list, &cgrp->pids_list);
found:
kfree(cp->tasks_pids);
cp->tasks_pids = pidarray;
cp->length = npids;
cp->use_count++;
up_write(&cgrp->pids_mutex);
file->f_op = &cgroup_tasks_operations;
retval = seq_open(file, &cgroup_tasks_seq_operations);
if (retval) {
release_cgroup_pid_array(cgrp);
release_cgroup_pid_array(cp);
return retval;
}
((struct seq_file *)file->private_data)->private = cgrp;
((struct seq_file *)file->private_data)->private = cp;
return 0;
}
@@ -2695,34 +2755,43 @@ again:
}
mutex_unlock(&cgroup_mutex);
/*
* In general, subsystem has no css->refcnt after pre_destroy(). But
* in racy cases, subsystem may have to get css->refcnt after
* pre_destroy() and it makes rmdir return with -EBUSY. This sometimes
makes rmdir return -EBUSY too often. To avoid that, we use waitqueue
* for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir
* and subsystem's reference count handling. Please see css_get/put
* and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation.
*/
set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
/*
* Call pre_destroy handlers of subsys. Notify subsystems
* that rmdir() request comes.
*/
ret = cgroup_call_pre_destroy(cgrp);
if (ret)
if (ret) {
clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
return ret;
}
mutex_lock(&cgroup_mutex);
parent = cgrp->parent;
if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
mutex_unlock(&cgroup_mutex);
return -EBUSY;
}
/*
* css_put/get is provided for subsys to grab refcnt to css. In typical
* case, subsystem has no reference after pre_destroy(). But, under
hierarchy management, some *temporary* refcnt can be held.
* To avoid returning -EBUSY to a user, waitqueue is used. If subsys
* is really busy, it should return -EBUSY at pre_destroy(). wake_up
* is called when css_put() is called and refcnt goes down to 0.
*/
set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE);
if (!cgroup_clear_css_refs(cgrp)) {
mutex_unlock(&cgroup_mutex);
schedule();
/*
* Because someone may call cgroup_wakeup_rmdir_waiter() before
* prepare_to_wait(), we need to check this flag.
*/
if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))
schedule();
finish_wait(&cgroup_rmdir_waitq, &wait);
clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
if (signal_pending(current))
@@ -3294,7 +3363,7 @@ void __css_put(struct cgroup_subsys_state *css)
set_bit(CGRP_RELEASABLE, &cgrp->flags);
check_for_release(cgrp);
}
cgroup_wakeup_rmdir_waiters(cgrp);
cgroup_wakeup_rmdir_waiter(cgrp);
}
rcu_read_unlock();
}


@@ -12,7 +12,6 @@
#include <linux/completion.h>
#include <linux/personality.h>
#include <linux/tty.h>
#include <linux/mnt_namespace.h>
#include <linux/iocontext.h>
#include <linux/key.h>
#include <linux/security.h>


@@ -17,7 +17,6 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/completion.h>
#include <linux/mnt_namespace.h>
#include <linux/personality.h>
#include <linux/mempolicy.h>
#include <linux/sem.h>
@@ -427,6 +426,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
init_rwsem(&mm->mmap_sem);
INIT_LIST_HEAD(&mm->mmlist);
mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
mm->oom_adj = (current->mm) ? current->mm->oom_adj : 0;
mm->core_state = NULL;
mm->nr_ptes = 0;
set_mm_counter(mm, file_rss, 0);
@@ -568,18 +568,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
* the value intact in a core dump, and to save the unnecessary
* trouble otherwise. Userland only wants this done for a sys_exit.
*/
if (tsk->clear_child_tid
&& !(tsk->flags & PF_SIGNALED)
&& atomic_read(&mm->mm_users) > 1) {
u32 __user * tidptr = tsk->clear_child_tid;
if (tsk->clear_child_tid) {
if (!(tsk->flags & PF_SIGNALED) &&
atomic_read(&mm->mm_users) > 1) {
/*
* We don't check the error code - if userspace has
* not set up a proper pointer then tough luck.
*/
put_user(0, tsk->clear_child_tid);
sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
1, NULL, NULL, 0);
}
tsk->clear_child_tid = NULL;
/*
* We don't check the error code - if userspace has
* not set up a proper pointer then tough luck.
*/
put_user(0, tidptr);
sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
}
}
@@ -1269,6 +1269,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
write_unlock_irq(&tasklist_lock);
proc_fork_connector(p);
cgroup_post_fork(p);
perf_counter_fork(p);
return p;
bad_fork_free_pid:
@@ -1408,12 +1409,6 @@ long do_fork(unsigned long clone_flags,
if (clone_flags & CLONE_VFORK) {
p->vfork_done = &vfork;
init_completion(&vfork);
} else if (!(clone_flags & CLONE_VM)) {
/*
* vfork will do an exec which will call
* set_task_comm()
*/
perf_counter_fork(p);
}
audit_finish_fork(p);


@@ -44,12 +44,19 @@ void refrigerator(void)
recalc_sigpending(); /* We sent fake signal, clean it up */
spin_unlock_irq(&current->sighand->siglock);
/* prevent accounting of that task to load */
current->flags |= PF_FREEZING;
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!frozen(current))
break;
schedule();
}
/* Remove the accounting blocker */
current->flags &= ~PF_FREEZING;
pr_debug("%s left refrigerator\n", current->comm);
__set_current_state(save);
}


@@ -247,6 +247,7 @@ again:
if (err < 0)
return err;
page = compound_head(page);
lock_page(page);
if (!page->mapping) {
unlock_page(page);
@@ -1009,15 +1010,19 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
* requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
* q: the futex_q
* key: the key of the requeue target futex
* hb: the hash_bucket of the requeue target futex
*
* During futex_requeue, with requeue_pi=1, it is possible to acquire the
* target futex if it is uncontended or via a lock steal. Set the futex_q key
* to the requeue target futex so the waiter can detect the wakeup on the right
* futex, but remove it from the hb and NULL the rt_waiter so it can detect
* atomic lock acquisition. Must be called with the q->lock_ptr held.
* atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
* to protect access to the pi_state to fixup the owner later. Must be called
* with both q->lock_ptr and hb->lock held.
*/
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
struct futex_hash_bucket *hb)
{
drop_futex_key_refs(&q->key);
get_futex_key_refs(key);
@@ -1029,6 +1034,11 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
WARN_ON(!q->rt_waiter);
q->rt_waiter = NULL;
q->lock_ptr = &hb->lock;
#ifdef CONFIG_DEBUG_PI_LIST
q->list.plist.lock = &hb->lock;
#endif
wake_up_state(q->task, TASK_NORMAL);
}
@@ -1087,7 +1097,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
set_waiters);
if (ret == 1)
requeue_pi_wake_futex(top_waiter, key2);
requeue_pi_wake_futex(top_waiter, key2, hb2);
return ret;
}
@@ -1246,8 +1256,15 @@ retry_private:
if (!match_futex(&this->key, &key1))
continue;
WARN_ON(!requeue_pi && this->rt_waiter);
WARN_ON(requeue_pi && !this->rt_waiter);
/*
* FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
* be paired with each other and no other futex ops.
*/
if ((requeue_pi && !this->rt_waiter) ||
(!requeue_pi && this->rt_waiter)) {
ret = -EINVAL;
break;
}
/*
* Wake nr_wake waiters. For requeue_pi, if we acquired the
@@ -1272,7 +1289,7 @@ retry_private:
this->task, 1);
if (ret == 1) {
/* We got the lock. */
requeue_pi_wake_futex(this, &key2);
requeue_pi_wake_futex(this, &key2, hb2);
continue;
} else if (ret) {
/* -EDEADLK */


@@ -180,7 +180,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
int cmd = op & FUTEX_CMD_MASK;
if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
cmd == FUTEX_WAIT_BITSET)) {
cmd == FUTEX_WAIT_BITSET ||
cmd == FUTEX_WAIT_REQUEUE_PI)) {
if (get_compat_timespec(&ts, utime))
return -EFAULT;
if (!timespec_valid(&ts))
@@ -191,7 +192,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
t = ktime_add_safe(ktime_get(), t);
tp = &t;
}
if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE)
if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
val2 = (int) (unsigned long) utime;
return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);


@@ -191,6 +191,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
}
}
/*
* Get the preferred target CPU for NOHZ
*/
static int hrtimer_get_target(int this_cpu, int pinned)
{
#ifdef CONFIG_NO_HZ
if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
int preferred_cpu = get_nohz_load_balancer();
if (preferred_cpu >= 0)
return preferred_cpu;
}
#endif
return this_cpu;
}
/*
* With HIGHRES=y we do not migrate the timer when it is expiring
* before the next event on the target cpu because we cannot reprogram
* the target cpu hardware and we would cause it to fire late.
*
* Called with cpu_base->lock of target cpu held.
*/
static int
hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
{
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires;
if (!new_base->cpu_base->hres_active)
return 0;
expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
return expires.tv64 <= new_base->cpu_base->expires_next.tv64;
#else
return 0;
#endif
}
/*
* Switch the timer base to the current CPU when possible.
*/
@@ -200,16 +240,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
{
struct hrtimer_clock_base *new_base;
struct hrtimer_cpu_base *new_cpu_base;
int cpu, preferred_cpu = -1;
cpu = smp_processor_id();
#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
preferred_cpu = get_nohz_load_balancer();
if (preferred_cpu >= 0)
cpu = preferred_cpu;
}
#endif
int this_cpu = smp_processor_id();
int cpu = hrtimer_get_target(this_cpu, pinned);
again:
new_cpu_base = &per_cpu(hrtimer_bases, cpu);
@@ -217,7 +249,7 @@ again:
if (base != new_base) {
/*
* We are trying to schedule the timer on the local CPU.
* We are trying to move timer to new_base.
* However we can't change timer's base while it is running,
* so we keep it on the same CPU. No hassle vs. reprogramming
* the event source in the high resolution case. The softirq
@@ -233,38 +265,12 @@ again:
spin_unlock(&base->cpu_base->lock);
spin_lock(&new_base->cpu_base->lock);
/* Optimized away for NOHZ=n SMP=n */
if (cpu == preferred_cpu) {
/* Calculate clock monotonic expiry time */
#ifdef CONFIG_HIGH_RES_TIMERS
ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
new_base->offset);
#else
ktime_t expires = hrtimer_get_expires(timer);
#endif
/*
* Get the next event on target cpu from the
* clock events layer.
* This covers the highres=off nohz=on case as well.
*/
ktime_t next = clockevents_get_next_event(cpu);
ktime_t delta = ktime_sub(expires, next);
/*
* We do not migrate the timer when it is expiring
* before the next event on the target cpu because
* we cannot reprogram the target cpu hardware and
* we would cause it to fire late.
*/
if (delta.tv64 < 0) {
cpu = smp_processor_id();
spin_unlock(&new_base->cpu_base->lock);
spin_lock(&base->cpu_base->lock);
timer->base = base;
goto again;
}
if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) {
cpu = this_cpu;
spin_unlock(&new_base->cpu_base->lock);
spin_lock(&base->cpu_base->lock);
timer->base = base;
goto again;
}
timer->base = new_base;
}
@@ -1276,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev)
expires_next.tv64 = KTIME_MAX;
spin_lock(&cpu_base->lock);
/*
* We set expires_next to KTIME_MAX here with cpu_base->lock
* held to prevent that a timer is enqueued in our queue via
* the migration code. This does not affect enqueueing of
* timers which run their callback and need to be requeued on
* this CPU.
*/
cpu_base->expires_next.tv64 = KTIME_MAX;
base = cpu_base->clock_base;
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
ktime_t basenow;
struct rb_node *node;
spin_lock(&cpu_base->lock);
basenow = ktime_add(now, base->offset);
while ((node = base->first)) {
@@ -1316,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev)
__run_hrtimer(timer);
}
spin_unlock(&cpu_base->lock);
base++;
}
/*
* Store the new expiry value so the migration code can verify
* against it.
*/
cpu_base->expires_next = expires_next;
spin_unlock(&cpu_base->lock);
/* Reprogramming necessary ? */
if (expires_next.tv64 != KTIME_MAX) {


@@ -42,8 +42,7 @@ static inline void unregister_handler_proc(unsigned int irq,
extern int irq_select_affinity_usr(unsigned int irq);
extern void
irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask);
extern void irq_set_thread_affinity(struct irq_desc *desc);
/*
* Debugging printout:


@@ -80,14 +80,22 @@ int irq_can_set_affinity(unsigned int irq)
return 1;
}
void
irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
/**
* irq_set_thread_affinity - Notify irq threads to adjust affinity
* @desc: irq descriptor whose affinity changed
*
* We just set IRQTF_AFFINITY and delegate the affinity setting
* to the interrupt thread itself. We can not call
* set_cpus_allowed_ptr() here as we hold desc->lock and this
* code can be called from hard interrupt context.
*/
void irq_set_thread_affinity(struct irq_desc *desc)
{
struct irqaction *action = desc->action;
while (action) {
if (action->thread)
set_cpus_allowed_ptr(action->thread, cpumask);
set_bit(IRQTF_AFFINITY, &action->thread_flags);
action = action->next;
}
}
@@ -112,7 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
if (desc->status & IRQ_MOVE_PCNTXT) {
if (!desc->chip->set_affinity(irq, cpumask)) {
cpumask_copy(desc->affinity, cpumask);
irq_set_thread_affinity(desc, cpumask);
irq_set_thread_affinity(desc);
}
}
else {
@@ -122,7 +130,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
#else
if (!desc->chip->set_affinity(irq, cpumask)) {
cpumask_copy(desc->affinity, cpumask);
irq_set_thread_affinity(desc, cpumask);
irq_set_thread_affinity(desc);
}
#endif
desc->status |= IRQ_AFFINITY_SET;
@@ -176,7 +184,7 @@ int irq_select_affinity_usr(unsigned int irq)
spin_lock_irqsave(&desc->lock, flags);
ret = setup_affinity(irq, desc);
if (!ret)
irq_set_thread_affinity(desc, desc->affinity);
irq_set_thread_affinity(desc);
spin_unlock_irqrestore(&desc->lock, flags);
return ret;
@@ -443,6 +451,39 @@ static int irq_wait_for_interrupt(struct irqaction *action)
return -1;
}
#ifdef CONFIG_SMP
/*
* Check whether we need to change the affinity of the interrupt thread.
*/
static void
irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
{
cpumask_var_t mask;
if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags))
return;
/*
* In case we are out of memory we set IRQTF_AFFINITY again and
* try again next time
*/
if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
set_bit(IRQTF_AFFINITY, &action->thread_flags);
return;
}
spin_lock_irq(&desc->lock);
cpumask_copy(mask, desc->affinity);
spin_unlock_irq(&desc->lock);
set_cpus_allowed_ptr(current, mask);
free_cpumask_var(mask);
}
#else
static inline void
irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) { }
#endif
/*
* Interrupt handler thread
*/
@@ -458,6 +499,8 @@ static int irq_thread(void *data)
while (!irq_wait_for_interrupt(action)) {
irq_thread_check_affinity(desc, action);
atomic_inc(&desc->threads_active);
spin_lock_irq(&desc->lock);
@@ -718,7 +761,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action, **action_ptr;
struct task_struct *irqthread;
unsigned long flags;
WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -766,9 +808,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
desc->chip->disable(irq);
}
irqthread = action->thread;
action->thread = NULL;
spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
@@ -776,12 +815,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
/* Make sure it's not being used on another CPU: */
synchronize_irq(irq);
if (irqthread) {
if (!test_bit(IRQTF_DIED, &action->thread_flags))
kthread_stop(irqthread);
put_task_struct(irqthread);
}
#ifdef CONFIG_DEBUG_SHIRQ
/*
* It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -797,6 +830,13 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
local_irq_restore(flags);
}
#endif
if (action->thread) {
if (!test_bit(IRQTF_DIED, &action->thread_flags))
kthread_stop(action->thread);
put_task_struct(action->thread);
}
return action;
}


@@ -45,7 +45,7 @@ void move_masked_irq(int irq)
< nr_cpu_ids))
if (!desc->chip->set_affinity(irq, desc->pending_mask)) {
cpumask_copy(desc->affinity, desc->pending_mask);
irq_set_thread_affinity(desc, desc->pending_mask);
irq_set_thread_affinity(desc);
}
cpumask_clear(desc->pending_mask);


@@ -107,8 +107,8 @@ out_unlock:
struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
{
/* those all static, do move them */
if (desc->irq < NR_IRQS_LEGACY)
/* those static or target node is -1, do not move them */
if (desc->irq < NR_IRQS_LEGACY || node == -1)
return desc;
if (desc->node != node)


@@ -1228,7 +1228,7 @@ static int __init parse_crashkernel_mem(char *cmdline,
} while (*cur++ == ',');
if (*crash_size > 0) {
while (*cur != ' ' && *cur != '@')
while (*cur && *cur != ' ' && *cur != '@')
cur++;
if (*cur == '@') {
cur++;


@@ -24,7 +24,6 @@
#include <linux/unistd.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/mnt_namespace.h>
#include <linux/completion.h>
#include <linux/file.h>
#include <linux/fdtable.h>


@@ -237,13 +237,9 @@ static int __kprobes collect_garbage_slots(void)
{
struct kprobe_insn_page *kip;
struct hlist_node *pos, *next;
int safety;
/* Ensure no-one is preempted on the garbages */
mutex_unlock(&kprobe_insn_mutex);
safety = check_safety();
mutex_lock(&kprobe_insn_mutex);
if (safety != 0)
if (check_safety())
return -EAGAIN;
hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
@@ -698,7 +694,7 @@ int __kprobes register_kprobe(struct kprobe *p)
p->addr = addr;
preempt_disable();
if (!__kernel_text_address((unsigned long) p->addr) ||
if (!kernel_text_address((unsigned long) p->addr) ||
in_kprobes_functions((unsigned long) p->addr)) {
preempt_enable();
return -EINVAL;


@@ -180,10 +180,12 @@ EXPORT_SYMBOL(kthread_bind);
* @k: thread created by kthread_create().
*
* Sets kthread_should_stop() for @k to return true, wakes it, and
* waits for it to exit. Your threadfn() must not call do_exit()
* itself if you use this function! This can also be called after
* kthread_create() instead of calling wake_up_process(): the thread
* will exit without calling threadfn().
* waits for it to exit. This can also be called after kthread_create()
* instead of calling wake_up_process(): the thread will exit without
* calling threadfn().
*
* If threadfn() may call do_exit() itself, the caller must ensure
* task_struct can't go away.
*
* Returns the result of threadfn(), or %-EINTR if wake_up_process()
* was never called.


@@ -758,7 +758,8 @@ static int __init lockdep_proc_init(void)
&proc_lockdep_stats_operations);
#ifdef CONFIG_LOCK_STAT
proc_create("lock_stat", S_IRUSR, NULL, &proc_lock_stat_operations);
proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL,
&proc_lock_stat_operations);
#endif
return 0;


@@ -1068,7 +1068,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
{
const unsigned long *crc;
if (!find_symbol("module_layout", NULL, &crc, true, false))
if (!find_symbol(MODULE_SYMBOL_PREFIX "module_layout", NULL,
&crc, true, false))
BUG();
return check_version(sechdrs, versindex, "module_layout", mod, crc);
}
@@ -2451,9 +2452,9 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
return ret;
}
if (ret > 0) {
printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, "
"it should follow 0/-E convention\n"
KERN_WARNING "%s: loading module anyway...\n",
printk(KERN_WARNING
"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
"%s: loading module anyway...\n",
__func__, mod->name, ret,
__func__);
dump_stack();


@@ -301,6 +301,7 @@ int oops_may_print(void)
*/
void oops_enter(void)
{
tracing_off();
/* can't trust the integrity of the kernel anymore: */
debug_locks_off();
do_oops_enter_exit();

File diff suppressed because it is too large.


@@ -36,7 +36,6 @@
#include <linux/pid_namespace.h>
#include <linux/init_task.h>
#include <linux/syscalls.h>
#include <linux/kmemleak.h>
#define pid_hashfn(nr, ns) \
hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -513,12 +512,6 @@ void __init pidhash_init(void)
pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
if (!pid_hash)
panic("Could not alloc pidhash!\n");
/*
* pid_hash contains references to allocated struct pid objects and it
* must be scanned by kmemleak to avoid false positives.
*/
kmemleak_alloc(pid_hash, pidhash_size * sizeof(*(pid_hash)), 0,
GFP_KERNEL);
for (i = 0; i < pidhash_size; i++)
INIT_HLIST_HEAD(&pid_hash[i]);
}


@@ -521,11 +521,12 @@ void posix_cpu_timers_exit(struct task_struct *tsk)
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
struct task_cputime cputime;
struct signal_struct *const sig = tsk->signal;
thread_group_cputimer(tsk, &cputime);
cleanup_timers(tsk->signal->cpu_timers,
cputime.utime, cputime.stime, cputime.sum_exec_runtime);
cputime_add(tsk->utime, sig->utime),
cputime_add(tsk->stime, sig->stime),
tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
}
static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)


@@ -202,6 +202,12 @@ static int no_timer_create(struct k_itimer *new_timer)
return -EOPNOTSUPP;
}
static int no_nsleep(const clockid_t which_clock, int flags,
struct timespec *tsave, struct timespec __user *rmtp)
{
return -EOPNOTSUPP;
}
/*
* Return nonzero if we know a priori this clockid_t value is bogus.
*/
@@ -254,6 +260,7 @@ static __init int init_posix_timers(void)
.clock_get = posix_get_monotonic_raw,
.clock_set = do_posix_clock_nosettime,
.timer_create = no_timer_create,
.nsleep = no_nsleep,
};
register_posix_clock(CLOCK_REALTIME, &clock_realtime);


@@ -23,7 +23,6 @@
#include <linux/console.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/smp_lock.h>
#include <scsi/scsi_scan.h>
#include <asm/uaccess.h>


@@ -117,11 +117,12 @@ int __ref profile_init(void)
cpumask_copy(prof_cpu_mask, cpu_possible_mask);
prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL|__GFP_NOWARN);
if (prof_buffer)
return 0;
prof_buffer = alloc_pages_exact(buffer_bytes, GFP_KERNEL|__GFP_ZERO);
prof_buffer = alloc_pages_exact(buffer_bytes,
GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN);
if (prof_buffer)
return 0;


@@ -181,8 +181,8 @@ int ptrace_attach(struct task_struct *task)
* interference; SUID, SGID and LSM creds get determined differently
* under ptrace.
*/
retval = mutex_lock_interruptible(&task->cred_guard_mutex);
if (retval < 0)
retval = -ERESTARTNOINTR;
if (mutex_lock_interruptible(&task->cred_guard_mutex))
goto out;
task_lock(task);


@@ -1533,7 +1533,7 @@ void __init __rcu_init(void)
int j;
struct rcu_node *rnp;
printk(KERN_WARNING "Experimental hierarchical RCU implementation.\n");
printk(KERN_INFO "Hierarchical RCU implementation.\n");
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
@@ -1546,7 +1546,6 @@ void __init __rcu_init(void)
rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i);
/* Register notifier for non-boot CPUs */
register_cpu_notifier(&rcu_nb);
printk(KERN_WARNING "Experimental hierarchical RCU init done.\n");
}
module_param(blimit, int, 0);


@@ -1039,16 +1039,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
/* We got the lock for task. */
debug_rt_mutex_lock(lock);
rt_mutex_set_owner(lock, task, 0);
spin_unlock(&lock->wait_lock);
rt_mutex_deadlock_account_lock(lock, task);
return 1;
}
ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
if (ret && !waiter->task) {
/*
* Reset the return value. We might have


@@ -493,6 +493,7 @@ struct rt_rq {
#endif
#ifdef CONFIG_SMP
unsigned long rt_nr_migratory;
unsigned long rt_nr_total;
int overloaded;
struct plist_head pushable_tasks;
#endif
@@ -2571,15 +2572,37 @@ static void __sched_fork(struct task_struct *p)
p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
#ifdef CONFIG_SCHEDSTATS
p->se.wait_start = 0;
p->se.sum_sleep_runtime = 0;
p->se.sleep_start = 0;
p->se.block_start = 0;
p->se.sleep_max = 0;
p->se.block_max = 0;
p->se.exec_max = 0;
p->se.slice_max = 0;
p->se.wait_max = 0;
p->se.wait_start = 0;
p->se.wait_max = 0;
p->se.wait_count = 0;
p->se.wait_sum = 0;
p->se.sleep_start = 0;
p->se.sleep_max = 0;
p->se.sum_sleep_runtime = 0;
p->se.block_start = 0;
p->se.block_max = 0;
p->se.exec_max = 0;
p->se.slice_max = 0;
p->se.nr_migrations_cold = 0;
p->se.nr_failed_migrations_affine = 0;
p->se.nr_failed_migrations_running = 0;
p->se.nr_failed_migrations_hot = 0;
p->se.nr_forced_migrations = 0;
p->se.nr_forced2_migrations = 0;
p->se.nr_wakeups = 0;
p->se.nr_wakeups_sync = 0;
p->se.nr_wakeups_migrate = 0;
p->se.nr_wakeups_local = 0;
p->se.nr_wakeups_remote = 0;
p->se.nr_wakeups_affine = 0;
p->se.nr_wakeups_affine_attempts = 0;
p->se.nr_wakeups_passive = 0;
p->se.nr_wakeups_idle = 0;
#endif
INIT_LIST_HEAD(&p->rt.run_list);
@@ -6541,6 +6564,11 @@ SYSCALL_DEFINE0(sched_yield)
return 0;
}
static inline int should_resched(void)
{
return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
}
static void __cond_resched(void)
{
#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -6560,8 +6588,7 @@ static void __cond_resched(void)
int __sched _cond_resched(void)
{
if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
system_state == SYSTEM_RUNNING) {
if (should_resched()) {
__cond_resched();
return 1;
}
@@ -6579,12 +6606,12 @@ EXPORT_SYMBOL(_cond_resched);
*/
int cond_resched_lock(spinlock_t *lock)
{
int resched = need_resched() && system_state == SYSTEM_RUNNING;
int resched = should_resched();
int ret = 0;
if (spin_needbreak(lock) || resched) {
spin_unlock(lock);
if (resched && need_resched())
if (resched)
__cond_resched();
else
cpu_relax();
@@ -6599,7 +6626,7 @@ int __sched cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
if (need_resched() && system_state == SYSTEM_RUNNING) {
if (should_resched()) {
local_bh_enable();
__cond_resched();
local_bh_disable();
@@ -7262,6 +7289,7 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
static void calc_global_load_remove(struct rq *rq)
{
atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
rq->calc_load_active = 0;
}
#endif /* CONFIG_HOTPLUG_CPU */
@@ -7488,6 +7516,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
task_rq_unlock(rq, &flags);
get_task_struct(p);
cpu_rq(cpu)->migration_thread = p;
rq->calc_load_update = calc_load_update;
break;
case CPU_ONLINE:
@@ -7498,8 +7527,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
/* Update our root-domain */
rq = cpu_rq(cpu);
spin_lock_irqsave(&rq->lock, flags);
rq->calc_load_update = calc_load_update;
rq->calc_load_active = 0;
if (rq->rd) {
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@ -9070,7 +9097,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
#ifdef CONFIG_SMP
rt_rq->rt_nr_migratory = 0;
rt_rq->overloaded = 0;
plist_head_init(&rq->rt.pushable_tasks, &rq->lock);
plist_head_init(&rt_rq->pushable_tasks, &rq->lock);
#endif
rt_rq->rt_time = 0;


@@ -81,8 +81,21 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
continue;
if (lowest_mask)
if (lowest_mask) {
cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
/*
* We have to ensure that we have at least one bit
* still set in the array, since the map could have
* been concurrently emptied between the first and
* second reads of vec->mask. If we hit this
* condition, simply act as though we never hit this
* priority level and continue on.
*/
if (cpumask_any(lowest_mask) >= nr_cpu_ids)
continue;
}
return 1;
}


@@ -266,6 +266,12 @@ static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
return min_vruntime;
}
static inline int entity_before(struct sched_entity *a,
struct sched_entity *b)
{
return (s64)(a->vruntime - b->vruntime) < 0;
}
static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
return se->vruntime - cfs_rq->min_vruntime;
@@ -605,9 +611,13 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
#ifdef CONFIG_SCHEDSTATS
struct task_struct *tsk = NULL;
if (entity_is_task(se))
tsk = task_of(se);
if (se->sleep_start) {
u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
struct task_struct *tsk = task_of(se);
if ((s64)delta < 0)
delta = 0;
@@ -618,11 +628,11 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
se->sleep_start = 0;
se->sum_sleep_runtime += delta;
account_scheduler_latency(tsk, delta >> 10, 1);
if (tsk)
account_scheduler_latency(tsk, delta >> 10, 1);
}
if (se->block_start) {
u64 delta = rq_of(cfs_rq)->clock - se->block_start;
struct task_struct *tsk = task_of(se);
if ((s64)delta < 0)
delta = 0;
@@ -633,17 +643,19 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
se->block_start = 0;
se->sum_sleep_runtime += delta;
/*
* Blocking time is in units of nanosecs, so shift by 20 to
* get a milliseconds-range estimation of the amount of
* time that the task spent sleeping:
*/
if (unlikely(prof_on == SLEEP_PROFILING)) {
profile_hits(SLEEP_PROFILING, (void *)get_wchan(tsk),
delta >> 20);
if (tsk) {
/*
* Blocking time is in units of nanosecs, so shift by
* 20 to get a milliseconds-range estimation of the
* amount of time that the task spent sleeping:
*/
if (unlikely(prof_on == SLEEP_PROFILING)) {
profile_hits(SLEEP_PROFILING,
(void *)get_wchan(tsk),
delta >> 20);
}
account_scheduler_latency(tsk, delta >> 10, 0);
}
account_scheduler_latency(tsk, delta >> 10, 0);
}
#endif
}
@@ -687,7 +699,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
* all of which have the same weight.
*/
if (sched_feat(NORMALIZED_SLEEPER) &&
task_of(se)->policy != SCHED_IDLE)
(!entity_is_task(se) ||
task_of(se)->policy != SCHED_IDLE))
thresh = calc_delta_fair(thresh, se);
vruntime -= thresh;
@@ -1016,7 +1029,7 @@ static void yield_task_fair(struct rq *rq)
/*
* Already in the rightmost position?
*/
if (unlikely(!rightmost || rightmost->vruntime < se->vruntime))
if (unlikely(!rightmost || entity_before(rightmost, se)))
return;
/*
@@ -1712,7 +1725,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
/* 'curr' will be NULL if the child belongs to a different group */
if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
curr && curr->vruntime < se->vruntime) {
curr && entity_before(curr, se)) {
/*
* Upon rescheduling, sched_class::put_prev_task() will place
* 'current' within the tree based on its new key value.


@@ -10,6 +10,8 @@ static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
#ifdef CONFIG_RT_GROUP_SCHED
#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
return rt_rq->rq;
@@ -22,6 +24,8 @@ static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
#else /* CONFIG_RT_GROUP_SCHED */
#define rt_entity_is_task(rt_se) (1)
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
return container_of(rt_rq, struct rq, rt);
@@ -73,7 +77,7 @@ static inline void rt_clear_overload(struct rq *rq)
static void update_rt_migration(struct rt_rq *rt_rq)
{
if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) {
if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
if (!rt_rq->overloaded) {
rt_set_overload(rq_of_rt_rq(rt_rq));
rt_rq->overloaded = 1;
@@ -86,6 +90,12 @@ static void update_rt_migration(struct rt_rq *rt_rq)
static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
if (!rt_entity_is_task(rt_se))
return;
rt_rq = &rq_of_rt_rq(rt_rq)->rt;
rt_rq->rt_nr_total++;
if (rt_se->nr_cpus_allowed > 1)
rt_rq->rt_nr_migratory++;
@@ -94,6 +104,12 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
if (!rt_entity_is_task(rt_se))
return;
rt_rq = &rq_of_rt_rq(rt_rq)->rt;
rt_rq->rt_nr_total--;
if (rt_se->nr_cpus_allowed > 1)
rt_rq->rt_nr_migratory--;


@@ -2454,11 +2454,9 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
stack_t oss;
int error;
if (uoss) {
oss.ss_sp = (void __user *) current->sas_ss_sp;
oss.ss_size = current->sas_ss_size;
oss.ss_flags = sas_ss_flags(sp);
}
oss.ss_sp = (void __user *) current->sas_ss_sp;
oss.ss_size = current->sas_ss_size;
oss.ss_flags = sas_ss_flags(sp);
if (uss) {
void __user *ss_sp;
@@ -2466,10 +2464,12 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
int ss_flags;
error = -EFAULT;
if (!access_ok(VERIFY_READ, uss, sizeof(*uss))
|| __get_user(ss_sp, &uss->ss_sp)
|| __get_user(ss_flags, &uss->ss_flags)
|| __get_user(ss_size, &uss->ss_size))
if (!access_ok(VERIFY_READ, uss, sizeof(*uss)))
goto out;
error = __get_user(ss_sp, &uss->ss_sp) |
__get_user(ss_flags, &uss->ss_flags) |
__get_user(ss_size, &uss->ss_size);
if (error)
goto out;
error = -EPERM;
@@ -2501,13 +2501,16 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
current->sas_ss_size = ss_size;
}
error = 0;
if (uoss) {
error = -EFAULT;
if (copy_to_user(uoss, &oss, sizeof(oss)))
if (!access_ok(VERIFY_WRITE, uoss, sizeof(*uoss)))
goto out;
error = __put_user(oss.ss_sp, &uoss->ss_sp) |
__put_user(oss.ss_size, &uoss->ss_size) |
__put_user(oss.ss_flags, &uoss->ss_flags);
}
error = 0;
out:
return error;
}


@@ -57,7 +57,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
return NOTIFY_BAD;
break;
#ifdef CONFIG_CPU_HOTPLUG
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:


@@ -345,7 +345,9 @@ void open_softirq(int nr, void (*action)(struct softirq_action *))
softirq_vec[nr].action = action;
}
/* Tasklets */
/*
* Tasklets
*/
struct tasklet_head
{
struct tasklet_struct *head;
@@ -493,6 +495,66 @@ void tasklet_kill(struct tasklet_struct *t)
EXPORT_SYMBOL(tasklet_kill);
/*
* tasklet_hrtimer
*/
/*
* The trampoline is called when the hrtimer expires. If this is
* called from the hrtimer interrupt then we schedule the tasklet as
* the timer callback function expects to run in softirq context. If
* it's called in softirq context anyway (i.e. high resolution timers
* disabled) then the hrtimer callback is called right away.
*/
static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
{
struct tasklet_hrtimer *ttimer =
container_of(timer, struct tasklet_hrtimer, timer);
if (hrtimer_is_hres_active(timer)) {
tasklet_hi_schedule(&ttimer->tasklet);
return HRTIMER_NORESTART;
}
return ttimer->function(timer);
}
/*
* Helper function which calls the hrtimer callback from
* tasklet/softirq context
*/
static void __tasklet_hrtimer_trampoline(unsigned long data)
{
struct tasklet_hrtimer *ttimer = (void *)data;
enum hrtimer_restart restart;
restart = ttimer->function(&ttimer->timer);
if (restart != HRTIMER_NORESTART)
hrtimer_restart(&ttimer->timer);
}
/**
* tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
* @ttimer: tasklet_hrtimer which is initialized
* @function: hrtimer callback function which gets called from softirq context
* @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
* @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
*/
void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
enum hrtimer_restart (*function)(struct hrtimer *),
clockid_t which_clock, enum hrtimer_mode mode)
{
hrtimer_init(&ttimer->timer, which_clock, mode);
ttimer->timer.function = __hrtimer_tasklet_trampoline;
tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
(unsigned long)ttimer);
ttimer->function = function;
}
EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);
/*
* Remote softirq bits
*/
DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
EXPORT_PER_CPU_SYMBOL(softirq_work_list);


@@ -254,15 +254,4 @@ void clockevents_notify(unsigned long reason, void *arg)
spin_unlock(&clockevents_lock);
}
EXPORT_SYMBOL_GPL(clockevents_notify);
ktime_t clockevents_get_next_event(int cpu)
{
struct tick_device *td;
struct clock_event_device *dev;
td = &per_cpu(tick_cpu_device, cpu);
dev = td->evtdev;
return dev->next_event;
}
#endif


@@ -513,7 +513,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev,
* Check to make sure we don't switch to a non-highres capable
* clocksource if the tick code is in oneshot mode (highres or nohz)
*/
if (tick_oneshot_mode_active() &&
if (tick_oneshot_mode_active() && ovr &&
!(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) {
printk(KERN_WARNING "%s clocksource is not HRT compatible. "
"Cannot switch while in HRT/NOHZ mode\n", ovr->name);


@@ -714,7 +714,7 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
* networking code - if the timer is re-modified
* to be the same thing then just return:
*/
if (timer->expires == expires && timer_pending(timer))
if (timer_pending(timer) && timer->expires == expires)
return 1;
return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);


@@ -226,13 +226,13 @@ config BOOT_TRACER
the timings of the initcalls and traces key events and the identity
of tasks that can cause boot delays, such as context-switches.
Its aim is to be parsed by the /scripts/bootgraph.pl tool to
Its aim is to be parsed by the scripts/bootgraph.pl tool to
produce pretty graphics about boot inefficiencies, giving a visual
representation of the delays during initcalls - but the raw
/debug/tracing/trace text output is readable too.
You must pass in ftrace=initcall to the kernel command line
to enable this on bootup.
You must pass in initcall_debug and ftrace=initcall to the kernel
command line to enable this on bootup.
config TRACE_BRANCH_PROFILING
bool


@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/debugfs.h>
#include <linux/smp_lock.h>
#include <linux/time.h>
#include <linux/uaccess.h>
@@ -266,8 +267,8 @@ static void blk_trace_free(struct blk_trace *bt)
{
debugfs_remove(bt->msg_file);
debugfs_remove(bt->dropped_file);
debugfs_remove(bt->dir);
relay_close(bt->rchan);
debugfs_remove(bt->dir);
free_percpu(bt->sequence);
free_percpu(bt->msg_data);
kfree(bt);
@@ -377,18 +378,8 @@ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
static int blk_remove_buf_file_callback(struct dentry *dentry)
{
struct dentry *parent = dentry->d_parent;
debugfs_remove(dentry);
/*
* this will fail for all but the last file, but that is ok. what we
* care about is the top level buts->name directory going away, when
* the last trace file is gone. Then we don't have to rmdir() that
* manually on trace stop, so it nicely solves the issue with
* force killing of running traces.
*/
debugfs_remove(parent);
return 0;
}


@@ -768,7 +768,7 @@ static struct tracer_stat function_stats __initdata = {
.stat_show = function_stat_show
};
static void ftrace_profile_debugfs(struct dentry *d_tracer)
static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
struct ftrace_profile_stat *stat;
struct dentry *entry;
@@ -786,7 +786,6 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
* The files created are permanent, if something happens
* we still do not free memory.
*/
kfree(stat);
WARN(1,
"Could not allocate stat file for cpu %d\n",
cpu);
@@ -813,7 +812,7 @@ static void ftrace_profile_debugfs(struct dentry *d_tracer)
}
#else /* CONFIG_FUNCTION_PROFILER */
static void ftrace_profile_debugfs(struct dentry *d_tracer)
static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
{
}
#endif /* CONFIG_FUNCTION_PROFILER */
@@ -1663,7 +1662,7 @@ ftrace_regex_open(struct inode *inode, struct file *file, int enable)
mutex_lock(&ftrace_regex_lock);
if ((file->f_mode & FMODE_WRITE) &&
!(file->f_flags & O_APPEND))
(file->f_flags & O_TRUNC))
ftrace_filter_reset(enable);
if (file->f_mode & FMODE_READ) {
@@ -2578,7 +2577,7 @@ ftrace_graph_open(struct inode *inode, struct file *file)
mutex_lock(&graph_lock);
if ((file->f_mode & FMODE_WRITE) &&
!(file->f_flags & O_APPEND)) {
(file->f_flags & O_TRUNC)) {
ftrace_graph_count = 0;
memset(ftrace_graph_funcs, 0, sizeof(ftrace_graph_funcs));
}
@@ -2596,6 +2595,14 @@ ftrace_graph_open(struct inode *inode, struct file *file)
return ret;
}
static int
ftrace_graph_release(struct inode *inode, struct file *file)
{
if (file->f_mode & FMODE_READ)
seq_release(inode, file);
return 0;
}
static int
ftrace_set_func(unsigned long *array, int *idx, char *buffer)
{
@@ -2725,9 +2732,10 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
}
static const struct file_operations ftrace_graph_fops = {
.open = ftrace_graph_open,
.read = seq_read,
.write = ftrace_graph_write,
.open = ftrace_graph_open,
.read = seq_read,
.write = ftrace_graph_write,
.release = ftrace_graph_release,
};
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
@@ -3160,10 +3168,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
goto out;
last_ftrace_enabled = ftrace_enabled;
last_ftrace_enabled = !!ftrace_enabled;
if (ftrace_enabled) {


@@ -735,6 +735,7 @@ ring_buffer_free(struct ring_buffer *buffer)
put_online_cpus();
kfree(buffer->buffers);
free_cpumask_var(buffer->cpumask);
kfree(buffer);
@@ -1785,7 +1786,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
*/
RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
if (!rb_try_to_discard(cpu_buffer, event))
if (rb_try_to_discard(cpu_buffer, event))
goto out;
/*
@@ -2383,7 +2384,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
* the box. Return the padding, and we will release
* the current locks, and try again.
*/
rb_advance_reader(cpu_buffer);
return event;
case RINGBUF_TYPE_TIME_EXTEND:
@@ -2486,7 +2486,7 @@ static inline int rb_ok_to_lock(void)
* buffer too. A one time deal is all you get from reading
* the ring buffer from an NMI.
*/
if (likely(!in_nmi() && !oops_in_progress))
if (likely(!in_nmi()))
return 1;
tracing_off_permanent();
@@ -2519,6 +2519,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
if (dolock)
spin_lock(&cpu_buffer->reader_lock);
event = rb_buffer_peek(buffer, cpu, ts);
if (event && event->type_len == RINGBUF_TYPE_PADDING)
rb_advance_reader(cpu_buffer);
if (dolock)
spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);
@@ -2590,12 +2592,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
spin_lock(&cpu_buffer->reader_lock);
event = rb_buffer_peek(buffer, cpu, ts);
if (!event)
goto out_unlock;
if (event)
rb_advance_reader(cpu_buffer);
rb_advance_reader(cpu_buffer);
out_unlock:
if (dolock)
spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);


@@ -17,6 +17,7 @@
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/smp_lock.h>
#include <linux/notifier.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
@@ -847,6 +848,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
}
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
int type,
@@ -2030,7 +2032,7 @@ static int tracing_open(struct inode *inode, struct file *file)
/* If this file was open for write, then erase contents */
if ((file->f_mode & FMODE_WRITE) &&
!(file->f_flags & O_APPEND)) {
(file->f_flags & O_TRUNC)) {
long cpu = (long) inode->i_private;
if (cpu == TRACE_PIPE_ALL_CPU)
@@ -3084,7 +3086,8 @@ tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
break;
}
trace_consume(iter);
if (ret != TRACE_TYPE_NO_CONSUME)
trace_consume(iter);
rem -= count;
if (!find_next_entry_inc(iter)) {
rem = 0;
@@ -4232,8 +4235,11 @@ static void __ftrace_dump(bool disable_tracing)
iter.pos = -1;
if (find_next_entry_inc(&iter) != NULL) {
print_trace_line(&iter);
trace_consume(&iter);
int ret;
ret = print_trace_line(&iter);
if (ret != TRACE_TYPE_NO_CONSUME)
trace_consume(&iter);
}
trace_printk_seq(&iter.seq);


@@ -438,10 +438,6 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
int *ent_cpu, u64 *ent_ts);
void tracing_generic_entry_update(struct trace_entry *entry,
unsigned long flags,
int pc);
void default_wait_pipe(struct trace_iterator *iter);
void poll_wait_pipe(struct trace_iterator *iter);


@@ -14,7 +14,7 @@ int ftrace_profile_enable(int event_id)
mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) {
if (event->id == event_id) {
if (event->id == event_id && event->profile_enable) {
ret = event->profile_enable(event);
break;
}


@@ -26,6 +26,9 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
ftrace_graph_ret_entry, ignore,
TRACE_STRUCT(
TRACE_FIELD(unsigned long, ret.func, func)
TRACE_FIELD(unsigned long long, ret.calltime, calltime)
TRACE_FIELD(unsigned long long, ret.rettime, rettime)
TRACE_FIELD(unsigned long, ret.overrun, overrun)
TRACE_FIELD(int, ret.depth, depth)
),
TP_RAW_FMT("<-- %lx (%d)")


@@ -376,7 +376,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file)
const struct seq_operations *seq_ops;
if ((file->f_mode & FMODE_WRITE) &&
!(file->f_flags & O_APPEND))
(file->f_flags & O_TRUNC))
ftrace_clear_events();
seq_ops = inode->i_private;
@@ -940,7 +940,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
entry = trace_create_file("enable", 0644, call->dir, call,
enable);
if (call->id)
if (call->id && call->profile_enable)
entry = trace_create_file("id", 0444, call->dir, call,
id);


@@ -624,9 +624,6 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
return -ENOSPC;
}
filter->preds[filter->n_preds] = pred;
filter->n_preds++;
list_for_each_entry(call, &ftrace_events, list) {
if (!call->define_fields)
@@ -643,6 +640,9 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
}
replace_filter_string(call->filter, filter_string);
}
filter->preds[filter->n_preds] = pred;
filter->n_preds++;
out:
return err;
}
@@ -1029,12 +1029,17 @@ static int replace_preds(struct event_subsystem *system,
if (elt->op == OP_AND || elt->op == OP_OR) {
pred = create_logical_pred(elt->op);
if (!pred)
return -ENOMEM;
if (call) {
err = filter_add_pred(ps, call, pred);
filter_free_pred(pred);
} else
} else {
err = filter_add_subsystem_pred(ps, system,
pred, filter_string);
if (err)
filter_free_pred(pred);
}
if (err)
return err;
@@ -1048,12 +1053,17 @@ static int replace_preds(struct event_subsystem *system,
}
pred = create_pred(elt->op, operand1, operand2);
if (!pred)
return -ENOMEM;
if (call) {
err = filter_add_pred(ps, call, pred);
filter_free_pred(pred);
} else
} else {
err = filter_add_subsystem_pred(ps, system, pred,
filter_string);
if (err)
filter_free_pred(pred);
}
if (err)
return err;


@@ -363,7 +363,7 @@ ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable)
out_reg:
ret = register_ftrace_function_probe(glob, ops, count);
return ret;
return ret < 0 ? ret : 0;
}
static struct ftrace_func_command ftrace_traceon_cmd = {


@@ -843,9 +843,16 @@ print_graph_function(struct trace_iterator *iter)
switch (entry->type) {
case TRACE_GRAPH_ENT: {
struct ftrace_graph_ent_entry *field;
/*
* print_graph_entry() may consume the current event,
* thus @field may become invalid, so we need to save it.
* sizeof(struct ftrace_graph_ent_entry) is very small,
* it can be safely saved at the stack.
*/
struct ftrace_graph_ent_entry *field, saved;
trace_assign_type(field, entry);
return print_graph_entry(field, s, iter);
saved = *field;
return print_graph_entry(&saved, s, iter);
}
case TRACE_GRAPH_RET: {
struct ftrace_graph_ret_entry *field;


@@ -27,8 +27,7 @@ void trace_print_seq(struct seq_file *m, struct trace_seq *s)
{
int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
s->buffer[len] = 0;
seq_puts(m, s->buffer);
seq_write(m, s->buffer, len);
trace_seq_init(s);
}


@@ -176,7 +176,7 @@ static int t_show(struct seq_file *m, void *v)
const char *str = *fmt;
int i;
seq_printf(m, "0x%lx : \"", (unsigned long)fmt);
seq_printf(m, "0x%lx : \"", *(unsigned long *)fmt);
/*
* Tabs and new lines need to be converted.


@@ -301,17 +301,14 @@ static const struct seq_operations stack_trace_seq_ops = {
static int stack_trace_open(struct inode *inode, struct file *file)
{
int ret;
ret = seq_open(file, &stack_trace_seq_ops);
return ret;
return seq_open(file, &stack_trace_seq_ops);
}
static const struct file_operations stack_trace_fops = {
.open = stack_trace_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
int
@@ -326,10 +323,10 @@ stack_trace_sysctl(struct ctl_table *table, int write,
ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
if (ret || !write ||
(last_stack_tracer_enabled == stack_tracer_enabled))
(last_stack_tracer_enabled == !!stack_tracer_enabled))
goto out;
last_stack_tracer_enabled = stack_tracer_enabled;
last_stack_tracer_enabled = !!stack_tracer_enabled;
if (stack_tracer_enabled)
register_ftrace_function(&trace_ops);


@@ -73,7 +73,7 @@ static struct rb_node *release_next(struct rb_node *node)
}
}
static void reset_stat_session(struct stat_session *session)
static void __reset_stat_session(struct stat_session *session)
{
struct rb_node *node = session->stat_root.rb_node;
@@ -83,10 +83,17 @@ static void reset_stat_session(struct stat_session *session)
session->stat_root = RB_ROOT;
}
static void reset_stat_session(struct stat_session *session)
{
mutex_lock(&session->stat_mutex);
__reset_stat_session(session);
mutex_unlock(&session->stat_mutex);
}
static void destroy_session(struct stat_session *session)
{
debugfs_remove(session->file);
reset_stat_session(session);
__reset_stat_session(session);
mutex_destroy(&session->stat_mutex);
kfree(session);
}
@@ -150,7 +157,7 @@ static int stat_seq_init(struct stat_session *session)
int i;
mutex_lock(&session->stat_mutex);
reset_stat_session(session);
__reset_stat_session(session);
if (!ts->stat_cmp)
ts->stat_cmp = dummy_cmp;
@@ -183,7 +190,7 @@ exit:
return ret;
exit_free_rbtree:
reset_stat_session(session);
__reset_stat_session(session);
mutex_unlock(&session->stat_mutex);
return ret;
}
@@ -250,16 +257,21 @@ static const struct seq_operations trace_stat_seq_ops = {
static int tracing_stat_open(struct inode *inode, struct file *file)
{
int ret;
struct seq_file *m;
struct stat_session *session = inode->i_private;
ret = stat_seq_init(session);
if (ret)
return ret;
ret = seq_open(file, &trace_stat_seq_ops);
if (!ret) {
struct seq_file *m = file->private_data;
m->private = session;
ret = stat_seq_init(session);
if (ret) {
reset_stat_session(session);
return ret;
}
m = file->private_data;
m->private = session;
return ret;
}
@@ -270,11 +282,9 @@ static int tracing_stat_release(struct inode *i, struct file *f)
{
struct stat_session *session = i->i_private;
mutex_lock(&session->stat_mutex);
reset_stat_session(session);
mutex_unlock(&session->stat_mutex);
return 0;
return seq_release(i, f);
}
static const struct file_operations tracing_stat_fops = {


@@ -10,13 +10,14 @@
#include <linux/wait.h>
#include <linux/hash.h>
void init_waitqueue_head(wait_queue_head_t *q)
void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
{
spin_lock_init(&q->lock);
lockdep_set_class(&q->lock, key);
INIT_LIST_HEAD(&q->task_list);
}
EXPORT_SYMBOL(init_waitqueue_head);
EXPORT_SYMBOL(__init_waitqueue_head);
void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{