Merge commit 'linus/master' into merge-linus
Conflicts:
	arch/x86/kvm/i8254.c
@@ -548,7 +548,7 @@ static void do_acct_process(struct bsd_acct_struct *acct,
#endif

spin_lock_irq(&current->sighand->siglock);
tty = current->signal->tty;
tty = current->signal->tty; /* Safe as we hold the siglock */
ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));

@@ -246,8 +246,8 @@ static int audit_match_perm(struct audit_context *ctx, int mask)
unsigned n;
if (unlikely(!ctx))
return 0;

n = ctx->major;

switch (audit_classify_syscall(ctx->arch, n)) {
case 0: /* native */
if ((mask & AUDIT_PERM_WRITE) &&
@@ -1204,13 +1204,13 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
(context->return_valid==AUDITSC_SUCCESS)?"yes":"no",
context->return_code);

mutex_lock(&tty_mutex);
read_lock(&tasklist_lock);
spin_lock_irq(&tsk->sighand->siglock);
if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
tty = tsk->signal->tty->name;
else
tty = "(none)";
read_unlock(&tasklist_lock);
spin_unlock_irq(&tsk->sighand->siglock);

audit_log_format(ab,
" a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
" ppid=%d pid=%d auid=%u uid=%u gid=%u"
@@ -1230,7 +1230,6 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
context->egid, context->sgid, context->fsgid, tty,
tsk->sessionid);

mutex_unlock(&tty_mutex);

audit_log_task_info(ab, tsk);
if (context->filterkey) {

@@ -2735,21 +2735,24 @@ void cgroup_fork_callbacks(struct task_struct *child)
* Called on every change to mm->owner. mm_init_owner() does not
* invoke this routine, since it assigns the mm->owner the first time
* and does not change it.
*
* The callbacks are invoked with mmap_sem held in read mode.
*/
void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
{
struct cgroup *oldcgrp, *newcgrp;
struct cgroup *oldcgrp, *newcgrp = NULL;

if (need_mm_owner_callback) {
int i;
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
oldcgrp = task_cgroup(old, ss->subsys_id);
newcgrp = task_cgroup(new, ss->subsys_id);
if (new)
newcgrp = task_cgroup(new, ss->subsys_id);
if (oldcgrp == newcgrp)
continue;
if (ss->mm_owner_changed)
ss->mm_owner_changed(ss, oldcgrp, newcgrp);
ss->mm_owner_changed(ss, oldcgrp, newcgrp, new);
}
}
}

@@ -26,6 +26,64 @@

#include <asm/uaccess.h>

/*
* Note that the native side is already converted to a timespec, because
* that's what we want anyway.
*/
static int compat_get_timeval(struct timespec *o,
struct compat_timeval __user *i)
{
long usec;

if (get_user(o->tv_sec, &i->tv_sec) ||
get_user(usec, &i->tv_usec))
return -EFAULT;
o->tv_nsec = usec * 1000;
return 0;
}

static int compat_put_timeval(struct compat_timeval __user *o,
struct timeval *i)
{
return (put_user(i->tv_sec, &o->tv_sec) ||
put_user(i->tv_usec, &o->tv_usec)) ? -EFAULT : 0;
}

asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
struct timezone __user *tz)
{
if (tv) {
struct timeval ktv;
do_gettimeofday(&ktv);
if (compat_put_timeval(tv, &ktv))
return -EFAULT;
}
if (tz) {
if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
return -EFAULT;
}

return 0;
}

asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
struct timezone __user *tz)
{
struct timespec kts;
struct timezone ktz;

if (tv) {
if (compat_get_timeval(&kts, tv))
return -EFAULT;
}
if (tz) {
if (copy_from_user(&ktz, tz, sizeof(ktz)))
return -EFAULT;
}

return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
}

int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
{
return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||

kernel/cpu.c
@@ -199,13 +199,14 @@ static int __ref take_cpu_down(void *_param)
struct take_cpu_down_param *param = _param;
int err;

raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
param->hcpu);
/* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable();
if (err < 0)
return err;

raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
param->hcpu);

/* Force idle task to run as soon as we yield: it should
immediately notice cpu is offline and die quickly. */
sched_idle_next();
@@ -453,6 +454,25 @@ out:
}
#endif /* CONFIG_PM_SLEEP_SMP */

/**
* notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
* @cpu: cpu that just started
*
* This function calls the cpu_chain notifiers with CPU_STARTING.
* It must be called by the arch code on the new cpu, before the new cpu
* enables interrupts and before the "boot" cpu returns from __cpu_up().
*/
void notify_cpu_starting(unsigned int cpu)
{
unsigned long val = CPU_STARTING;

#ifdef CONFIG_PM_SLEEP_SMP
if (cpu_isset(cpu, frozen_cpus))
val = CPU_STARTING_FROZEN;
#endif /* CONFIG_PM_SLEEP_SMP */
raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
}

#endif /* CONFIG_SMP */

/*

kernel/cpuset.c
@@ -14,6 +14,8 @@
|
||||
* 2003-10-22 Updates by Stephen Hemminger.
|
||||
* 2004 May-July Rework by Paul Jackson.
|
||||
* 2006 Rework by Paul Menage to use generic cgroups
|
||||
* 2008 Rework of the scheduler domains and CPU hotplug handling
|
||||
* by Max Krasnyansky
|
||||
*
|
||||
* This file is subject to the terms and conditions of the GNU General Public
|
||||
* License. See the file COPYING in the main directory of the Linux
|
||||
@@ -236,9 +238,11 @@ static struct cpuset top_cpuset = {
|
||||
|
||||
static DEFINE_MUTEX(callback_mutex);
|
||||
|
||||
/* This is ugly, but preserves the userspace API for existing cpuset
|
||||
/*
|
||||
* This is ugly, but preserves the userspace API for existing cpuset
|
||||
* users. If someone tries to mount the "cpuset" filesystem, we
|
||||
* silently switch it to mount "cgroup" instead */
|
||||
* silently switch it to mount "cgroup" instead
|
||||
*/
|
||||
static int cpuset_get_sb(struct file_system_type *fs_type,
|
||||
int flags, const char *unused_dev_name,
|
||||
void *data, struct vfsmount *mnt)
|
||||
@@ -473,10 +477,9 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper routine for rebuild_sched_domains().
|
||||
* Helper routine for generate_sched_domains().
|
||||
* Do cpusets a, b have overlapping cpus_allowed masks?
|
||||
*/
|
||||
|
||||
static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
|
||||
{
|
||||
return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
|
||||
@@ -518,26 +521,15 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
|
||||
}
|
||||
|
||||
/*
|
||||
* rebuild_sched_domains()
|
||||
* generate_sched_domains()
|
||||
*
|
||||
* This routine will be called to rebuild the scheduler's dynamic
|
||||
* sched domains:
|
||||
* - if the flag 'sched_load_balance' of any cpuset with non-empty
|
||||
* 'cpus' changes,
|
||||
* - or if the 'cpus' allowed changes in any cpuset which has that
|
||||
* flag enabled,
|
||||
* - or if the 'sched_relax_domain_level' of any cpuset which has
|
||||
* that flag enabled and with non-empty 'cpus' changes,
|
||||
* - or if any cpuset with non-empty 'cpus' is removed,
|
||||
* - or if a cpu gets offlined.
|
||||
*
|
||||
* This routine builds a partial partition of the systems CPUs
|
||||
* (the set of non-overlappping cpumask_t's in the array 'part'
|
||||
* below), and passes that partial partition to the kernel/sched.c
|
||||
* partition_sched_domains() routine, which will rebuild the
|
||||
* schedulers load balancing domains (sched domains) as specified
|
||||
* by that partial partition. A 'partial partition' is a set of
|
||||
* non-overlapping subsets whose union is a subset of that set.
|
||||
* This function builds a partial partition of the systems CPUs
|
||||
* A 'partial partition' is a set of non-overlapping subsets whose
|
||||
* union is a subset of that set.
|
||||
* The output of this function needs to be passed to kernel/sched.c
|
||||
* partition_sched_domains() routine, which will rebuild the scheduler's
|
||||
* load balancing domains (sched domains) as specified by that partial
|
||||
* partition.
|
||||
*
|
||||
* See "What is sched_load_balance" in Documentation/cpusets.txt
|
||||
* for a background explanation of this.
|
||||
@@ -547,13 +539,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
|
||||
* domains when operating in the severe memory shortage situations
|
||||
* that could cause allocation failures below.
|
||||
*
|
||||
* Call with cgroup_mutex held. May take callback_mutex during
|
||||
* call due to the kfifo_alloc() and kmalloc() calls. May nest
|
||||
* a call to the get_online_cpus()/put_online_cpus() pair.
|
||||
* Must not be called holding callback_mutex, because we must not
|
||||
* call get_online_cpus() while holding callback_mutex. Elsewhere
|
||||
* the kernel nests callback_mutex inside get_online_cpus() calls.
|
||||
* So the reverse nesting would risk an ABBA deadlock.
|
||||
* Must be called with cgroup_lock held.
|
||||
*
|
||||
* The three key local variables below are:
|
||||
* q - a linked-list queue of cpuset pointers, used to implement a
|
||||
@@ -588,10 +574,10 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
|
||||
* element of the partition (one sched domain) to be passed to
|
||||
* partition_sched_domains().
|
||||
*/
|
||||
|
||||
void rebuild_sched_domains(void)
|
||||
static int generate_sched_domains(cpumask_t **domains,
|
||||
struct sched_domain_attr **attributes)
|
||||
{
|
||||
LIST_HEAD(q); /* queue of cpusets to be scanned*/
|
||||
LIST_HEAD(q); /* queue of cpusets to be scanned */
|
||||
struct cpuset *cp; /* scans q */
|
||||
struct cpuset **csa; /* array of all cpuset ptrs */
|
||||
int csn; /* how many cpuset ptrs in csa so far */
|
||||
@@ -601,23 +587,26 @@ void rebuild_sched_domains(void)
|
||||
int ndoms; /* number of sched domains in result */
|
||||
int nslot; /* next empty doms[] cpumask_t slot */
|
||||
|
||||
csa = NULL;
|
||||
ndoms = 0;
|
||||
doms = NULL;
|
||||
dattr = NULL;
|
||||
csa = NULL;
|
||||
|
||||
/* Special case for the 99% of systems with one, full, sched domain */
|
||||
if (is_sched_load_balance(&top_cpuset)) {
|
||||
ndoms = 1;
|
||||
doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
|
||||
if (!doms)
|
||||
goto rebuild;
|
||||
goto done;
|
||||
|
||||
dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
|
||||
if (dattr) {
|
||||
*dattr = SD_ATTR_INIT;
|
||||
update_domain_attr_tree(dattr, &top_cpuset);
|
||||
}
|
||||
*doms = top_cpuset.cpus_allowed;
|
||||
goto rebuild;
|
||||
|
||||
ndoms = 1;
|
||||
goto done;
|
||||
}
|
||||
|
||||
csa = kmalloc(number_of_cpusets * sizeof(cp), GFP_KERNEL);
|
||||
@@ -680,61 +669,141 @@ restart:
|
||||
}
|
||||
}
|
||||
|
||||
/* Convert <csn, csa> to <ndoms, doms> */
|
||||
/*
|
||||
* Now we know how many domains to create.
|
||||
* Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
|
||||
*/
|
||||
doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
|
||||
if (!doms)
|
||||
goto rebuild;
|
||||
if (!doms) {
|
||||
ndoms = 0;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* The rest of the code, including the scheduler, can deal with
|
||||
* dattr==NULL case. No need to abort if alloc fails.
|
||||
*/
|
||||
dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL);
|
||||
|
||||
for (nslot = 0, i = 0; i < csn; i++) {
|
||||
struct cpuset *a = csa[i];
|
||||
cpumask_t *dp;
|
||||
int apn = a->pn;
|
||||
|
||||
if (apn >= 0) {
|
||||
cpumask_t *dp = doms + nslot;
|
||||
|
||||
if (nslot == ndoms) {
|
||||
static int warnings = 10;
|
||||
if (warnings) {
|
||||
printk(KERN_WARNING
|
||||
"rebuild_sched_domains confused:"
|
||||
" nslot %d, ndoms %d, csn %d, i %d,"
|
||||
" apn %d\n",
|
||||
nslot, ndoms, csn, i, apn);
|
||||
warnings--;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
cpus_clear(*dp);
|
||||
if (dattr)
|
||||
*(dattr + nslot) = SD_ATTR_INIT;
|
||||
for (j = i; j < csn; j++) {
|
||||
struct cpuset *b = csa[j];
|
||||
|
||||
if (apn == b->pn) {
|
||||
cpus_or(*dp, *dp, b->cpus_allowed);
|
||||
b->pn = -1;
|
||||
if (dattr)
|
||||
update_domain_attr_tree(dattr
|
||||
+ nslot, b);
|
||||
}
|
||||
}
|
||||
nslot++;
|
||||
if (apn < 0) {
|
||||
/* Skip completed partitions */
|
||||
continue;
|
||||
}
|
||||
|
||||
dp = doms + nslot;
|
||||
|
||||
if (nslot == ndoms) {
|
||||
static int warnings = 10;
|
||||
if (warnings) {
|
||||
printk(KERN_WARNING
|
||||
"rebuild_sched_domains confused:"
|
||||
" nslot %d, ndoms %d, csn %d, i %d,"
|
||||
" apn %d\n",
|
||||
nslot, ndoms, csn, i, apn);
|
||||
warnings--;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
cpus_clear(*dp);
|
||||
if (dattr)
|
||||
*(dattr + nslot) = SD_ATTR_INIT;
|
||||
for (j = i; j < csn; j++) {
|
||||
struct cpuset *b = csa[j];
|
||||
|
||||
if (apn == b->pn) {
|
||||
cpus_or(*dp, *dp, b->cpus_allowed);
|
||||
if (dattr)
|
||||
update_domain_attr_tree(dattr + nslot, b);
|
||||
|
||||
/* Done with this partition */
|
||||
b->pn = -1;
|
||||
}
|
||||
}
|
||||
nslot++;
|
||||
}
|
||||
BUG_ON(nslot != ndoms);
|
||||
|
||||
rebuild:
|
||||
/* Have scheduler rebuild sched domains */
|
||||
get_online_cpus();
|
||||
partition_sched_domains(ndoms, doms, dattr);
|
||||
put_online_cpus();
|
||||
|
||||
done:
|
||||
kfree(csa);
|
||||
/* Don't kfree(doms) -- partition_sched_domains() does that. */
|
||||
/* Don't kfree(dattr) -- partition_sched_domains() does that. */
|
||||
|
||||
*domains = doms;
|
||||
*attributes = dattr;
|
||||
return ndoms;
|
||||
}
|
||||
|
||||
/*
|
||||
* Rebuild scheduler domains.
|
||||
*
|
||||
* Call with neither cgroup_mutex held nor within get_online_cpus().
|
||||
* Takes both cgroup_mutex and get_online_cpus().
|
||||
*
|
||||
* Cannot be directly called from cpuset code handling changes
|
||||
* to the cpuset pseudo-filesystem, because it cannot be called
|
||||
* from code that already holds cgroup_mutex.
|
||||
*/
|
||||
static void do_rebuild_sched_domains(struct work_struct *unused)
|
||||
{
|
||||
struct sched_domain_attr *attr;
|
||||
cpumask_t *doms;
|
||||
int ndoms;
|
||||
|
||||
get_online_cpus();
|
||||
|
||||
/* Generate domain masks and attrs */
|
||||
cgroup_lock();
|
||||
ndoms = generate_sched_domains(&doms, &attr);
|
||||
cgroup_unlock();
|
||||
|
||||
/* Have scheduler rebuild the domains */
|
||||
partition_sched_domains(ndoms, doms, attr);
|
||||
|
||||
put_online_cpus();
|
||||
}
|
||||
|
||||
static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
|
||||
|
||||
/*
|
||||
* Rebuild scheduler domains, asynchronously via workqueue.
|
||||
*
|
||||
* If the flag 'sched_load_balance' of any cpuset with non-empty
|
||||
* 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
|
||||
* which has that flag enabled, or if any cpuset with a non-empty
|
||||
* 'cpus' is removed, then call this routine to rebuild the
|
||||
* scheduler's dynamic sched domains.
|
||||
*
|
||||
* The rebuild_sched_domains() and partition_sched_domains()
|
||||
* routines must nest cgroup_lock() inside get_online_cpus(),
|
||||
* but such cpuset changes as these must nest that locking the
|
||||
* other way, holding cgroup_lock() for much of the code.
|
||||
*
|
||||
* So in order to avoid an ABBA deadlock, the cpuset code handling
|
||||
* these user changes delegates the actual sched domain rebuilding
|
||||
* to a separate workqueue thread, which ends up processing the
|
||||
* above do_rebuild_sched_domains() function.
|
||||
*/
|
||||
static void async_rebuild_sched_domains(void)
|
||||
{
|
||||
schedule_work(&rebuild_sched_domains_work);
|
||||
}
|
||||
|
||||
/*
|
||||
* Accomplishes the same scheduler domain rebuild as the above
|
||||
* async_rebuild_sched_domains(), however it directly calls the
|
||||
* rebuild routine synchronously rather than calling it via an
|
||||
* asynchronous work thread.
|
||||
*
|
||||
* This can only be called from code that is not holding
|
||||
* cgroup_mutex (not nested in a cgroup_lock() call.)
|
||||
*/
|
||||
void rebuild_sched_domains(void)
|
||||
{
|
||||
do_rebuild_sched_domains(NULL);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -774,37 +843,25 @@ static void cpuset_change_cpumask(struct task_struct *tsk,
|
||||
/**
|
||||
* update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
|
||||
* @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
|
||||
* @heap: if NULL, defer allocating heap memory to cgroup_scan_tasks()
|
||||
*
|
||||
* Called with cgroup_mutex held
|
||||
*
|
||||
* The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
|
||||
* calling callback functions for each.
|
||||
*
|
||||
* Return 0 if successful, -errno if not.
|
||||
* No return value. It's guaranteed that cgroup_scan_tasks() always returns 0
|
||||
* if @heap != NULL.
|
||||
*/
|
||||
static int update_tasks_cpumask(struct cpuset *cs)
|
||||
static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
|
||||
{
|
||||
struct cgroup_scanner scan;
|
||||
struct ptr_heap heap;
|
||||
int retval;
|
||||
|
||||
/*
|
||||
* cgroup_scan_tasks() will initialize heap->gt for us.
|
||||
* heap_init() is still needed here for we should not change
|
||||
* cs->cpus_allowed when heap_init() fails.
|
||||
*/
|
||||
retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
|
||||
if (retval)
|
||||
return retval;
|
||||
|
||||
scan.cg = cs->css.cgroup;
|
||||
scan.test_task = cpuset_test_cpumask;
|
||||
scan.process_task = cpuset_change_cpumask;
|
||||
scan.heap = &heap;
|
||||
retval = cgroup_scan_tasks(&scan);
|
||||
|
||||
heap_free(&heap);
|
||||
return retval;
|
||||
scan.heap = heap;
|
||||
cgroup_scan_tasks(&scan);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -814,6 +871,7 @@ static int update_tasks_cpumask(struct cpuset *cs)
|
||||
*/
|
||||
static int update_cpumask(struct cpuset *cs, const char *buf)
|
||||
{
|
||||
struct ptr_heap heap;
|
||||
struct cpuset trialcs;
|
||||
int retval;
|
||||
int is_load_balanced;
|
||||
@@ -848,6 +906,10 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
|
||||
if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
|
||||
return 0;
|
||||
|
||||
retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
|
||||
if (retval)
|
||||
return retval;
|
||||
|
||||
is_load_balanced = is_sched_load_balance(&trialcs);
|
||||
|
||||
mutex_lock(&callback_mutex);
|
||||
@@ -858,12 +920,12 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
|
||||
* Scan tasks in the cpuset, and update the cpumasks of any
|
||||
* that need an update.
|
||||
*/
|
||||
retval = update_tasks_cpumask(cs);
|
||||
if (retval < 0)
|
||||
return retval;
|
||||
update_tasks_cpumask(cs, &heap);
|
||||
|
||||
heap_free(&heap);
|
||||
|
||||
if (is_load_balanced)
|
||||
rebuild_sched_domains();
|
||||
async_rebuild_sched_domains();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1090,7 +1152,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
|
||||
if (val != cs->relax_domain_level) {
|
||||
cs->relax_domain_level = val;
|
||||
if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
|
||||
rebuild_sched_domains();
|
||||
async_rebuild_sched_domains();
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -1131,7 +1193,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
|
||||
mutex_unlock(&callback_mutex);
|
||||
|
||||
if (cpus_nonempty && balance_flag_changed)
|
||||
rebuild_sched_domains();
|
||||
async_rebuild_sched_domains();
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1492,6 +1554,9 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
/* Unreachable but makes gcc happy */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
|
||||
@@ -1504,6 +1569,9 @@ static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
/* Unrechable but makes gcc happy */
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -1692,15 +1760,9 @@ static struct cgroup_subsys_state *cpuset_create(
|
||||
}
|
||||
|
||||
/*
|
||||
* Locking note on the strange update_flag() call below:
|
||||
*
|
||||
* If the cpuset being removed has its flag 'sched_load_balance'
|
||||
* enabled, then simulate turning sched_load_balance off, which
|
||||
* will call rebuild_sched_domains(). The get_online_cpus()
|
||||
* call in rebuild_sched_domains() must not be made while holding
|
||||
* callback_mutex. Elsewhere the kernel nests callback_mutex inside
|
||||
* get_online_cpus() calls. So the reverse nesting would risk an
|
||||
* ABBA deadlock.
|
||||
* will call async_rebuild_sched_domains().
|
||||
*/
|
||||
|
||||
static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
|
||||
@@ -1719,7 +1781,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
|
||||
struct cgroup_subsys cpuset_subsys = {
|
||||
.name = "cpuset",
|
||||
.create = cpuset_create,
|
||||
.destroy = cpuset_destroy,
|
||||
.destroy = cpuset_destroy,
|
||||
.can_attach = cpuset_can_attach,
|
||||
.attach = cpuset_attach,
|
||||
.populate = cpuset_populate,
|
||||
@@ -1811,7 +1873,7 @@ static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
|
||||
}
|
||||
|
||||
/*
|
||||
* If common_cpu_mem_hotplug_unplug(), below, unplugs any CPUs
|
||||
* If CPU and/or memory hotplug handlers, below, unplug any CPUs
|
||||
* or memory nodes, we need to walk over the cpuset hierarchy,
|
||||
* removing that CPU or node from all cpusets. If this removes the
|
||||
* last CPU or node from a cpuset, then move the tasks in the empty
|
||||
@@ -1859,7 +1921,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
|
||||
* that has tasks along with an empty 'mems'. But if we did see such
|
||||
* a cpuset, we'd handle it just like we do if its 'cpus' was empty.
|
||||
*/
|
||||
static void scan_for_empty_cpusets(const struct cpuset *root)
|
||||
static void scan_for_empty_cpusets(struct cpuset *root)
|
||||
{
|
||||
LIST_HEAD(queue);
|
||||
struct cpuset *cp; /* scans cpusets being updated */
|
||||
@@ -1896,41 +1958,12 @@ static void scan_for_empty_cpusets(const struct cpuset *root)
|
||||
nodes_empty(cp->mems_allowed))
|
||||
remove_tasks_in_empty_cpuset(cp);
|
||||
else {
|
||||
update_tasks_cpumask(cp);
|
||||
update_tasks_cpumask(cp, NULL);
|
||||
update_tasks_nodemask(cp, &oldmems);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The cpus_allowed and mems_allowed nodemasks in the top_cpuset track
|
||||
* cpu_online_map and node_states[N_HIGH_MEMORY]. Force the top cpuset to
|
||||
* track what's online after any CPU or memory node hotplug or unplug event.
|
||||
*
|
||||
* Since there are two callers of this routine, one for CPU hotplug
|
||||
* events and one for memory node hotplug events, we could have coded
|
||||
* two separate routines here. We code it as a single common routine
|
||||
* in order to minimize text size.
|
||||
*/
|
||||
|
||||
static void common_cpu_mem_hotplug_unplug(int rebuild_sd)
|
||||
{
|
||||
cgroup_lock();
|
||||
|
||||
top_cpuset.cpus_allowed = cpu_online_map;
|
||||
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
|
||||
scan_for_empty_cpusets(&top_cpuset);
|
||||
|
||||
/*
|
||||
* Scheduler destroys domains on hotplug events.
|
||||
* Rebuild them based on the current settings.
|
||||
*/
|
||||
if (rebuild_sd)
|
||||
rebuild_sched_domains();
|
||||
|
||||
cgroup_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
* The top_cpuset tracks what CPUs and Memory Nodes are online,
|
||||
* period. This is necessary in order to make cpusets transparent
|
||||
@@ -1939,40 +1972,52 @@ static void common_cpu_mem_hotplug_unplug(int rebuild_sd)
|
||||
*
|
||||
* This routine ensures that top_cpuset.cpus_allowed tracks
|
||||
* cpu_online_map on each CPU hotplug (cpuhp) event.
|
||||
*
|
||||
* Called within get_online_cpus(). Needs to call cgroup_lock()
|
||||
* before calling generate_sched_domains().
|
||||
*/
|
||||
|
||||
static int cpuset_handle_cpuhp(struct notifier_block *unused_nb,
|
||||
static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
|
||||
unsigned long phase, void *unused_cpu)
|
||||
{
|
||||
struct sched_domain_attr *attr;
|
||||
cpumask_t *doms;
|
||||
int ndoms;
|
||||
|
||||
switch (phase) {
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_UP_CANCELED_FROZEN:
|
||||
case CPU_DOWN_FAILED:
|
||||
case CPU_DOWN_FAILED_FROZEN:
|
||||
case CPU_ONLINE:
|
||||
case CPU_ONLINE_FROZEN:
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
common_cpu_mem_hotplug_unplug(1);
|
||||
break;
|
||||
|
||||
default:
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
cgroup_lock();
|
||||
top_cpuset.cpus_allowed = cpu_online_map;
|
||||
scan_for_empty_cpusets(&top_cpuset);
|
||||
ndoms = generate_sched_domains(&doms, &attr);
|
||||
cgroup_unlock();
|
||||
|
||||
/* Have scheduler rebuild the domains */
|
||||
partition_sched_domains(ndoms, doms, attr);
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
/*
|
||||
* Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
|
||||
* Call this routine anytime after you change
|
||||
* node_states[N_HIGH_MEMORY].
|
||||
* See also the previous routine cpuset_handle_cpuhp().
|
||||
* Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
|
||||
* See also the previous routine cpuset_track_online_cpus().
|
||||
*/
|
||||
|
||||
void cpuset_track_online_nodes(void)
|
||||
{
|
||||
common_cpu_mem_hotplug_unplug(0);
|
||||
cgroup_lock();
|
||||
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
|
||||
scan_for_empty_cpusets(&top_cpuset);
|
||||
cgroup_unlock();
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1987,7 +2032,7 @@ void __init cpuset_init_smp(void)
|
||||
top_cpuset.cpus_allowed = cpu_online_map;
|
||||
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
|
||||
|
||||
hotcpu_notifier(cpuset_handle_cpuhp, 0);
|
||||
hotcpu_notifier(cpuset_track_online_cpus, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -124,6 +124,7 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size,
}
return (mem != NULL);
}
EXPORT_SYMBOL(dma_alloc_from_coherent);

/**
* dma_release_from_coherent() - try to free the memory allocated from per-device coherent memory pool
@@ -151,3 +152,4 @@ int dma_release_from_coherent(struct device *dev, int order, void *vaddr)
}
return 0;
}
EXPORT_SYMBOL(dma_release_from_coherent);

@@ -1,4 +1,4 @@
/* $Id: dma.c,v 1.7 1994/12/28 03:35:33 root Exp root $
/*
* linux/kernel/dma.c: A DMA channel allocator. Inspired by linux/kernel/irq.c.
*
* Written by Hennus Bergman, 1992.

@@ -583,8 +583,6 @@ mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
|
||||
* If there are other users of the mm and the owner (us) is exiting
|
||||
* we need to find a new owner to take on the responsibility.
|
||||
*/
|
||||
if (!mm)
|
||||
return 0;
|
||||
if (atomic_read(&mm->mm_users) <= 1)
|
||||
return 0;
|
||||
if (mm->owner != p)
|
||||
@@ -627,29 +625,38 @@ retry:
|
||||
} while_each_thread(g, c);
|
||||
|
||||
read_unlock(&tasklist_lock);
|
||||
/*
|
||||
* We found no owner yet mm_users > 1: this implies that we are
|
||||
* most likely racing with swapoff (try_to_unuse()) or /proc or
|
||||
* ptrace or page migration (get_task_mm()). Mark owner as NULL,
|
||||
* so that subsystems can understand the callback and take action.
|
||||
*/
|
||||
down_write(&mm->mmap_sem);
|
||||
cgroup_mm_owner_callbacks(mm->owner, NULL);
|
||||
mm->owner = NULL;
|
||||
up_write(&mm->mmap_sem);
|
||||
return;
|
||||
|
||||
assign_new_owner:
|
||||
BUG_ON(c == p);
|
||||
get_task_struct(c);
|
||||
read_unlock(&tasklist_lock);
|
||||
down_write(&mm->mmap_sem);
|
||||
/*
|
||||
* The task_lock protects c->mm from changing.
|
||||
* We always want mm->owner->mm == mm
|
||||
*/
|
||||
task_lock(c);
|
||||
/*
|
||||
* Delay read_unlock() till we have the task_lock()
|
||||
* to ensure that c does not slip away underneath us
|
||||
*/
|
||||
read_unlock(&tasklist_lock);
|
||||
if (c->mm != mm) {
|
||||
task_unlock(c);
|
||||
up_write(&mm->mmap_sem);
|
||||
put_task_struct(c);
|
||||
goto retry;
|
||||
}
|
||||
cgroup_mm_owner_callbacks(mm->owner, c);
|
||||
mm->owner = c;
|
||||
task_unlock(c);
|
||||
up_write(&mm->mmap_sem);
|
||||
put_task_struct(c);
|
||||
}
|
||||
#endif /* CONFIG_MM_OWNER */
|
||||
|
@@ -802,6 +802,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
|
||||
|
||||
sig->leader = 0; /* session leadership doesn't inherit */
|
||||
sig->tty_old_pgrp = NULL;
|
||||
sig->tty = NULL;
|
||||
|
||||
sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
|
||||
sig->gtime = cputime_zero;
|
||||
@@ -838,6 +839,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
|
||||
void __cleanup_signal(struct signal_struct *sig)
|
||||
{
|
||||
exit_thread_group_keys(sig);
|
||||
tty_kref_put(sig->tty);
|
||||
kmem_cache_free(signal_cachep, sig);
|
||||
}
|
||||
|
||||
@@ -1229,7 +1231,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
p->nsproxy->pid_ns->child_reaper = p;
|
||||
|
||||
p->signal->leader_pid = pid;
|
||||
p->signal->tty = current->signal->tty;
|
||||
tty_kref_put(p->signal->tty);
|
||||
p->signal->tty = tty_kref_get(current->signal->tty);
|
||||
set_task_pgrp(p, task_pgrp_nr(current));
|
||||
set_task_session(p, task_session_nr(current));
|
||||
attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
|
||||
|
@@ -672,13 +672,14 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
|
||||
*/
|
||||
BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
|
||||
return 1;
|
||||
case HRTIMER_CB_IRQSAFE_NO_SOFTIRQ:
|
||||
case HRTIMER_CB_IRQSAFE_PERCPU:
|
||||
case HRTIMER_CB_IRQSAFE_UNLOCKED:
|
||||
/*
|
||||
* This is solely for the sched tick emulation with
|
||||
* dynamic tick support to ensure that we do not
|
||||
* restart the tick right on the edge and end up with
|
||||
* the tick timer in the softirq ! The calling site
|
||||
* takes care of this.
|
||||
* takes care of this. Also used for hrtimer sleeper !
|
||||
*/
|
||||
debug_hrtimer_deactivate(timer);
|
||||
return 1;
|
||||
@@ -1266,7 +1267,8 @@ static void __run_hrtimer(struct hrtimer *timer)
|
||||
timer_stats_account_hrtimer(timer);
|
||||
|
||||
fn = timer->function;
|
||||
if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) {
|
||||
if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
|
||||
timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) {
|
||||
/*
|
||||
* Used for scheduler timers, avoid lock inversion with
|
||||
* rq->lock and tasklist_lock.
|
||||
@@ -1517,7 +1519,7 @@ void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
|
||||
sl->timer.function = hrtimer_wakeup;
|
||||
sl->task = task;
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
|
||||
sl->timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -1661,29 +1663,95 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
|
||||
static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
|
||||
struct hrtimer_clock_base *new_base)
|
||||
static int migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
|
||||
struct hrtimer_clock_base *new_base, int dcpu)
|
||||
{
|
||||
struct hrtimer *timer;
|
||||
struct rb_node *node;
|
||||
int raise = 0;
|
||||
|
||||
while ((node = rb_first(&old_base->active))) {
|
||||
timer = rb_entry(node, struct hrtimer, node);
|
||||
BUG_ON(hrtimer_callback_running(timer));
|
||||
debug_hrtimer_deactivate(timer);
|
||||
__remove_hrtimer(timer, old_base, HRTIMER_STATE_INACTIVE, 0);
|
||||
|
||||
/*
|
||||
* Should not happen. Per CPU timers should be
|
||||
* canceled _before_ the migration code is called
|
||||
*/
|
||||
if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU) {
|
||||
__remove_hrtimer(timer, old_base,
|
||||
HRTIMER_STATE_INACTIVE, 0);
|
||||
WARN(1, "hrtimer (%p %p)active but cpu %d dead\n",
|
||||
timer, timer->function, dcpu);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark it as STATE_MIGRATE not INACTIVE otherwise the
|
||||
* timer could be seen as !active and just vanish away
|
||||
* under us on another CPU
|
||||
*/
|
||||
__remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
|
||||
timer->base = new_base;
|
||||
/*
|
||||
* Enqueue the timer. Allow reprogramming of the event device
|
||||
*/
|
||||
enqueue_hrtimer(timer, new_base, 1);
|
||||
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
/*
|
||||
* Happens with high res enabled when the timer was
|
||||
* already expired and the callback mode is
|
||||
* HRTIMER_CB_IRQSAFE_UNLOCKED (hrtimer_sleeper). The
|
||||
* enqueue code does not move them to the soft irq
|
||||
* pending list for performance/latency reasons, but
|
||||
* in the migration state, we need to do that
|
||||
* otherwise we end up with a stale timer.
|
||||
*/
|
||||
if (timer->state == HRTIMER_STATE_MIGRATE) {
|
||||
timer->state = HRTIMER_STATE_PENDING;
|
||||
list_add_tail(&timer->cb_entry,
|
||||
&new_base->cpu_base->cb_pending);
|
||||
raise = 1;
|
||||
}
|
||||
#endif
|
||||
/* Clear the migration state bit */
|
||||
timer->state &= ~HRTIMER_STATE_MIGRATE;
|
||||
}
|
||||
return raise;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
|
||||
struct hrtimer_cpu_base *new_base)
|
||||
{
|
||||
struct hrtimer *timer;
|
||||
int raise = 0;
|
||||
|
||||
while (!list_empty(&old_base->cb_pending)) {
|
||||
timer = list_entry(old_base->cb_pending.next,
|
||||
struct hrtimer, cb_entry);
|
||||
|
||||
__remove_hrtimer(timer, timer->base, HRTIMER_STATE_PENDING, 0);
|
||||
timer->base = &new_base->clock_base[timer->base->index];
|
||||
list_add_tail(&timer->cb_entry, &new_base->cb_pending);
|
||||
raise = 1;
|
||||
}
|
||||
return raise;
|
||||
}
|
||||
#else
|
||||
static int migrate_hrtimer_pending(struct hrtimer_cpu_base *old_base,
|
||||
struct hrtimer_cpu_base *new_base)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void migrate_hrtimers(int cpu)
|
||||
{
|
||||
struct hrtimer_cpu_base *old_base, *new_base;
|
||||
int i;
|
||||
int i, raise = 0;
|
||||
|
||||
BUG_ON(cpu_online(cpu));
|
||||
old_base = &per_cpu(hrtimer_bases, cpu);
|
||||
@@ -1696,14 +1764,21 @@ static void migrate_hrtimers(int cpu)
|
||||
spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
|
||||
|
||||
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
|
||||
migrate_hrtimer_list(&old_base->clock_base[i],
|
||||
&new_base->clock_base[i]);
|
||||
if (migrate_hrtimer_list(&old_base->clock_base[i],
|
||||
&new_base->clock_base[i], cpu))
|
||||
raise = 1;
|
||||
}
|
||||
|
||||
if (migrate_hrtimer_pending(old_base, new_base))
|
||||
raise = 1;
|
||||
|
||||
spin_unlock(&old_base->lock);
|
||||
spin_unlock(&new_base->lock);
|
||||
local_irq_enable();
|
||||
put_cpu_var(hrtimer_bases);
|
||||
|
||||
if (raise)
|
||||
hrtimer_raise_softirq();
|
||||
}
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
||||
|
||||
|
@@ -89,7 +89,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
|
||||
set_balance_irq_affinity(irq, cpumask);
|
||||
|
||||
#ifdef CONFIG_GENERIC_PENDING_IRQ
|
||||
set_pending_irq(irq, cpumask);
|
||||
if (desc->status & IRQ_MOVE_PCNTXT) {
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&desc->lock, flags);
|
||||
desc->chip->set_affinity(irq, cpumask);
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
} else
|
||||
set_pending_irq(irq, cpumask);
|
||||
#else
|
||||
desc->affinity = cpumask;
|
||||
desc->chip->set_affinity(irq, cpumask);
|
||||
|
@@ -260,7 +260,6 @@ const char *kallsyms_lookup(unsigned long addr,
/* see if it's in a module */
return module_address_lookup(addr, symbolsize, offset, modname,
namebuf);
return NULL;
}

int lookup_symbol_name(unsigned long addr, char *symname)

@@ -753,8 +753,14 @@ static struct page *kimage_alloc_page(struct kimage *image,
|
||||
*old = addr | (*old & ~PAGE_MASK);
|
||||
|
||||
/* The old page I have found cannot be a
|
||||
* destination page, so return it.
|
||||
* destination page, so return it if it's
|
||||
* gfp_flags honor the ones passed in.
|
||||
*/
|
||||
if (!(gfp_mask & __GFP_HIGHMEM) &&
|
||||
PageHighMem(old_page)) {
|
||||
kimage_free_pages(old_page);
|
||||
continue;
|
||||
}
|
||||
addr = old_addr;
|
||||
page = old_page;
|
||||
break;
|
||||
|
@@ -488,7 +488,7 @@ static int write_mem_msg(int binary)
|
||||
if (err)
|
||||
return err;
|
||||
if (CACHE_FLUSH_IS_SAFE)
|
||||
flush_icache_range(addr, addr + length + 1);
|
||||
flush_icache_range(addr, addr + length);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -590,6 +590,7 @@ static void kgdb_wait(struct pt_regs *regs)
|
||||
|
||||
/* Signal the primary CPU that we are done: */
|
||||
atomic_set(&cpu_in_kgdb[cpu], 0);
|
||||
touch_softlockup_watchdog();
|
||||
clocksource_touch_watchdog();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
@@ -1432,6 +1433,7 @@ acquirelock:
|
||||
atomic_read(&kgdb_cpu_doing_single_step) != cpu) {
|
||||
|
||||
atomic_set(&kgdb_active, -1);
|
||||
touch_softlockup_watchdog();
|
||||
clocksource_touch_watchdog();
|
||||
local_irq_restore(flags);
|
||||
|
||||
@@ -1462,7 +1464,7 @@ acquirelock:
|
||||
* Get the passive CPU lock which will hold all the non-primary
|
||||
* CPU in a spin state while the debugger is active
|
||||
*/
|
||||
if (!kgdb_single_step || !kgdb_contthread) {
|
||||
if (!kgdb_single_step) {
|
||||
for (i = 0; i < NR_CPUS; i++)
|
||||
atomic_set(&passive_cpu_wait[i], 1);
|
||||
}
|
||||
@@ -1475,7 +1477,7 @@ acquirelock:
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/* Signal the other CPUs to enter kgdb_wait() */
|
||||
if ((!kgdb_single_step || !kgdb_contthread) && kgdb_do_roundup)
|
||||
if ((!kgdb_single_step) && kgdb_do_roundup)
|
||||
kgdb_roundup_cpus(flags);
|
||||
#endif
|
||||
|
||||
@@ -1494,7 +1496,7 @@ acquirelock:
|
||||
kgdb_post_primary_code(ks->linux_regs, ks->ex_vector, ks->err_code);
|
||||
kgdb_deactivate_sw_breakpoints();
|
||||
kgdb_single_step = 0;
|
||||
kgdb_contthread = NULL;
|
||||
kgdb_contthread = current;
|
||||
exception_level = 0;
|
||||
|
||||
/* Talk to debugger with gdbserial protocol */
|
||||
@@ -1508,7 +1510,7 @@ acquirelock:
|
||||
kgdb_info[ks->cpu].task = NULL;
|
||||
atomic_set(&cpu_in_kgdb[ks->cpu], 0);
|
||||
|
||||
if (!kgdb_single_step || !kgdb_contthread) {
|
||||
if (!kgdb_single_step) {
|
||||
for (i = NR_CPUS-1; i >= 0; i--)
|
||||
atomic_set(&passive_cpu_wait[i], 0);
|
||||
/*
|
||||
@@ -1524,6 +1526,7 @@ acquirelock:
|
||||
kgdb_restore:
|
||||
/* Free kgdb_active */
|
||||
atomic_set(&kgdb_active, -1);
|
||||
touch_softlockup_watchdog();
|
||||
clocksource_touch_watchdog();
|
||||
local_irq_restore(flags);
|
||||
|
||||
|
@@ -113,7 +113,7 @@ int request_module(const char *fmt, ...)
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(request_module);
|
||||
#endif /* CONFIG_KMOD */
|
||||
#endif /* CONFIG_MODULES */
|
||||
|
||||
struct subprocess_info {
|
||||
struct work_struct work;
|
||||
@@ -265,7 +265,7 @@ static void __call_usermodehelper(struct work_struct *work)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PM
|
||||
#ifdef CONFIG_PM_SLEEP
|
||||
/*
|
||||
* If set, call_usermodehelper_exec() will exit immediately returning -EBUSY
|
||||
* (used for preventing user land processes from being created after the user
|
||||
@@ -288,39 +288,37 @@ static DECLARE_WAIT_QUEUE_HEAD(running_helpers_waitq);
|
||||
*/
|
||||
#define RUNNING_HELPERS_TIMEOUT (5 * HZ)
|
||||
|
||||
static int usermodehelper_pm_callback(struct notifier_block *nfb,
|
||||
unsigned long action,
|
||||
void *ignored)
|
||||
/**
|
||||
* usermodehelper_disable - prevent new helpers from being started
|
||||
*/
|
||||
int usermodehelper_disable(void)
|
||||
{
|
||||
long retval;
|
||||
|
||||
switch (action) {
|
||||
case PM_HIBERNATION_PREPARE:
|
||||
case PM_SUSPEND_PREPARE:
|
||||
usermodehelper_disabled = 1;
|
||||
smp_mb();
|
||||
/*
|
||||
* From now on call_usermodehelper_exec() won't start any new
|
||||
* helpers, so it is sufficient if running_helpers turns out to
|
||||
* be zero at one point (it may be increased later, but that
|
||||
* doesn't matter).
|
||||
*/
|
||||
retval = wait_event_timeout(running_helpers_waitq,
|
||||
usermodehelper_disabled = 1;
|
||||
smp_mb();
|
||||
/*
|
||||
* From now on call_usermodehelper_exec() won't start any new
|
||||
* helpers, so it is sufficient if running_helpers turns out to
|
||||
* be zero at one point (it may be increased later, but that
|
||||
* doesn't matter).
|
||||
*/
|
||||
retval = wait_event_timeout(running_helpers_waitq,
|
||||
atomic_read(&running_helpers) == 0,
|
||||
RUNNING_HELPERS_TIMEOUT);
|
||||
if (retval) {
|
||||
return NOTIFY_OK;
|
||||
} else {
|
||||
usermodehelper_disabled = 0;
|
||||
return NOTIFY_BAD;
|
||||
}
|
||||
case PM_POST_HIBERNATION:
|
||||
case PM_POST_SUSPEND:
|
||||
usermodehelper_disabled = 0;
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
if (retval)
|
||||
return 0;
|
||||
|
||||
return NOTIFY_DONE;
|
||||
usermodehelper_disabled = 0;
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
/**
|
||||
* usermodehelper_enable - allow new helpers to be started again
|
||||
*/
|
||||
void usermodehelper_enable(void)
|
||||
{
|
||||
usermodehelper_disabled = 0;
|
||||
}
|
||||
|
||||
static void helper_lock(void)
|
||||
@@ -334,18 +332,12 @@ static void helper_unlock(void)
|
||||
if (atomic_dec_and_test(&running_helpers))
|
||||
wake_up(&running_helpers_waitq);
|
||||
}
|
||||
|
||||
static void register_pm_notifier_callback(void)
|
||||
{
|
||||
pm_notifier(usermodehelper_pm_callback, 0);
|
||||
}
|
||||
#else /* CONFIG_PM */
|
||||
#else /* CONFIG_PM_SLEEP */
|
||||
#define usermodehelper_disabled 0
|
||||
|
||||
static inline void helper_lock(void) {}
|
||||
static inline void helper_unlock(void) {}
|
||||
static inline void register_pm_notifier_callback(void) {}
|
||||
#endif /* CONFIG_PM */
|
||||
#endif /* CONFIG_PM_SLEEP */
|
||||
|
||||
/**
|
||||
* call_usermodehelper_setup - prepare to call a usermode helper
|
||||
@@ -515,5 +507,4 @@ void __init usermodehelper_init(void)
|
||||
{
|
||||
khelper_wq = create_singlethread_workqueue("khelper");
|
||||
BUG_ON(!khelper_wq);
|
||||
register_pm_notifier_callback();
|
||||
}
|
||||
|
@@ -404,7 +404,7 @@ void kretprobe_hash_lock(struct task_struct *tsk,
spin_lock_irqsave(hlist_lock, *flags);
}

void kretprobe_table_lock(unsigned long hash, unsigned long *flags)
static void kretprobe_table_lock(unsigned long hash, unsigned long *flags)
{
spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
spin_lock_irqsave(hlist_lock, *flags);

@@ -14,6 +14,7 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/profile.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#define KERNEL_ATTR_RO(_name) \
|
||||
@@ -53,6 +54,37 @@ static ssize_t uevent_helper_store(struct kobject *kobj,
|
||||
KERNEL_ATTR_RW(uevent_helper);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PROFILING
|
||||
static ssize_t profiling_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%d\n", prof_on);
|
||||
}
|
||||
static ssize_t profiling_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (prof_on)
|
||||
return -EEXIST;
|
||||
/*
|
||||
* This eventually calls into get_option() which
|
||||
* has a ton of callers and is not const. It is
|
||||
* easiest to cast it away here.
|
||||
*/
|
||||
profile_setup((char *)buf);
|
||||
ret = profile_init();
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = create_proc_profile();
|
||||
if (ret)
|
||||
return ret;
|
||||
return count;
|
||||
}
|
||||
KERNEL_ATTR_RW(profiling);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KEXEC
|
||||
static ssize_t kexec_loaded_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
@@ -109,6 +141,9 @@ static struct attribute * kernel_attrs[] = {
|
||||
&uevent_seqnum_attr.attr,
|
||||
&uevent_helper_attr.attr,
|
||||
#endif
|
||||
#ifdef CONFIG_PROFILING
|
||||
&profiling_attr.attr,
|
||||
#endif
|
||||
#ifdef CONFIG_KEXEC
|
||||
&kexec_loaded_attr.attr,
|
||||
&kexec_crash_loaded_attr.attr,
|
||||
|
@@ -100,7 +100,7 @@ static inline int strong_try_module_get(struct module *mod)
|
||||
static inline void add_taint_module(struct module *mod, unsigned flag)
|
||||
{
|
||||
add_taint(flag);
|
||||
mod->taints |= flag;
|
||||
mod->taints |= (1U << flag);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -784,6 +784,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
|
||||
mutex_lock(&module_mutex);
|
||||
/* Store the name of the last unloaded module for diagnostic purposes */
|
||||
strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
|
||||
unregister_dynamic_debug_module(mod->name);
|
||||
free_module(mod);
|
||||
|
||||
out:
|
||||
@@ -923,7 +924,7 @@ static const char vermagic[] = VERMAGIC_STRING;
|
||||
static int try_to_force_load(struct module *mod, const char *symname)
|
||||
{
|
||||
#ifdef CONFIG_MODULE_FORCE_LOAD
|
||||
if (!(tainted & TAINT_FORCED_MODULE))
|
||||
if (!test_taint(TAINT_FORCED_MODULE))
|
||||
printk("%s: no version for \"%s\" found: kernel tainted.\n",
|
||||
mod->name, symname);
|
||||
add_taint_module(mod, TAINT_FORCED_MODULE);
|
||||
@@ -1033,7 +1034,7 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs,
|
||||
const unsigned long *crc;
|
||||
|
||||
ret = find_symbol(name, &owner, &crc,
|
||||
!(mod->taints & TAINT_PROPRIETARY_MODULE), true);
|
||||
!(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true);
|
||||
if (!IS_ERR_VALUE(ret)) {
|
||||
/* use_module can fail due to OOM,
|
||||
or module initialization or unloading */
|
||||
@@ -1173,7 +1174,7 @@ static void free_notes_attrs(struct module_notes_attrs *notes_attrs,
|
||||
while (i-- > 0)
|
||||
sysfs_remove_bin_file(notes_attrs->dir,
|
||||
&notes_attrs->attrs[i]);
|
||||
kobject_del(notes_attrs->dir);
|
||||
kobject_put(notes_attrs->dir);
|
||||
}
|
||||
kfree(notes_attrs);
|
||||
}
|
||||
@@ -1634,7 +1635,7 @@ static void set_license(struct module *mod, const char *license)
|
||||
license = "unspecified";
|
||||
|
||||
if (!license_is_gpl_compatible(license)) {
|
||||
if (!(tainted & TAINT_PROPRIETARY_MODULE))
|
||||
if (!test_taint(TAINT_PROPRIETARY_MODULE))
|
||||
printk(KERN_WARNING "%s: module license '%s' taints "
|
||||
"kernel.\n", mod->name, license);
|
||||
add_taint_module(mod, TAINT_PROPRIETARY_MODULE);
|
||||
@@ -1783,6 +1784,33 @@ static inline void add_kallsyms(struct module *mod,
|
||||
}
|
||||
#endif /* CONFIG_KALLSYMS */
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
|
||||
static void dynamic_printk_setup(Elf_Shdr *sechdrs, unsigned int verboseindex)
|
||||
{
|
||||
struct mod_debug *debug_info;
|
||||
unsigned long pos, end;
|
||||
unsigned int num_verbose;
|
||||
|
||||
pos = sechdrs[verboseindex].sh_addr;
|
||||
num_verbose = sechdrs[verboseindex].sh_size /
|
||||
sizeof(struct mod_debug);
|
||||
end = pos + (num_verbose * sizeof(struct mod_debug));
|
||||
|
||||
for (; pos < end; pos += sizeof(struct mod_debug)) {
|
||||
debug_info = (struct mod_debug *)pos;
|
||||
register_dynamic_debug_module(debug_info->modname,
|
||||
debug_info->type, debug_info->logical_modname,
|
||||
debug_info->flag_names, debug_info->hash,
|
||||
debug_info->hash2);
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline void dynamic_printk_setup(Elf_Shdr *sechdrs,
|
||||
unsigned int verboseindex)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */
|
||||
|
||||
static void *module_alloc_update_bounds(unsigned long size)
|
||||
{
|
||||
void *ret = module_alloc(size);
|
||||
@@ -1831,6 +1859,7 @@ static noinline struct module *load_module(void __user *umod,
|
||||
#endif
|
||||
unsigned int markersindex;
|
||||
unsigned int markersstringsindex;
|
||||
unsigned int verboseindex;
|
||||
struct module *mod;
|
||||
long err = 0;
|
||||
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
|
||||
@@ -2117,6 +2146,7 @@ static noinline struct module *load_module(void __user *umod,
|
||||
markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
|
||||
markersstringsindex = find_sec(hdr, sechdrs, secstrings,
|
||||
"__markers_strings");
|
||||
verboseindex = find_sec(hdr, sechdrs, secstrings, "__verbose");
|
||||
|
||||
/* Now do relocations. */
|
||||
for (i = 1; i < hdr->e_shnum; i++) {
|
||||
@@ -2167,6 +2197,7 @@ static noinline struct module *load_module(void __user *umod,
|
||||
marker_update_probe_range(mod->markers,
|
||||
mod->markers + mod->num_markers);
|
||||
#endif
|
||||
dynamic_printk_setup(sechdrs, verboseindex);
|
||||
err = module_finalize(hdr, sechdrs, mod);
|
||||
if (err < 0)
|
||||
goto cleanup;
|
||||
@@ -2552,9 +2583,9 @@ static char *module_flags(struct module *mod, char *buf)
|
||||
mod->state == MODULE_STATE_GOING ||
|
||||
mod->state == MODULE_STATE_COMING) {
|
||||
buf[bx++] = '(';
|
||||
if (mod->taints & TAINT_PROPRIETARY_MODULE)
|
||||
if (mod->taints & (1 << TAINT_PROPRIETARY_MODULE))
|
||||
buf[bx++] = 'P';
|
||||
if (mod->taints & TAINT_FORCED_MODULE)
|
||||
if (mod->taints & (1 << TAINT_FORCED_MODULE))
|
||||
buf[bx++] = 'F';
|
||||
/*
|
||||
* TAINT_FORCED_RMMOD: could be added.
|
||||
|
@@ -23,7 +23,7 @@
|
||||
#include <linux/kallsyms.h>
|
||||
|
||||
int panic_on_oops;
|
||||
int tainted;
|
||||
static unsigned long tainted_mask;
|
||||
static int pause_on_oops;
|
||||
static int pause_on_oops_flag;
|
||||
static DEFINE_SPINLOCK(pause_on_oops_lock);
|
||||
@@ -143,6 +143,26 @@ NORET_TYPE void panic(const char * fmt, ...)
|
||||
|
||||
EXPORT_SYMBOL(panic);
|
||||
|
||||
|
||||
struct tnt {
|
||||
u8 bit;
|
||||
char true;
|
||||
char false;
|
||||
};
|
||||
|
||||
static const struct tnt tnts[] = {
|
||||
{ TAINT_PROPRIETARY_MODULE, 'P', 'G' },
|
||||
{ TAINT_FORCED_MODULE, 'F', ' ' },
|
||||
{ TAINT_UNSAFE_SMP, 'S', ' ' },
|
||||
{ TAINT_FORCED_RMMOD, 'R', ' ' },
|
||||
{ TAINT_MACHINE_CHECK, 'M', ' ' },
|
||||
{ TAINT_BAD_PAGE, 'B', ' ' },
|
||||
{ TAINT_USER, 'U', ' ' },
|
||||
{ TAINT_DIE, 'D', ' ' },
|
||||
{ TAINT_OVERRIDDEN_ACPI_TABLE, 'A', ' ' },
|
||||
{ TAINT_WARN, 'W', ' ' },
|
||||
};
|
||||
|
||||
/**
|
||||
* print_tainted - return a string to represent the kernel taint state.
|
||||
*
|
||||
@@ -158,32 +178,41 @@ EXPORT_SYMBOL(panic);
|
||||
*
|
||||
* The string is overwritten by the next call to print_taint().
|
||||
*/
|
||||
|
||||
const char *print_tainted(void)
|
||||
{
|
||||
static char buf[20];
|
||||
if (tainted) {
|
||||
snprintf(buf, sizeof(buf), "Tainted: %c%c%c%c%c%c%c%c%c%c",
|
||||
tainted & TAINT_PROPRIETARY_MODULE ? 'P' : 'G',
|
||||
tainted & TAINT_FORCED_MODULE ? 'F' : ' ',
|
||||
tainted & TAINT_UNSAFE_SMP ? 'S' : ' ',
|
||||
tainted & TAINT_FORCED_RMMOD ? 'R' : ' ',
|
||||
tainted & TAINT_MACHINE_CHECK ? 'M' : ' ',
|
||||
tainted & TAINT_BAD_PAGE ? 'B' : ' ',
|
||||
tainted & TAINT_USER ? 'U' : ' ',
|
||||
tainted & TAINT_DIE ? 'D' : ' ',
|
||||
tainted & TAINT_OVERRIDDEN_ACPI_TABLE ? 'A' : ' ',
|
||||
tainted & TAINT_WARN ? 'W' : ' ');
|
||||
}
|
||||
else
|
||||
static char buf[ARRAY_SIZE(tnts) + sizeof("Tainted: ") + 1];
|
||||
|
||||
if (tainted_mask) {
|
||||
char *s;
|
||||
int i;
|
||||
|
||||
s = buf + sprintf(buf, "Tainted: ");
|
||||
for (i = 0; i < ARRAY_SIZE(tnts); i++) {
|
||||
const struct tnt *t = &tnts[i];
|
||||
*s++ = test_bit(t->bit, &tainted_mask) ?
|
||||
t->true : t->false;
|
||||
}
|
||||
*s = 0;
|
||||
} else
|
||||
snprintf(buf, sizeof(buf), "Not tainted");
|
||||
return(buf);
|
||||
}
|
||||
|
||||
int test_taint(unsigned flag)
|
||||
{
|
||||
return test_bit(flag, &tainted_mask);
|
||||
}
|
||||
EXPORT_SYMBOL(test_taint);
|
||||
|
||||
unsigned long get_taint(void)
|
||||
{
|
||||
return tainted_mask;
|
||||
}
|
||||
|
||||
void add_taint(unsigned flag)
|
||||
{
|
||||
debug_locks = 0; /* can't trust the integrity of the kernel anymore */
|
||||
tainted |= flag;
|
||||
set_bit(flag, &tainted_mask);
|
||||
}
|
||||
EXPORT_SYMBOL(add_taint);
|
||||
|
||||
|
@@ -441,7 +441,7 @@ static struct k_itimer * alloc_posix_timer(void)
return tmr;
if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
kmem_cache_free(posix_timers_cache, tmr);
tmr = NULL;
return NULL;
}
memset(&tmr->sigq->info, 0, sizeof(siginfo_t));
return tmr;

@@ -14,6 +14,7 @@
|
||||
#include <linux/reboot.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/mount.h>
|
||||
@@ -520,6 +521,10 @@ int hibernate(void)
|
||||
if (error)
|
||||
goto Exit;
|
||||
|
||||
error = usermodehelper_disable();
|
||||
if (error)
|
||||
goto Exit;
|
||||
|
||||
/* Allocate memory management structures */
|
||||
error = create_basic_memory_bitmaps();
|
||||
if (error)
|
||||
@@ -558,6 +563,7 @@ int hibernate(void)
|
||||
thaw_processes();
|
||||
Finish:
|
||||
free_basic_memory_bitmaps();
|
||||
usermodehelper_enable();
|
||||
Exit:
|
||||
pm_notifier_call_chain(PM_POST_HIBERNATION);
|
||||
pm_restore_console();
|
||||
@@ -634,6 +640,10 @@ static int software_resume(void)
|
||||
if (error)
|
||||
goto Finish;
|
||||
|
||||
error = usermodehelper_disable();
|
||||
if (error)
|
||||
goto Finish;
|
||||
|
||||
error = create_basic_memory_bitmaps();
|
||||
if (error)
|
||||
goto Finish;
|
||||
@@ -656,6 +666,7 @@ static int software_resume(void)
|
||||
thaw_processes();
|
||||
Done:
|
||||
free_basic_memory_bitmaps();
|
||||
usermodehelper_enable();
|
||||
Finish:
|
||||
pm_notifier_call_chain(PM_POST_RESTORE);
|
||||
pm_restore_console();
|
||||
|
@@ -14,6 +14,7 @@
|
||||
#include <linux/string.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/console.h>
|
||||
#include <linux/cpu.h>
|
||||
@@ -237,6 +238,10 @@ static int suspend_prepare(void)
|
||||
if (error)
|
||||
goto Finish;
|
||||
|
||||
error = usermodehelper_disable();
|
||||
if (error)
|
||||
goto Finish;
|
||||
|
||||
if (suspend_freeze_processes()) {
|
||||
error = -EAGAIN;
|
||||
goto Thaw;
|
||||
@@ -256,6 +261,7 @@ static int suspend_prepare(void)
|
||||
|
||||
Thaw:
|
||||
suspend_thaw_processes();
|
||||
usermodehelper_enable();
|
||||
Finish:
|
||||
pm_notifier_call_chain(PM_POST_SUSPEND);
|
||||
pm_restore_console();
|
||||
@@ -376,6 +382,7 @@ int suspend_devices_and_enter(suspend_state_t state)
|
||||
static void suspend_finish(void)
|
||||
{
|
||||
suspend_thaw_processes();
|
||||
usermodehelper_enable();
|
||||
pm_notifier_call_chain(PM_POST_SUSPEND);
|
||||
pm_restore_console();
|
||||
}
|
||||
|

@@ -212,13 +212,20 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
case SNAPSHOT_FREEZE:
if (data->frozen)
break;

printk("Syncing filesystems ... ");
sys_sync();
printk("done.\n");

error = freeze_processes();
error = usermodehelper_disable();
if (error)
break;

error = freeze_processes();
if (error) {
thaw_processes();
usermodehelper_enable();
}
if (!error)
data->frozen = 1;
break;

@@ -227,6 +234,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
if (!data->frozen || data->ready)
break;
thaw_processes();
usermodehelper_enable();
data->frozen = 0;
break;

@@ -13,7 +13,7 @@
* Fixed SMP synchronization, 08/08/99, Manfred Spraul
* manfred@colorfullife.com
* Rewrote bits to get rid of console_lock
* 01Mar01 Andrew Morton <andrewm@uow.edu.au>
* 01Mar01 Andrew Morton
*/

#include <linux/kernel.h>

@@ -577,9 +577,6 @@ static int have_callable_console(void)
* @fmt: format string
*
* This is printk(). It can be called from any context. We want it to work.
* Be aware of the fact that if oops_in_progress is not set, we might try to
* wake klogd up which could deadlock on runqueue lock if printk() is called
* from scheduler code.
*
* We try to grab the console_sem. If we succeed, it's easy - we log the output and
* call the console drivers. If we fail to get the semaphore we place the output

@@ -593,6 +590,8 @@ static int have_callable_console(void)
*
* See also:
* printf(3)
*
* See the vsnprintf() documentation for format string extensions over C99.
*/

asmlinkage int printk(const char *fmt, ...)

@@ -982,10 +981,25 @@ int is_console_locked(void)
return console_locked;
}

static DEFINE_PER_CPU(int, printk_pending);

void printk_tick(void)
{
if (__get_cpu_var(printk_pending)) {
__get_cpu_var(printk_pending) = 0;
wake_up_interruptible(&log_wait);
}
}

int printk_needs_cpu(int cpu)
{
return per_cpu(printk_pending, cpu);
}

void wake_up_klogd(void)
{
if (!oops_in_progress && waitqueue_active(&log_wait))
wake_up_interruptible(&log_wait);
if (waitqueue_active(&log_wait))
__raw_get_cpu_var(printk_pending) = 1;
}
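
With the hunk above, wake_up_klogd() no longer wakes the log waitqueue directly; it only marks a per-CPU printk_pending flag, and printk_tick() performs the actual wakeup from the timer tick, avoiding scheduler locks from inside printk(). A rough sketch of how the pieces fit together (the tick-side caller shown here is an assumption, not part of this hunk):

/* Illustrative sketch of the deferred-wakeup flow introduced above. */
static void example_printk_side(void)
{
	wake_up_klogd();		/* cheap: only sets this CPU's printk_pending */
}

static void example_tick_side(void)	/* imagined hook in the local timer tick */
{
	/* printk_needs_cpu(cpu) lets nohz code keep a CPU ticking while output is pending */
	printk_tick();			/* safe context: does the real wake_up_interruptible() */
}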

/**

@@ -1291,22 +1305,6 @@ static int __init disable_boot_consoles(void)
}
late_initcall(disable_boot_consoles);

/**
* tty_write_message - write a message to a certain tty, not just the console.
* @tty: the destination tty_struct
* @msg: the message to write
*
* This is used for messages that need to be redirected to a specific tty.
* We don't put it into the syslog queue right now maybe in the future if
* really needed.
*/
void tty_write_message(struct tty_struct *tty, char *msg)
{
if (tty && tty->ops->write)
tty->ops->write(tty, msg, strlen(msg));
return;
}

#if defined CONFIG_PRINTK

/*

@@ -22,6 +22,8 @@
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/sections.h>
#include <asm/irq_regs.h>
#include <asm/ptrace.h>

@@ -50,11 +52,11 @@ static DEFINE_PER_CPU(int, cpu_profile_flip);
static DEFINE_MUTEX(profile_flip_mutex);
#endif /* CONFIG_SMP */

static int __init profile_setup(char *str)
int profile_setup(char *str)
{
static char __initdata schedstr[] = "schedule";
static char __initdata sleepstr[] = "sleep";
static char __initdata kvmstr[] = "kvm";
static char schedstr[] = "schedule";
static char sleepstr[] = "sleep";
static char kvmstr[] = "kvm";
int par;

if (!strncmp(str, sleepstr, strlen(sleepstr))) {

@@ -100,14 +102,33 @@ static int __init profile_setup(char *str)
__setup("profile=", profile_setup);


void __init profile_init(void)
int profile_init(void)
{
int buffer_bytes;
if (!prof_on)
return;
return 0;

/* only text is profiled */
prof_len = (_etext - _stext) >> prof_shift;
prof_buffer = alloc_bootmem(prof_len*sizeof(atomic_t));
buffer_bytes = prof_len*sizeof(atomic_t);
if (!slab_is_available()) {
prof_buffer = alloc_bootmem(buffer_bytes);
return 0;
}

prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
if (prof_buffer)
return 0;

prof_buffer = alloc_pages_exact(buffer_bytes, GFP_KERNEL|__GFP_ZERO);
if (prof_buffer)
return 0;

prof_buffer = vmalloc(buffer_bytes);
if (prof_buffer)
return 0;

return -ENOMEM;
}
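
profile_init() above now works both before and after the slab allocator is up by falling through a chain of allocators: bootmem, then kzalloc(), then alloc_pages_exact(), then vmalloc(). The same fallback idiom in isolation (helper name invented for illustration):

/* Illustrative sketch of the fallback-allocation idiom used above. */
static void *example_alloc_profile_buffer(size_t bytes)
{
	void *buf;

	if (!slab_is_available())		/* too early in boot: bootmem only */
		return alloc_bootmem(bytes);

	buf = kzalloc(bytes, GFP_KERNEL);	/* first choice once slab is up */
	if (buf)
		return buf;

	buf = alloc_pages_exact(bytes, GFP_KERNEL | __GFP_ZERO);
	if (buf)
		return buf;

	return vmalloc(bytes);			/* last resort: virtually contiguous */
}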

/* Profile event notifications */

@@ -527,7 +548,7 @@ static void __init profile_nop(void *unused)
{
}

static int __init create_hash_tables(void)
static int create_hash_tables(void)
{
int cpu;

@@ -575,14 +596,14 @@ out_cleanup:
#define create_hash_tables() ({ 0; })
#endif

static int __init create_proc_profile(void)
int create_proc_profile(void)
{
struct proc_dir_entry *entry;

if (!prof_on)
return 0;
if (create_hash_tables())
return -1;
return -ENOMEM;
entry = proc_create("profile", S_IWUSR | S_IRUGO,
NULL, &proc_profile_operations);
if (!entry)
@@ -47,6 +47,7 @@
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/time.h>
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
static struct lock_class_key rcu_lock_key;
|
||||
@@ -60,12 +61,14 @@ EXPORT_SYMBOL_GPL(rcu_lock_map);
|
||||
static struct rcu_ctrlblk rcu_ctrlblk = {
|
||||
.cur = -300,
|
||||
.completed = -300,
|
||||
.pending = -300,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
|
||||
.cpumask = CPU_MASK_NONE,
|
||||
};
|
||||
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
|
||||
.cur = -300,
|
||||
.completed = -300,
|
||||
.pending = -300,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
|
||||
.cpumask = CPU_MASK_NONE,
|
||||
};
|
||||
@@ -83,7 +86,10 @@ static void force_quiescent_state(struct rcu_data *rdp,
|
||||
{
|
||||
int cpu;
|
||||
cpumask_t cpumask;
|
||||
unsigned long flags;
|
||||
|
||||
set_need_resched();
|
||||
spin_lock_irqsave(&rcp->lock, flags);
|
||||
if (unlikely(!rcp->signaled)) {
|
||||
rcp->signaled = 1;
|
||||
/*
|
||||
@@ -109,6 +115,7 @@ static void force_quiescent_state(struct rcu_data *rdp,
|
||||
for_each_cpu_mask_nr(cpu, cpumask)
|
||||
smp_send_reschedule(cpu);
|
||||
}
|
||||
spin_unlock_irqrestore(&rcp->lock, flags);
|
||||
}
|
||||
#else
|
||||
static inline void force_quiescent_state(struct rcu_data *rdp,
|
||||
@@ -118,6 +125,126 @@ static inline void force_quiescent_state(struct rcu_data *rdp,
|
||||
}
|
||||
#endif
|
||||
|
||||
static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
long batch;
|
||||
|
||||
head->next = NULL;
|
||||
smp_mb(); /* Read of rcu->cur must happen after any change by caller. */
|
||||
|
||||
/*
|
||||
* Determine the batch number of this callback.
|
||||
*
|
||||
* Using ACCESS_ONCE to avoid the following error when gcc eliminates
|
||||
* local variable "batch" and emits codes like this:
|
||||
* 1) rdp->batch = rcp->cur + 1 # gets old value
|
||||
* ......
|
||||
* 2)rcu_batch_after(rcp->cur + 1, rdp->batch) # gets new value
|
||||
* then [*nxttail[0], *nxttail[1]) may contain callbacks
|
||||
* that batch# = rdp->batch, see the comment of struct rcu_data.
|
||||
*/
|
||||
batch = ACCESS_ONCE(rcp->cur) + 1;
|
||||
|
||||
if (rdp->nxtlist && rcu_batch_after(batch, rdp->batch)) {
|
||||
/* process callbacks */
|
||||
rdp->nxttail[0] = rdp->nxttail[1];
|
||||
rdp->nxttail[1] = rdp->nxttail[2];
|
||||
if (rcu_batch_after(batch - 1, rdp->batch))
|
||||
rdp->nxttail[0] = rdp->nxttail[2];
|
||||
}
|
||||
|
||||
rdp->batch = batch;
|
||||
*rdp->nxttail[2] = head;
|
||||
rdp->nxttail[2] = &head->next;
|
||||
|
||||
if (unlikely(++rdp->qlen > qhimark)) {
|
||||
rdp->blimit = INT_MAX;
|
||||
force_quiescent_state(rdp, &rcu_ctrlblk);
|
||||
}
|
||||
}
|
||||
|
||||
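
The comment inside __call_rcu() above explains the ACCESS_ONCE() read of rcp->cur: without it the compiler may legally re-read the variable, so two uses of "batch" could observe different grace-period numbers. A minimal sketch of the idiom (illustrative only, not part of the patch):

/* Illustrative sketch: one forced read gives every later use the same snapshot. */
static long example_snapshot_next_batch(struct rcu_ctrlblk *rcp)
{
	return ACCESS_ONCE(rcp->cur) + 1;
}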
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
|
||||
|
||||
static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
rcp->gp_start = jiffies;
|
||||
rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
|
||||
}
|
||||
|
||||
static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
int cpu;
|
||||
long delta;
|
||||
unsigned long flags;
|
||||
|
||||
/* Only let one CPU complain about others per time interval. */
|
||||
|
||||
spin_lock_irqsave(&rcp->lock, flags);
|
||||
delta = jiffies - rcp->jiffies_stall;
|
||||
if (delta < 2 || rcp->cur != rcp->completed) {
|
||||
spin_unlock_irqrestore(&rcp->lock, flags);
|
||||
return;
|
||||
}
|
||||
rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
|
||||
spin_unlock_irqrestore(&rcp->lock, flags);
|
||||
|
||||
/* OK, time to rat on our buddy... */
|
||||
|
||||
printk(KERN_ERR "RCU detected CPU stalls:");
|
||||
for_each_possible_cpu(cpu) {
|
||||
if (cpu_isset(cpu, rcp->cpumask))
|
||||
printk(" %d", cpu);
|
||||
}
|
||||
printk(" (detected by %d, t=%ld jiffies)\n",
|
||||
smp_processor_id(), (long)(jiffies - rcp->gp_start));
|
||||
}
|
||||
|
||||
static void print_cpu_stall(struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
|
||||
smp_processor_id(), jiffies,
|
||||
jiffies - rcp->gp_start);
|
||||
dump_stack();
|
||||
spin_lock_irqsave(&rcp->lock, flags);
|
||||
if ((long)(jiffies - rcp->jiffies_stall) >= 0)
|
||||
rcp->jiffies_stall =
|
||||
jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
|
||||
spin_unlock_irqrestore(&rcp->lock, flags);
|
||||
set_need_resched(); /* kick ourselves to get things going. */
|
||||
}
|
||||
|
||||
static void check_cpu_stall(struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
long delta;
|
||||
|
||||
delta = jiffies - rcp->jiffies_stall;
|
||||
if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
|
||||
|
||||
/* We haven't checked in, so go dump stack. */
|
||||
print_cpu_stall(rcp);
|
||||
|
||||
} else if (rcp->cur != rcp->completed && delta >= 2) {
|
||||
|
||||
/* They had two seconds to dump stack, so complain. */
|
||||
print_other_cpu_stall(rcp);
|
||||
}
|
||||
}
|
||||
|
||||
#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
|
||||
|
||||
static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void check_cpu_stall(struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
|
||||
|
||||
/**
|
||||
* call_rcu - Queue an RCU callback for invocation after a grace period.
|
||||
* @head: structure to be used for queueing the RCU updates.
|
||||
@@ -133,18 +260,10 @@ void call_rcu(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *rcu))
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp;
|
||||
|
||||
head->func = func;
|
||||
head->next = NULL;
|
||||
local_irq_save(flags);
|
||||
rdp = &__get_cpu_var(rcu_data);
|
||||
*rdp->nxttail = head;
|
||||
rdp->nxttail = &head->next;
|
||||
if (unlikely(++rdp->qlen > qhimark)) {
|
||||
rdp->blimit = INT_MAX;
|
||||
force_quiescent_state(rdp, &rcu_ctrlblk);
|
||||
}
|
||||
__call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data));
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(call_rcu);
|
||||
@@ -169,20 +288,10 @@ void call_rcu_bh(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *rcu))
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_data *rdp;
|
||||
|
||||
head->func = func;
|
||||
head->next = NULL;
|
||||
local_irq_save(flags);
|
||||
rdp = &__get_cpu_var(rcu_bh_data);
|
||||
*rdp->nxttail = head;
|
||||
rdp->nxttail = &head->next;
|
||||
|
||||
if (unlikely(++rdp->qlen > qhimark)) {
|
||||
rdp->blimit = INT_MAX;
|
||||
force_quiescent_state(rdp, &rcu_bh_ctrlblk);
|
||||
}
|
||||
|
||||
__call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(call_rcu_bh);
|
||||
@@ -211,12 +320,6 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
|
||||
static inline void raise_rcu_softirq(void)
|
||||
{
|
||||
raise_softirq(RCU_SOFTIRQ);
|
||||
/*
|
||||
* The smp_mb() here is required to ensure that this cpu's
|
||||
* __rcu_process_callbacks() reads the most recently updated
|
||||
* value of rcu->cur.
|
||||
*/
|
||||
smp_mb();
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -225,6 +328,7 @@ static inline void raise_rcu_softirq(void)
|
||||
*/
|
||||
static void rcu_do_batch(struct rcu_data *rdp)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_head *next, *list;
|
||||
int count = 0;
|
||||
|
||||
@@ -239,9 +343,9 @@ static void rcu_do_batch(struct rcu_data *rdp)
|
||||
}
|
||||
rdp->donelist = list;
|
||||
|
||||
local_irq_disable();
|
||||
local_irq_save(flags);
|
||||
rdp->qlen -= count;
|
||||
local_irq_enable();
|
||||
local_irq_restore(flags);
|
||||
if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark)
|
||||
rdp->blimit = blimit;
|
||||
|
||||
@@ -269,6 +373,7 @@ static void rcu_do_batch(struct rcu_data *rdp)
|
||||
* rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
|
||||
* period (if necessary).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Register a new batch of callbacks, and start it up if there is currently no
|
||||
* active batch and the batch to be registered has not already occurred.
|
||||
@@ -276,15 +381,10 @@ static void rcu_do_batch(struct rcu_data *rdp)
|
||||
*/
|
||||
static void rcu_start_batch(struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
if (rcp->next_pending &&
|
||||
if (rcp->cur != rcp->pending &&
|
||||
rcp->completed == rcp->cur) {
|
||||
rcp->next_pending = 0;
|
||||
/*
|
||||
* next_pending == 0 must be visible in
|
||||
* __rcu_process_callbacks() before it can see new value of cur.
|
||||
*/
|
||||
smp_wmb();
|
||||
rcp->cur++;
|
||||
record_gp_stall_check_time(rcp);
|
||||
|
||||
/*
|
||||
* Accessing nohz_cpu_mask before incrementing rcp->cur needs a
|
||||
@@ -322,6 +422,8 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
|
||||
static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (rdp->quiescbatch != rcp->cur) {
|
||||
/* start new grace period: */
|
||||
rdp->qs_pending = 1;
|
||||
@@ -345,7 +447,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
|
||||
return;
|
||||
rdp->qs_pending = 0;
|
||||
|
||||
spin_lock(&rcp->lock);
|
||||
spin_lock_irqsave(&rcp->lock, flags);
|
||||
/*
|
||||
* rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
|
||||
* during cpu startup. Ignore the quiescent state.
|
||||
@@ -353,7 +455,7 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
|
||||
if (likely(rdp->quiescbatch == rcp->cur))
|
||||
cpu_quiet(rdp->cpu, rcp);
|
||||
|
||||
spin_unlock(&rcp->lock);
|
||||
spin_unlock_irqrestore(&rcp->lock, flags);
|
||||
}
|
||||
|
||||
|
||||
@@ -364,33 +466,38 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
|
||||
* which is dead and hence not processing interrupts.
|
||||
*/
|
||||
static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
|
||||
struct rcu_head **tail)
|
||||
struct rcu_head **tail, long batch)
|
||||
{
|
||||
local_irq_disable();
|
||||
*this_rdp->nxttail = list;
|
||||
if (list)
|
||||
this_rdp->nxttail = tail;
|
||||
local_irq_enable();
|
||||
unsigned long flags;
|
||||
|
||||
if (list) {
|
||||
local_irq_save(flags);
|
||||
this_rdp->batch = batch;
|
||||
*this_rdp->nxttail[2] = list;
|
||||
this_rdp->nxttail[2] = tail;
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
}
|
||||
|
||||
static void __rcu_offline_cpu(struct rcu_data *this_rdp,
|
||||
struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
|
||||
{
|
||||
/* if the cpu going offline owns the grace period
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* if the cpu going offline owns the grace period
|
||||
* we can block indefinitely waiting for it, so flush
|
||||
* it here
|
||||
*/
|
||||
spin_lock_bh(&rcp->lock);
|
||||
spin_lock_irqsave(&rcp->lock, flags);
|
||||
if (rcp->cur != rcp->completed)
|
||||
cpu_quiet(rdp->cpu, rcp);
|
||||
spin_unlock_bh(&rcp->lock);
|
||||
rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
|
||||
rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
|
||||
rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
|
||||
rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1);
|
||||
rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1);
|
||||
spin_unlock(&rcp->lock);
|
||||
|
||||
local_irq_disable();
|
||||
this_rdp->qlen += rdp->qlen;
|
||||
local_irq_enable();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static void rcu_offline_cpu(int cpu)
|
||||
@@ -420,38 +527,52 @@ static void rcu_offline_cpu(int cpu)
|
||||
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
|
||||
*rdp->donetail = rdp->curlist;
|
||||
rdp->donetail = rdp->curtail;
|
||||
rdp->curlist = NULL;
|
||||
rdp->curtail = &rdp->curlist;
|
||||
}
|
||||
unsigned long flags;
|
||||
long completed_snap;
|
||||
|
||||
if (rdp->nxtlist && !rdp->curlist) {
|
||||
local_irq_disable();
|
||||
rdp->curlist = rdp->nxtlist;
|
||||
rdp->curtail = rdp->nxttail;
|
||||
rdp->nxtlist = NULL;
|
||||
rdp->nxttail = &rdp->nxtlist;
|
||||
local_irq_enable();
|
||||
if (rdp->nxtlist) {
|
||||
local_irq_save(flags);
|
||||
completed_snap = ACCESS_ONCE(rcp->completed);
|
||||
|
||||
/*
|
||||
* start the next batch of callbacks
|
||||
* move the other grace-period-completed entries to
|
||||
* [rdp->nxtlist, *rdp->nxttail[0]) temporarily
|
||||
*/
|
||||
if (!rcu_batch_before(completed_snap, rdp->batch))
|
||||
rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2];
|
||||
else if (!rcu_batch_before(completed_snap, rdp->batch - 1))
|
||||
rdp->nxttail[0] = rdp->nxttail[1];
|
||||
|
||||
/* determine batch number */
|
||||
rdp->batch = rcp->cur + 1;
|
||||
/* see the comment and corresponding wmb() in
|
||||
* the rcu_start_batch()
|
||||
/*
|
||||
* the grace period for entries in
|
||||
* [rdp->nxtlist, *rdp->nxttail[0]) has completed and
|
||||
* move these entries to donelist
|
||||
*/
|
||||
smp_rmb();
|
||||
if (rdp->nxttail[0] != &rdp->nxtlist) {
|
||||
*rdp->donetail = rdp->nxtlist;
|
||||
rdp->donetail = rdp->nxttail[0];
|
||||
rdp->nxtlist = *rdp->nxttail[0];
|
||||
*rdp->donetail = NULL;
|
||||
|
||||
if (rdp->nxttail[1] == rdp->nxttail[0])
|
||||
rdp->nxttail[1] = &rdp->nxtlist;
|
||||
if (rdp->nxttail[2] == rdp->nxttail[0])
|
||||
rdp->nxttail[2] = &rdp->nxtlist;
|
||||
rdp->nxttail[0] = &rdp->nxtlist;
|
||||
}
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (rcu_batch_after(rdp->batch, rcp->pending)) {
|
||||
unsigned long flags2;
|
||||
|
||||
if (!rcp->next_pending) {
|
||||
/* and start it/schedule start if it's a new batch */
|
||||
spin_lock(&rcp->lock);
|
||||
rcp->next_pending = 1;
|
||||
rcu_start_batch(rcp);
|
||||
spin_unlock(&rcp->lock);
|
||||
spin_lock_irqsave(&rcp->lock, flags2);
|
||||
if (rcu_batch_after(rdp->batch, rcp->pending)) {
|
||||
rcp->pending = rdp->batch;
|
||||
rcu_start_batch(rcp);
|
||||
}
|
||||
spin_unlock_irqrestore(&rcp->lock, flags2);
|
||||
}
|
||||
}
|
||||
|
||||
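
The rewritten __rcu_process_callbacks() above treats rdp->nxtlist as one list carved into segments by the nxttail[] pointers, ordered from "grace period already complete" to "newest batch". A rough picture of the layout it assumes (illustrative; the authoritative description is the struct rcu_data comment, which is not shown in this diff):

/*
 * Illustrative layout sketch only:
 *
 *   nxtlist -> [ completed ) [ batch - 1 ) [ batch == rdp->batch )
 *                           ^             ^                      ^
 *                   *nxttail[0]   *nxttail[1]            *nxttail[2]
 *
 * When the list is empty, all three tail pointers point back at &nxtlist.
 */
static int example_nxtlist_empty(struct rcu_data *rdp)
{
	return rdp->nxttail[2] == &rdp->nxtlist;
}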
@@ -462,21 +583,53 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
|
||||
|
||||
static void rcu_process_callbacks(struct softirq_action *unused)
|
||||
{
|
||||
/*
|
||||
* Memory references from any prior RCU read-side critical sections
|
||||
* executed by the interrupted code must be seen before any RCU
|
||||
* grace-period manipulations below.
|
||||
*/
|
||||
|
||||
smp_mb(); /* See above block comment. */
|
||||
|
||||
__rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
|
||||
__rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
|
||||
|
||||
/*
|
||||
* Memory references from any later RCU read-side critical sections
|
||||
* executed by the interrupted code must be seen after any RCU
|
||||
* grace-period manipulations above.
|
||||
*/
|
||||
|
||||
smp_mb(); /* See above block comment. */
|
||||
}
|
||||
|
||||
static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
|
||||
{
|
||||
/* This cpu has pending rcu entries and the grace period
|
||||
* for them has completed.
|
||||
*/
|
||||
if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
|
||||
return 1;
|
||||
/* Check for CPU stalls, if enabled. */
|
||||
check_cpu_stall(rcp);
|
||||
|
||||
/* This cpu has no pending entries, but there are new entries */
|
||||
if (!rdp->curlist && rdp->nxtlist)
|
||||
return 1;
|
||||
if (rdp->nxtlist) {
|
||||
long completed_snap = ACCESS_ONCE(rcp->completed);
|
||||
|
||||
/*
|
||||
* This cpu has pending rcu entries and the grace period
|
||||
* for them has completed.
|
||||
*/
|
||||
if (!rcu_batch_before(completed_snap, rdp->batch))
|
||||
return 1;
|
||||
if (!rcu_batch_before(completed_snap, rdp->batch - 1) &&
|
||||
rdp->nxttail[0] != rdp->nxttail[1])
|
||||
return 1;
|
||||
if (rdp->nxttail[0] != &rdp->nxtlist)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* This cpu has pending rcu entries and the new batch
|
||||
* for them hasn't been started nor scheduled to start
|
||||
*/
|
||||
if (rcu_batch_after(rdp->batch, rcp->pending))
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* This cpu has finished callbacks to invoke */
|
||||
if (rdp->donelist)
|
||||
@@ -512,9 +665,15 @@ int rcu_needs_cpu(int cpu)
|
||||
struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
|
||||
struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu);
|
||||
|
||||
return (!!rdp->curlist || !!rdp_bh->curlist || rcu_pending(cpu));
|
||||
return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* Top-level function driving RCU grace-period detection, normally
|
||||
* invoked from the scheduler-clock interrupt. This function simply
|
||||
* increments counters that are read only from softirq by this same
|
||||
* CPU, so there are no memory barriers required.
|
||||
*/
|
||||
void rcu_check_callbacks(int cpu, int user)
|
||||
{
|
||||
if (user ||
|
||||
@@ -558,14 +717,17 @@ void rcu_check_callbacks(int cpu, int user)
|
||||
static void rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&rcp->lock, flags);
|
||||
memset(rdp, 0, sizeof(*rdp));
|
||||
rdp->curtail = &rdp->curlist;
|
||||
rdp->nxttail = &rdp->nxtlist;
|
||||
rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist;
|
||||
rdp->donetail = &rdp->donelist;
|
||||
rdp->quiescbatch = rcp->completed;
|
||||
rdp->qs_pending = 0;
|
||||
rdp->cpu = cpu;
|
||||
rdp->blimit = blimit;
|
||||
spin_unlock_irqrestore(&rcp->lock, flags);
|
||||
}
|
||||
|
||||
static void __cpuinit rcu_online_cpu(int cpu)
|
||||
@@ -610,6 +772,9 @@ static struct notifier_block __cpuinitdata rcu_nb = {
|
||||
*/
|
||||
void __init __rcu_init(void)
|
||||
{
|
||||
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
|
||||
printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
|
||||
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
|
||||
rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
|
||||
(void *)(long)smp_processor_id());
|
||||
/* Register notifier for non-boot CPUs */
|
||||
|
@@ -58,14 +58,6 @@
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/rcupreempt_trace.h>
|
||||
|
||||
/*
|
||||
* Macro that prevents the compiler from reordering accesses, but does
|
||||
* absolutely -nothing- to prevent CPUs from reordering. This is used
|
||||
* only to mediate communication between mainline code and hardware
|
||||
* interrupt and NMI handlers.
|
||||
*/
|
||||
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
|
||||
|
||||
/*
|
||||
* PREEMPT_RCU data structures.
|
||||
*/
|
||||
|
@@ -308,11 +308,16 @@ out:
|
||||
|
||||
static int __init rcupreempt_trace_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_init(&rcupreempt_trace_mutex);
|
||||
rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL);
|
||||
if (!rcupreempt_trace_buf)
|
||||
return 1;
|
||||
return rcupreempt_debugfs_init();
|
||||
ret = rcupreempt_debugfs_init();
|
||||
if (ret)
|
||||
kfree(rcupreempt_trace_buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __exit rcupreempt_trace_cleanup(void)
|
||||
|
@@ -38,10 +38,6 @@ EXPORT_SYMBOL(iomem_resource);
|
||||
|
||||
static DEFINE_RWLOCK(resource_lock);
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
|
||||
enum { MAX_IORES_LEVEL = 5 };
|
||||
|
||||
static void *r_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
{
|
||||
struct resource *p = v;
|
||||
@@ -53,6 +49,10 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
return p->sibling;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
|
||||
enum { MAX_IORES_LEVEL = 5 };
|
||||
|
||||
static void *r_start(struct seq_file *m, loff_t *pos)
|
||||
__acquires(resource_lock)
|
||||
{
|
||||
@@ -516,6 +516,70 @@ int adjust_resource(struct resource *res, resource_size_t start, resource_size_t
|
||||
return result;
|
||||
}
|
||||
|
||||
static void __init __reserve_region_with_split(struct resource *root,
|
||||
resource_size_t start, resource_size_t end,
|
||||
const char *name)
|
||||
{
|
||||
struct resource *parent = root;
|
||||
struct resource *conflict;
|
||||
struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
|
||||
|
||||
if (!res)
|
||||
return;
|
||||
|
||||
res->name = name;
|
||||
res->start = start;
|
||||
res->end = end;
|
||||
res->flags = IORESOURCE_BUSY;
|
||||
|
||||
for (;;) {
|
||||
conflict = __request_resource(parent, res);
|
||||
if (!conflict)
|
||||
break;
|
||||
if (conflict != parent) {
|
||||
parent = conflict;
|
||||
if (!(conflict->flags & IORESOURCE_BUSY))
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Uhhuh, that didn't work out.. */
|
||||
kfree(res);
|
||||
res = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!res) {
|
||||
/* failed, split and try again */
|
||||
|
||||
/* conflict covered whole area */
|
||||
if (conflict->start <= start && conflict->end >= end)
|
||||
return;
|
||||
|
||||
if (conflict->start > start)
|
||||
__reserve_region_with_split(root, start, conflict->start-1, name);
|
||||
if (!(conflict->flags & IORESOURCE_BUSY)) {
|
||||
resource_size_t common_start, common_end;
|
||||
|
||||
common_start = max(conflict->start, start);
|
||||
common_end = min(conflict->end, end);
|
||||
if (common_start < common_end)
|
||||
__reserve_region_with_split(root, common_start, common_end, name);
|
||||
}
|
||||
if (conflict->end < end)
|
||||
__reserve_region_with_split(root, conflict->end+1, end, name);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void reserve_region_with_split(struct resource *root,
|
||||
resource_size_t start, resource_size_t end,
|
||||
const char *name)
|
||||
{
|
||||
write_lock(&resource_lock);
|
||||
__reserve_region_with_split(root, start, end, name);
|
||||
write_unlock(&resource_lock);
|
||||
}
|
||||
|
||||
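
reserve_region_with_split() above recursively splits the requested range around existing busy resources and reserves only the parts it can. An invented example of the net effect (addresses made up for illustration):

/*
 * Invented example for the splitting logic above.  With an existing busy
 * resource at [0x2000, 0x2fff], the call
 *
 *	reserve_region_with_split(&iomem_resource, 0x1000, 0x3fff, "example");
 *
 * ends up inserting two new busy regions around the conflict:
 *	[0x1000, 0x1fff] "example"
 *	[0x3000, 0x3fff] "example"
 */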
EXPORT_SYMBOL(adjust_resource);
|
||||
|
||||
/**
|
||||
@@ -562,33 +626,34 @@ struct resource * __request_region(struct resource *parent,
|
||||
{
|
||||
struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
|
||||
|
||||
if (res) {
|
||||
res->name = name;
|
||||
res->start = start;
|
||||
res->end = start + n - 1;
|
||||
res->flags = IORESOURCE_BUSY;
|
||||
if (!res)
|
||||
return NULL;
|
||||
|
||||
write_lock(&resource_lock);
|
||||
res->name = name;
|
||||
res->start = start;
|
||||
res->end = start + n - 1;
|
||||
res->flags = IORESOURCE_BUSY;
|
||||
|
||||
for (;;) {
|
||||
struct resource *conflict;
|
||||
write_lock(&resource_lock);
|
||||
|
||||
conflict = __request_resource(parent, res);
|
||||
if (!conflict)
|
||||
break;
|
||||
if (conflict != parent) {
|
||||
parent = conflict;
|
||||
if (!(conflict->flags & IORESOURCE_BUSY))
|
||||
continue;
|
||||
}
|
||||
for (;;) {
|
||||
struct resource *conflict;
|
||||
|
||||
/* Uhhuh, that didn't work out.. */
|
||||
kfree(res);
|
||||
res = NULL;
|
||||
conflict = __request_resource(parent, res);
|
||||
if (!conflict)
|
||||
break;
|
||||
if (conflict != parent) {
|
||||
parent = conflict;
|
||||
if (!(conflict->flags & IORESOURCE_BUSY))
|
||||
continue;
|
||||
}
|
||||
write_unlock(&resource_lock);
|
||||
|
||||
/* Uhhuh, that didn't work out.. */
|
||||
kfree(res);
|
||||
res = NULL;
|
||||
break;
|
||||
}
|
||||
write_unlock(&resource_lock);
|
||||
return res;
|
||||
}
|
||||
EXPORT_SYMBOL(__request_region);
|
||||
@@ -763,3 +828,40 @@ static int __init reserve_setup(char *str)
|
||||
}
|
||||
|
||||
__setup("reserve=", reserve_setup);
|
||||
|
||||
/*
|
||||
* Check if the requested addr and size spans more than any slot in the
|
||||
* iomem resource tree.
|
||||
*/
|
||||
int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
|
||||
{
|
||||
struct resource *p = &iomem_resource;
|
||||
int err = 0;
|
||||
loff_t l;
|
||||
|
||||
read_lock(&resource_lock);
|
||||
for (p = p->child; p ; p = r_next(NULL, p, &l)) {
|
||||
/*
|
||||
* We can probably skip the resources without
|
||||
* IORESOURCE_IO attribute?
|
||||
*/
|
||||
if (p->start >= addr + size)
|
||||
continue;
|
||||
if (p->end < addr)
|
||||
continue;
|
||||
if (p->start <= addr && (p->end >= addr + size - 1))
|
||||
continue;
|
||||
printk(KERN_WARNING "resource map sanity check conflict: "
|
||||
"0x%llx 0x%llx 0x%llx 0x%llx %s\n",
|
||||
(unsigned long long)addr,
|
||||
(unsigned long long)(addr + size - 1),
|
||||
(unsigned long long)p->start,
|
||||
(unsigned long long)p->end,
|
||||
p->name);
|
||||
err = -1;
|
||||
break;
|
||||
}
|
||||
read_unlock(&resource_lock);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
kernel/sched.c
@@ -201,14 +201,19 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
|
||||
hrtimer_init(&rt_b->rt_period_timer,
|
||||
CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
rt_b->rt_period_timer.function = sched_rt_period_timer;
|
||||
rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
|
||||
rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
|
||||
}
|
||||
|
||||
static inline int rt_bandwidth_enabled(void)
|
||||
{
|
||||
return sysctl_sched_rt_runtime >= 0;
|
||||
}
|
||||
|
||||
static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
|
||||
{
|
||||
ktime_t now;
|
||||
|
||||
if (rt_b->rt_runtime == RUNTIME_INF)
|
||||
if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
|
||||
return;
|
||||
|
||||
if (hrtimer_active(&rt_b->rt_period_timer))
|
||||
@@ -297,9 +302,9 @@ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
|
||||
static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
|
||||
static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
|
||||
#endif /* CONFIG_RT_GROUP_SCHED */
|
||||
#else /* !CONFIG_FAIR_GROUP_SCHED */
|
||||
#else /* !CONFIG_USER_SCHED */
|
||||
#define root_task_group init_task_group
|
||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||
#endif /* CONFIG_USER_SCHED */
|
||||
|
||||
/* task_group_lock serializes add/remove of task groups and also changes to
|
||||
* a task group's cpu shares.
|
||||
@@ -603,9 +608,9 @@ struct rq {
|
||||
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
|
||||
|
||||
static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
|
||||
static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync)
|
||||
{
|
||||
rq->curr->sched_class->check_preempt_curr(rq, p);
|
||||
rq->curr->sched_class->check_preempt_curr(rq, p, sync);
|
||||
}
|
||||
|
||||
static inline int cpu_of(struct rq *rq)
|
||||
@@ -1086,7 +1091,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
static void init_hrtick(void)
|
||||
static __init void init_hrtick(void)
|
||||
{
|
||||
hotcpu_notifier(hotplug_hrtick, 0);
|
||||
}
|
||||
@@ -1101,7 +1106,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
|
||||
hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
|
||||
}
|
||||
|
||||
static void init_hrtick(void)
|
||||
static inline void init_hrtick(void)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
@@ -1118,9 +1123,9 @@ static void init_rq_hrtick(struct rq *rq)
|
||||
|
||||
hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
rq->hrtick_timer.function = hrtick;
|
||||
rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
|
||||
rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
|
||||
}
|
||||
#else
|
||||
#else /* CONFIG_SCHED_HRTICK */
|
||||
static inline void hrtick_clear(struct rq *rq)
|
||||
{
|
||||
}
|
||||
@@ -1132,7 +1137,7 @@ static inline void init_rq_hrtick(struct rq *rq)
|
||||
static inline void init_hrtick(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
#endif /* CONFIG_SCHED_HRTICK */
|
||||
|
||||
/*
|
||||
* resched_task - mark a task 'to be rescheduled now'.
|
||||
@@ -1379,6 +1384,51 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
|
||||
update_load_sub(&rq->load, load);
|
||||
}
|
||||
|
||||
#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
|
||||
typedef int (*tg_visitor)(struct task_group *, void *);
|
||||
|
||||
/*
|
||||
* Iterate the full tree, calling @down when first entering a node and @up when
|
||||
* leaving it for the final time.
|
||||
*/
|
||||
static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
|
||||
{
|
||||
struct task_group *parent, *child;
|
||||
int ret;
|
||||
|
||||
rcu_read_lock();
|
||||
parent = &root_task_group;
|
||||
down:
|
||||
ret = (*down)(parent, data);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
list_for_each_entry_rcu(child, &parent->children, siblings) {
|
||||
parent = child;
|
||||
goto down;
|
||||
|
||||
up:
|
||||
continue;
|
||||
}
|
||||
ret = (*up)(parent, data);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
child = parent;
|
||||
parent = parent->parent;
|
||||
if (parent)
|
||||
goto up;
|
||||
out_unlock:
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int tg_nop(struct task_group *tg, void *data)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
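
The new walk_tg_tree() above does an iterative depth-first walk of the task-group hierarchy, invoking @down on the way in and @up on the way out, and stopping at the first non-zero return. A minimal caller sketch (the counting visitor is invented for illustration):

/* Illustrative only: an invented visitor pair for walk_tg_tree(). */
static int example_count_group(struct task_group *tg, void *data)
{
	(*(int *)data)++;		/* pre-order: runs once per group */
	return 0;			/* non-zero would abort the walk */
}

static int example_count_task_groups(void)
{
	int nr = 0;

	walk_tg_tree(example_count_group, tg_nop, &nr);
	return nr;
}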
#ifdef CONFIG_SMP
|
||||
static unsigned long source_load(int cpu, int type);
|
||||
static unsigned long target_load(int cpu, int type);
|
||||
@@ -1396,37 +1446,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
|
||||
typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);
|
||||
|
||||
/*
|
||||
* Iterate the full tree, calling @down when first entering a node and @up when
|
||||
* leaving it for the final time.
|
||||
*/
|
||||
static void
|
||||
walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
|
||||
{
|
||||
struct task_group *parent, *child;
|
||||
|
||||
rcu_read_lock();
|
||||
parent = &root_task_group;
|
||||
down:
|
||||
(*down)(parent, cpu, sd);
|
||||
list_for_each_entry_rcu(child, &parent->children, siblings) {
|
||||
parent = child;
|
||||
goto down;
|
||||
|
||||
up:
|
||||
continue;
|
||||
}
|
||||
(*up)(parent, cpu, sd);
|
||||
|
||||
child = parent;
|
||||
parent = parent->parent;
|
||||
if (parent)
|
||||
goto up;
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void __set_se_shares(struct sched_entity *se, unsigned long shares);
|
||||
|
||||
/*
|
||||
@@ -1485,11 +1504,11 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
|
||||
* This needs to be done in a bottom-up fashion because the rq weight of a
|
||||
* parent group depends on the shares of its child groups.
|
||||
*/
|
||||
static void
|
||||
tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
|
||||
static int tg_shares_up(struct task_group *tg, void *data)
|
||||
{
|
||||
unsigned long rq_weight = 0;
|
||||
unsigned long shares = 0;
|
||||
struct sched_domain *sd = data;
|
||||
int i;
|
||||
|
||||
for_each_cpu_mask(i, sd->span) {
|
||||
@@ -1514,6 +1533,8 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
|
||||
__update_group_shares_cpu(tg, i, shares, rq_weight);
|
||||
spin_unlock_irqrestore(&rq->lock, flags);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1521,10 +1542,10 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
|
||||
* This needs to be done in a top-down fashion because the load of a child
|
||||
* group is a fraction of its parents load.
|
||||
*/
|
||||
static void
|
||||
tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
|
||||
static int tg_load_down(struct task_group *tg, void *data)
|
||||
{
|
||||
unsigned long load;
|
||||
long cpu = (long)data;
|
||||
|
||||
if (!tg->parent) {
|
||||
load = cpu_rq(cpu)->load.weight;
|
||||
@@ -1535,11 +1556,8 @@ tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
|
||||
}
|
||||
|
||||
tg->cfs_rq[cpu]->h_load = load;
|
||||
}
|
||||
|
||||
static void
|
||||
tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void update_shares(struct sched_domain *sd)
|
||||
@@ -1549,7 +1567,7 @@ static void update_shares(struct sched_domain *sd)
|
||||
|
||||
if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
|
||||
sd->last_update = now;
|
||||
walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
|
||||
walk_tg_tree(tg_nop, tg_shares_up, sd);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1560,9 +1578,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
|
||||
spin_lock(&rq->lock);
|
||||
}
|
||||
|
||||
static void update_h_load(int cpu)
|
||||
static void update_h_load(long cpu)
|
||||
{
|
||||
walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
|
||||
walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -1920,11 +1938,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
|
||||
running = task_running(rq, p);
|
||||
on_rq = p->se.on_rq;
|
||||
ncsw = 0;
|
||||
if (!match_state || p->state == match_state) {
|
||||
ncsw = p->nivcsw + p->nvcsw;
|
||||
if (unlikely(!ncsw))
|
||||
ncsw = 1;
|
||||
}
|
||||
if (!match_state || p->state == match_state)
|
||||
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
|
||||
task_rq_unlock(rq, &flags);
|
||||
|
||||
/*
|
||||
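
The wait_task_inactive() hunk above replaces the old nivcsw+nvcsw bump with a snapshot of nvcsw that always has its most-significant bit set, so a return value of 0 can only mean "no valid snapshot / state did not match". A small sketch of the trick in isolation (illustrative only):

/* Illustrative sketch: LONG_MIN sets the MSB, so a valid snapshot is never 0. */
static long example_ncsw_snapshot(struct task_struct *p, long match_state)
{
	if (!match_state || p->state == match_state)
		return p->nvcsw | LONG_MIN;	/* guaranteed non-zero */
	return 0;				/* 0 unambiguously means "not matched" */
}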
@@ -2284,7 +2299,7 @@ out_running:
|
||||
trace_mark(kernel_sched_wakeup,
|
||||
"pid %d state %ld ## rq %p task %p rq->curr %p",
|
||||
p->pid, p->state, rq, p, rq->curr);
|
||||
check_preempt_curr(rq, p);
|
||||
check_preempt_curr(rq, p, sync);
|
||||
|
||||
p->state = TASK_RUNNING;
|
||||
#ifdef CONFIG_SMP
|
||||
@@ -2419,7 +2434,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
|
||||
trace_mark(kernel_sched_wakeup_new,
|
||||
"pid %d state %ld ## rq %p task %p rq->curr %p",
|
||||
p->pid, p->state, rq, p, rq->curr);
|
||||
check_preempt_curr(rq, p);
|
||||
check_preempt_curr(rq, p, 0);
|
||||
#ifdef CONFIG_SMP
|
||||
if (p->sched_class->task_wake_up)
|
||||
p->sched_class->task_wake_up(rq, p);
|
||||
@@ -2879,7 +2894,7 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
|
||||
* Note that idle threads have a prio of MAX_PRIO, for this test
|
||||
* to be always true for them.
|
||||
*/
|
||||
check_preempt_curr(this_rq, p);
|
||||
check_preempt_curr(this_rq, p, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -4626,6 +4641,15 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
|
||||
|
||||
/**
|
||||
* complete: - signals a single thread waiting on this completion
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This will wake up a single thread waiting on this completion. Threads will be
|
||||
* awakened in the same order in which they were queued.
|
||||
*
|
||||
* See also complete_all(), wait_for_completion() and related routines.
|
||||
*/
|
||||
void complete(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
@@ -4637,6 +4661,12 @@ void complete(struct completion *x)
|
||||
}
|
||||
EXPORT_SYMBOL(complete);
|
||||
|
||||
/**
|
||||
* complete_all: - signals all threads waiting on this completion
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This will wake up all threads waiting on this particular completion event.
|
||||
*/
|
||||
void complete_all(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
@@ -4657,10 +4687,7 @@ do_wait_for_common(struct completion *x, long timeout, int state)
|
||||
wait.flags |= WQ_FLAG_EXCLUSIVE;
|
||||
__add_wait_queue_tail(&x->wait, &wait);
|
||||
do {
|
||||
if ((state == TASK_INTERRUPTIBLE &&
|
||||
signal_pending(current)) ||
|
||||
(state == TASK_KILLABLE &&
|
||||
fatal_signal_pending(current))) {
|
||||
if (signal_pending_state(state, current)) {
|
||||
timeout = -ERESTARTSYS;
|
||||
break;
|
||||
}
|
||||
@@ -4688,12 +4715,31 @@ wait_for_common(struct completion *x, long timeout, int state)
|
||||
return timeout;
|
||||
}
|
||||
|
||||
/**
|
||||
* wait_for_completion: - waits for completion of a task
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This waits to be signaled for completion of a specific task. It is NOT
|
||||
* interruptible and there is no timeout.
|
||||
*
|
||||
* See also similar routines (i.e. wait_for_completion_timeout()) with timeout
|
||||
* and interrupt capability. Also see complete().
|
||||
*/
|
||||
void __sched wait_for_completion(struct completion *x)
|
||||
{
|
||||
wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion);
|
||||
|
||||
/**
|
||||
* wait_for_completion_timeout: - waits for completion of a task (w/timeout)
|
||||
* @x: holds the state of this particular completion
|
||||
* @timeout: timeout value in jiffies
|
||||
*
|
||||
* This waits for either a completion of a specific task to be signaled or for a
|
||||
* specified timeout to expire. The timeout is in jiffies. It is not
|
||||
* interruptible.
|
||||
*/
|
||||
unsigned long __sched
|
||||
wait_for_completion_timeout(struct completion *x, unsigned long timeout)
|
||||
{
|
||||
@@ -4701,6 +4747,13 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout)
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_timeout);
|
||||
|
||||
/**
|
||||
* wait_for_completion_interruptible: - waits for completion of a task (w/intr)
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This waits for completion of a specific task to be signaled. It is
|
||||
* interruptible.
|
||||
*/
|
||||
int __sched wait_for_completion_interruptible(struct completion *x)
|
||||
{
|
||||
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
|
||||
@@ -4710,6 +4763,14 @@ int __sched wait_for_completion_interruptible(struct completion *x)
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_interruptible);
|
||||
|
||||
/**
|
||||
* wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
|
||||
* @x: holds the state of this particular completion
|
||||
* @timeout: timeout value in jiffies
|
||||
*
|
||||
* This waits for either a completion of a specific task to be signaled or for a
|
||||
* specified timeout to expire. It is interruptible. The timeout is in jiffies.
|
||||
*/
|
||||
unsigned long __sched
|
||||
wait_for_completion_interruptible_timeout(struct completion *x,
|
||||
unsigned long timeout)
|
||||
@@ -4718,6 +4779,13 @@ wait_for_completion_interruptible_timeout(struct completion *x,
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
|
||||
|
||||
/**
|
||||
* wait_for_completion_killable: - waits for completion of a task (killable)
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This waits to be signaled for completion of a specific task. It can be
|
||||
* interrupted by a kill signal.
|
||||
*/
|
||||
int __sched wait_for_completion_killable(struct completion *x)
|
||||
{
|
||||
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
|
||||
@@ -5120,7 +5188,8 @@ recheck:
|
||||
* Do not allow realtime tasks into groups that have no runtime
|
||||
* assigned.
|
||||
*/
|
||||
if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
|
||||
if (rt_bandwidth_enabled() && rt_policy(policy) &&
|
||||
task_group(p)->rt_bandwidth.rt_runtime == 0)
|
||||
return -EPERM;
|
||||
#endif
|
||||
|
||||
@@ -5956,7 +6025,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
|
||||
set_task_cpu(p, dest_cpu);
|
||||
if (on_rq) {
|
||||
activate_task(rq_dest, p, 0);
|
||||
check_preempt_curr(rq_dest, p);
|
||||
check_preempt_curr(rq_dest, p, 0);
|
||||
}
|
||||
done:
|
||||
ret = 1;
|
||||
@@ -6281,7 +6350,7 @@ set_table_entry(struct ctl_table *entry,
|
||||
static struct ctl_table *
|
||||
sd_alloc_ctl_domain_table(struct sched_domain *sd)
|
||||
{
|
||||
struct ctl_table *table = sd_alloc_ctl_entry(12);
|
||||
struct ctl_table *table = sd_alloc_ctl_entry(13);
|
||||
|
||||
if (table == NULL)
|
||||
return NULL;
|
||||
@@ -6309,7 +6378,9 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
|
||||
sizeof(int), 0644, proc_dointvec_minmax);
|
||||
set_table_entry(&table[10], "flags", &sd->flags,
|
||||
sizeof(int), 0644, proc_dointvec_minmax);
|
||||
/* &table[11] is terminator */
|
||||
set_table_entry(&table[11], "name", sd->name,
|
||||
CORENAME_MAX_SIZE, 0444, proc_dostring);
|
||||
/* &table[12] is terminator */
|
||||
|
||||
return table;
|
||||
}
|
||||
@@ -7193,13 +7264,21 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
|
||||
* Non-inlined to reduce accumulated stack pressure in build_sched_domains()
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
# define SD_INIT_NAME(sd, type) sd->name = #type
|
||||
#else
|
||||
# define SD_INIT_NAME(sd, type) do { } while (0)
|
||||
#endif
|
||||
|
||||
#define SD_INIT(sd, type) sd_init_##type(sd)
|
||||
|
||||
#define SD_INIT_FUNC(type) \
|
||||
static noinline void sd_init_##type(struct sched_domain *sd) \
|
||||
{ \
|
||||
memset(sd, 0, sizeof(*sd)); \
|
||||
*sd = SD_##type##_INIT; \
|
||||
sd->level = SD_LV_##type; \
|
||||
SD_INIT_NAME(sd, type); \
|
||||
}
|
||||
|
||||
SD_INIT_FUNC(CPU)
|
||||
@@ -7695,24 +7774,27 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
|
||||
* and partition_sched_domains() will fallback to the single partition
|
||||
* 'fallback_doms', it also forces the domains to be rebuilt.
|
||||
*
|
||||
* If doms_new==NULL it will be replaced with cpu_online_map.
|
||||
* ndoms_new==0 is a special case for destroying existing domains.
|
||||
* It will not create the default domain.
|
||||
*
|
||||
* Call with hotplug lock held
|
||||
*/
|
||||
void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
|
||||
struct sched_domain_attr *dattr_new)
|
||||
{
|
||||
int i, j;
|
||||
int i, j, n;
|
||||
|
||||
mutex_lock(&sched_domains_mutex);
|
||||
|
||||
/* always unregister in case we don't destroy any domains */
|
||||
unregister_sched_domain_sysctl();
|
||||
|
||||
if (doms_new == NULL)
|
||||
ndoms_new = 0;
|
||||
n = doms_new ? ndoms_new : 0;
|
||||
|
||||
/* Destroy deleted domains */
|
||||
for (i = 0; i < ndoms_cur; i++) {
|
||||
for (j = 0; j < ndoms_new; j++) {
|
||||
for (j = 0; j < n; j++) {
|
||||
if (cpus_equal(doms_cur[i], doms_new[j])
|
||||
&& dattrs_equal(dattr_cur, i, dattr_new, j))
|
||||
goto match1;
|
||||
@@ -7725,7 +7807,6 @@ match1:
|
||||
|
||||
if (doms_new == NULL) {
|
||||
ndoms_cur = 0;
|
||||
ndoms_new = 1;
|
||||
doms_new = &fallback_doms;
|
||||
cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
|
||||
dattr_new = NULL;
|
||||
@@ -7762,8 +7843,13 @@ match2:
|
||||
int arch_reinit_sched_domains(void)
|
||||
{
|
||||
get_online_cpus();
|
||||
|
||||
/* Destroy domains first to force the rebuild */
|
||||
partition_sched_domains(0, NULL, NULL);
|
||||
|
||||
rebuild_sched_domains();
|
||||
put_online_cpus();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -7847,7 +7933,7 @@ static int update_sched_domains(struct notifier_block *nfb,
|
||||
case CPU_ONLINE_FROZEN:
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
partition_sched_domains(0, NULL, NULL);
|
||||
partition_sched_domains(1, NULL, NULL);
|
||||
return NOTIFY_OK;
|
||||
|
||||
default:
|
||||
@@ -8234,20 +8320,25 @@ void __might_sleep(char *file, int line)
|
||||
#ifdef in_atomic
|
||||
static unsigned long prev_jiffy; /* ratelimiting */
|
||||
|
||||
if ((in_atomic() || irqs_disabled()) &&
|
||||
system_state == SYSTEM_RUNNING && !oops_in_progress) {
|
||||
if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
|
||||
return;
|
||||
prev_jiffy = jiffies;
|
||||
printk(KERN_ERR "BUG: sleeping function called from invalid"
|
||||
" context at %s:%d\n", file, line);
|
||||
printk("in_atomic():%d, irqs_disabled():%d\n",
|
||||
in_atomic(), irqs_disabled());
|
||||
debug_show_held_locks(current);
|
||||
if (irqs_disabled())
|
||||
print_irqtrace_events(current);
|
||||
dump_stack();
|
||||
}
|
||||
if ((!in_atomic() && !irqs_disabled()) ||
|
||||
system_state != SYSTEM_RUNNING || oops_in_progress)
|
||||
return;
|
||||
if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
|
||||
return;
|
||||
prev_jiffy = jiffies;
|
||||
|
||||
printk(KERN_ERR
|
||||
"BUG: sleeping function called from invalid context at %s:%d\n",
|
||||
file, line);
|
||||
printk(KERN_ERR
|
||||
"in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
|
||||
in_atomic(), irqs_disabled(),
|
||||
current->pid, current->comm);
|
||||
|
||||
debug_show_held_locks(current);
|
||||
if (irqs_disabled())
|
||||
print_irqtrace_events(current);
|
||||
dump_stack();
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(__might_sleep);
|
||||
@@ -8745,75 +8836,97 @@ static DEFINE_MUTEX(rt_constraints_mutex);
|
||||
static unsigned long to_ratio(u64 period, u64 runtime)
|
||||
{
|
||||
if (runtime == RUNTIME_INF)
|
||||
return 1ULL << 16;
|
||||
return 1ULL << 20;
|
||||
|
||||
return div64_u64(runtime << 16, period);
|
||||
return div64_u64(runtime << 20, period);
|
||||
}
|
||||
|
||||
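
to_ratio() above switches from 16-bit to 20-bit fixed point when expressing runtime/period, which keeps more precision once the ratios of many groups are summed and compared. A worked example with the default rt limits (1,000,000 us period, 950,000 us runtime; illustrative only):

/*
 * Worked example of the new scaling (illustrative only):
 *	old: (950000 << 16) / 1000000 = 62259	(out of 2^16 = 65536)
 *	new: (950000 << 20) / 1000000 = 996147	(out of 2^20 = 1048576)
 * Both encode ~0.95, but the 2^20 scale resolves 16x smaller differences
 * when child ratios are accumulated against a parent's allowance.
 */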
#ifdef CONFIG_CGROUP_SCHED
|
||||
static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
|
||||
{
|
||||
struct task_group *tgi, *parent = tg->parent;
|
||||
unsigned long total = 0;
|
||||
|
||||
if (!parent) {
|
||||
if (global_rt_period() < period)
|
||||
return 0;
|
||||
|
||||
return to_ratio(period, runtime) <
|
||||
to_ratio(global_rt_period(), global_rt_runtime());
|
||||
}
|
||||
|
||||
if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
|
||||
return 0;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(tgi, &parent->children, siblings) {
|
||||
if (tgi == tg)
|
||||
continue;
|
||||
|
||||
total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
|
||||
tgi->rt_bandwidth.rt_runtime);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return total + to_ratio(period, runtime) <=
|
||||
to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
|
||||
parent->rt_bandwidth.rt_runtime);
|
||||
}
|
||||
#elif defined CONFIG_USER_SCHED
|
||||
static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
|
||||
{
|
||||
struct task_group *tgi;
|
||||
unsigned long total = 0;
|
||||
unsigned long global_ratio =
|
||||
to_ratio(global_rt_period(), global_rt_runtime());
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(tgi, &task_groups, list) {
|
||||
if (tgi == tg)
|
||||
continue;
|
||||
|
||||
total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
|
||||
tgi->rt_bandwidth.rt_runtime);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return total + to_ratio(period, runtime) < global_ratio;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Must be called with tasklist_lock held */
|
||||
static inline int tg_has_rt_tasks(struct task_group *tg)
|
||||
{
|
||||
struct task_struct *g, *p;
|
||||
|
||||
do_each_thread(g, p) {
|
||||
if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
|
||||
return 1;
|
||||
} while_each_thread(g, p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct rt_schedulable_data {
|
||||
struct task_group *tg;
|
||||
u64 rt_period;
|
||||
u64 rt_runtime;
|
||||
};
|
||||
|
||||
static int tg_schedulable(struct task_group *tg, void *data)
|
||||
{
|
||||
struct rt_schedulable_data *d = data;
|
||||
struct task_group *child;
|
||||
unsigned long total, sum = 0;
|
||||
u64 period, runtime;
|
||||
|
||||
period = ktime_to_ns(tg->rt_bandwidth.rt_period);
|
||||
runtime = tg->rt_bandwidth.rt_runtime;
|
||||
|
||||
if (tg == d->tg) {
|
||||
period = d->rt_period;
|
||||
runtime = d->rt_runtime;
|
||||
}
|
||||
|
||||
/*
|
||||
* Cannot have more runtime than the period.
|
||||
*/
|
||||
if (runtime > period && runtime != RUNTIME_INF)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Ensure we don't starve existing RT tasks.
|
||||
*/
|
||||
if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
|
||||
return -EBUSY;
|
||||
|
||||
total = to_ratio(period, runtime);
|
||||
|
||||
/*
|
||||
* Nobody can have more than the global setting allows.
|
||||
*/
|
||||
if (total > to_ratio(global_rt_period(), global_rt_runtime()))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* The sum of our children's runtime should not exceed our own.
|
||||
*/
|
||||
list_for_each_entry_rcu(child, &tg->children, siblings) {
|
||||
period = ktime_to_ns(child->rt_bandwidth.rt_period);
|
||||
runtime = child->rt_bandwidth.rt_runtime;
|
||||
|
||||
if (child == d->tg) {
|
||||
period = d->rt_period;
|
||||
runtime = d->rt_runtime;
|
||||
}
|
||||
|
||||
sum += to_ratio(period, runtime);
|
||||
}
|
||||
|
||||
if (sum > total)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
|
||||
{
|
||||
struct rt_schedulable_data data = {
|
||||
.tg = tg,
|
||||
.rt_period = period,
|
||||
.rt_runtime = runtime,
|
||||
};
|
||||
|
||||
return walk_tg_tree(tg_schedulable, tg_nop, &data);
|
||||
}
|
||||
|
||||
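
tg_schedulable() above enforces, per group, that runtime does not exceed period and that the sum of the children's to_ratio() values stays within the group's own ratio; __rt_schedulable() then applies it to the whole hierarchy via walk_tg_tree(). A made-up numeric example of the check:

/*
 * Made-up example of the per-group admission check above:
 *	parent:  period 1000000 us, runtime 500000 us  -> ratio ~0.50
 *	child A: period 1000000 us, runtime 200000 us  -> ratio ~0.20
 *	child B: period 1000000 us, runtime 250000 us  -> ratio ~0.25
 * sum of children ~0.45 <= parent ~0.50, so the setting is accepted;
 * raising child B to 350000 us would push the sum to ~0.55 -> -EINVAL.
 */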
static int tg_set_bandwidth(struct task_group *tg,
|
||||
u64 rt_period, u64 rt_runtime)
|
||||
{
|
||||
@@ -8821,14 +8934,9 @@ static int tg_set_bandwidth(struct task_group *tg,
|
||||
|
||||
mutex_lock(&rt_constraints_mutex);
|
||||
read_lock(&tasklist_lock);
|
||||
if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
|
||||
err = -EBUSY;
|
||||
err = __rt_schedulable(tg, rt_period, rt_runtime);
|
||||
if (err)
|
||||
goto unlock;
|
||||
}
|
||||
if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
|
||||
err = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
|
||||
tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
|
||||
@@ -8897,16 +9005,25 @@ long sched_group_rt_period(struct task_group *tg)
|
||||
|
||||
static int sched_rt_global_constraints(void)
|
||||
{
|
||||
struct task_group *tg = &root_task_group;
|
||||
u64 rt_runtime, rt_period;
|
||||
u64 runtime, period;
|
||||
int ret = 0;
|
||||
|
||||
rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
|
||||
rt_runtime = tg->rt_bandwidth.rt_runtime;
|
||||
if (sysctl_sched_rt_period <= 0)
|
||||
return -EINVAL;
|
||||
|
||||
runtime = global_rt_runtime();
|
||||
period = global_rt_period();
|
||||
|
||||
/*
|
||||
* Sanity check on the sysctl variables.
|
||||
*/
|
||||
if (runtime > period && runtime != RUNTIME_INF)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&rt_constraints_mutex);
|
||||
if (!__rt_schedulable(tg, rt_period, rt_runtime))
|
||||
ret = -EINVAL;
|
||||
read_lock(&tasklist_lock);
|
||||
ret = __rt_schedulable(NULL, 0, 0);
|
||||
read_unlock(&tasklist_lock);
|
||||
mutex_unlock(&rt_constraints_mutex);
|
||||
|
||||
return ret;
|
||||
@@ -8917,6 +9034,9 @@ static int sched_rt_global_constraints(void)
|
||||
unsigned long flags;
|
||||
int i;
|
||||
|
||||
if (sysctl_sched_rt_period <= 0)
|
||||
return -EINVAL;
|
||||
|
||||
spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
|
||||
for_each_possible_cpu(i) {
|
||||
struct rt_rq *rt_rq = &cpu_rq(i)->rt;
|
||||
@@ -8977,7 +9097,6 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
||||
|
||||
if (!cgrp->parent) {
|
||||
/* This is early initialization for the top cgroup */
|
||||
init_task_group.css.cgroup = cgrp;
|
||||
return &init_task_group.css;
|
||||
}
|
||||
|
||||
@@ -8986,9 +9105,6 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
||||
if (IS_ERR(tg))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
/* Bind the cgroup to task_group object we just created */
|
||||
tg->css.cgroup = cgrp;
|
||||
|
||||
return &tg->css;
|
||||
}
|
||||
|
||||
|
@@ -118,13 +118,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
|
||||
|
||||
/*
|
||||
* scd->clock = clamp(scd->tick_gtod + delta,
|
||||
* max(scd->tick_gtod, scd->clock),
|
||||
* scd->tick_gtod + TICK_NSEC);
|
||||
* max(scd->tick_gtod, scd->clock),
|
||||
* max(scd->clock, scd->tick_gtod + TICK_NSEC));
|
||||
*/
|
||||
|
||||
clock = scd->tick_gtod + delta;
|
||||
min_clock = wrap_max(scd->tick_gtod, scd->clock);
|
||||
max_clock = scd->tick_gtod + TICK_NSEC;
|
||||
max_clock = wrap_max(scd->clock, scd->tick_gtod + TICK_NSEC);
|
||||
|
||||
clock = wrap_max(clock, min_clock);
|
||||
clock = wrap_min(clock, max_clock);
|
||||
|
@@ -333,12 +333,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
|
||||
unsigned long flags;
|
||||
int num_threads = 1;
|
||||
|
||||
rcu_read_lock();
|
||||
if (lock_task_sighand(p, &flags)) {
|
||||
num_threads = atomic_read(&p->signal->count);
|
||||
unlock_task_sighand(p, &flags);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, num_threads);
|
||||
SEQ_printf(m,
|
||||
|
@@ -408,64 +408,6 @@ static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
return __sched_period(nr_running);
|
||||
}
|
||||
|
||||
/*
|
||||
* The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
|
||||
* that it favours >=0 over <0.
|
||||
*
|
||||
* -20 |
|
||||
* |
|
||||
* 0 --------+-------
|
||||
* .'
|
||||
* 19 .'
|
||||
*
|
||||
*/
|
||||
static unsigned long
|
||||
calc_delta_asym(unsigned long delta, struct sched_entity *se)
|
||||
{
|
||||
struct load_weight lw = {
|
||||
.weight = NICE_0_LOAD,
|
||||
.inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
|
||||
};
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
struct load_weight *se_lw = &se->load;
|
||||
unsigned long rw = cfs_rq_of(se)->load.weight;
|
||||
|
||||
#ifdef CONFIG_FAIR_SCHED_GROUP
|
||||
struct cfs_rq *cfs_rq = se->my_q;
|
||||
struct task_group *tg = NULL
|
||||
|
||||
if (cfs_rq)
|
||||
tg = cfs_rq->tg;
|
||||
|
||||
if (tg && tg->shares < NICE_0_LOAD) {
|
||||
/*
|
||||
* scale shares to what it would have been had
|
||||
* tg->weight been NICE_0_LOAD:
|
||||
*
|
||||
* weight = 1024 * shares / tg->weight
|
||||
*/
|
||||
lw.weight *= se->load.weight;
|
||||
lw.weight /= tg->shares;
|
||||
|
||||
lw.inv_weight = 0;
|
||||
|
||||
se_lw = &lw;
|
||||
rw += lw.weight - se->load.weight;
|
||||
} else
|
||||
#endif
|
||||
|
||||
if (se->load.weight < NICE_0_LOAD) {
|
||||
se_lw = &lw;
|
||||
rw += NICE_0_LOAD - se->load.weight;
|
||||
}
|
||||
|
||||
delta = calc_delta_mine(delta, rw, se_lw);
|
||||
}
|
||||
|
||||
return delta;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the current task's runtime statistics. Skip current tasks that
|
||||
* are not in our scheduling class.
|
||||
@@ -586,11 +528,12 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
update_load_add(&cfs_rq->load, se->load.weight);
|
||||
if (!parent_entity(se))
|
||||
inc_cpu_load(rq_of(cfs_rq), se->load.weight);
|
||||
if (entity_is_task(se))
|
||||
if (entity_is_task(se)) {
|
||||
add_cfs_task_weight(cfs_rq, se->load.weight);
|
||||
list_add(&se->group_node, &cfs_rq->tasks);
|
||||
}
|
||||
cfs_rq->nr_running++;
|
||||
se->on_rq = 1;
|
||||
list_add(&se->group_node, &cfs_rq->tasks);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -599,11 +542,12 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
update_load_sub(&cfs_rq->load, se->load.weight);
|
||||
if (!parent_entity(se))
|
||||
dec_cpu_load(rq_of(cfs_rq), se->load.weight);
|
||||
if (entity_is_task(se))
|
||||
if (entity_is_task(se)) {
|
||||
add_cfs_task_weight(cfs_rq, -se->load.weight);
|
||||
list_del_init(&se->group_node);
|
||||
}
|
||||
cfs_rq->nr_running--;
|
||||
se->on_rq = 0;
|
||||
list_del_init(&se->group_node);
|
||||
}
|
||||
|
||||
static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
@@ -1085,7 +1029,6 @@ static long effective_load(struct task_group *tg, int cpu,
|
||||
long wl, long wg)
|
||||
{
|
||||
struct sched_entity *se = tg->se[cpu];
|
||||
long more_w;
|
||||
|
||||
if (!tg->parent)
|
||||
return wl;
|
||||
@@ -1097,18 +1040,17 @@ static long effective_load(struct task_group *tg, int cpu,
|
||||
if (!wl && sched_feat(ASYM_EFF_LOAD))
|
||||
return wl;
|
||||
|
||||
/*
|
||||
* Instead of using this increment, also add the difference
|
||||
* between when the shares were last updated and now.
|
||||
*/
|
||||
more_w = se->my_q->load.weight - se->my_q->rq_weight;
|
||||
wl += more_w;
|
||||
wg += more_w;
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
#define D(n) (likely(n) ? (n) : 1)
|
||||
|
||||
long S, rw, s, a, b;
|
||||
long more_w;
|
||||
|
||||
/*
|
||||
* Instead of using this increment, also add the difference
|
||||
* between when the shares were last updated and now.
|
||||
*/
|
||||
more_w = se->my_q->load.weight - se->my_q->rq_weight;
|
||||
wl += more_w;
|
||||
wg += more_w;
|
||||
|
||||
S = se->my_q->tg->shares;
|
||||
s = se->my_q->shares;
|
||||
@@ -1117,7 +1059,11 @@ static long effective_load(struct task_group *tg, int cpu,
|
||||
a = S*(rw + wl);
|
||||
b = S*rw + s*wg;
|
||||
|
||||
wl = s*(a-b)/D(b);
|
||||
wl = s*(a-b);
|
||||
|
||||
if (likely(b))
|
||||
wl /= b;
|
||||
|
||||
/*
|
||||
* Assume the group is already running and will
|
||||
* thus already be accounted for in the weight.
|
||||
@@ -1126,7 +1072,6 @@ static long effective_load(struct task_group *tg, int cpu,
|
||||
* alter the group weight.
|
||||
*/
|
||||
wg = 0;
|
||||
#undef D
|
||||
}
|
||||
|
||||
return wl;
|
||||
@@ -1143,7 +1088,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
|
||||
#endif
|
||||
|
||||
static int
|
||||
wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
|
||||
wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
|
||||
struct task_struct *p, int prev_cpu, int this_cpu, int sync,
|
||||
int idx, unsigned long load, unsigned long this_load,
|
||||
unsigned int imbalance)
|
||||
@@ -1158,6 +1103,11 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
|
||||
if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
|
||||
return 0;
|
||||
|
||||
if (!sync && sched_feat(SYNC_WAKEUPS) &&
|
||||
curr->se.avg_overlap < sysctl_sched_migration_cost &&
|
||||
p->se.avg_overlap < sysctl_sched_migration_cost)
|
||||
sync = 1;
|
||||
|
||||
/*
|
||||
* If sync wakeup then subtract the (maximum possible)
|
||||
* effect of the currently running task from the load
|
||||
@@ -1182,17 +1132,14 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
|
||||
* a reasonable amount of time then attract this newly
|
||||
* woken task:
|
||||
*/
|
||||
if (sync && balanced) {
|
||||
if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
|
||||
p->se.avg_overlap < sysctl_sched_migration_cost)
|
||||
return 1;
|
||||
}
|
||||
if (sync && balanced)
|
||||
return 1;
|
||||
|
||||
schedstat_inc(p, se.nr_wakeups_affine_attempts);
|
||||
tl_per_task = cpu_avg_load_per_task(this_cpu);
|
||||
|
||||
if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) ||
|
||||
balanced) {
|
||||
if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <=
|
||||
tl_per_task)) {
|
||||
/*
|
||||
* This domain has SD_WAKE_AFFINE and
|
||||
* p is cache cold in this domain, and
|
||||
@@ -1211,16 +1158,17 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
|
||||
struct sched_domain *sd, *this_sd = NULL;
|
||||
int prev_cpu, this_cpu, new_cpu;
|
||||
unsigned long load, this_load;
|
||||
struct rq *rq, *this_rq;
|
||||
struct rq *this_rq;
|
||||
unsigned int imbalance;
|
||||
int idx;
|
||||
|
||||
prev_cpu = task_cpu(p);
|
||||
rq = task_rq(p);
|
||||
this_cpu = smp_processor_id();
|
||||
this_rq = cpu_rq(this_cpu);
|
||||
new_cpu = prev_cpu;
|
||||
|
||||
if (prev_cpu == this_cpu)
|
||||
goto out;
|
||||
/*
|
||||
* 'this_sd' is the first domain that both
|
||||
* this_cpu and prev_cpu are present in:
|
||||
@@ -1248,13 +1196,10 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
|
||||
load = source_load(prev_cpu, idx);
|
||||
this_load = target_load(this_cpu, idx);
|
||||
|
||||
if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
|
||||
if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
|
||||
load, this_load, imbalance))
|
||||
return this_cpu;
|
||||
|
||||
if (prev_cpu == this_cpu)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Start passive balancing when half the imbalance_pct
|
||||
* limit is reached.
|
||||
@@ -1281,62 +1226,20 @@ static unsigned long wakeup_gran(struct sched_entity *se)
|
||||
* + nice tasks.
|
||||
*/
|
||||
if (sched_feat(ASYM_GRAN))
|
||||
gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se);
|
||||
else
|
||||
gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
|
||||
gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
|
||||
|
||||
return gran;
|
||||
}
|
||||
|
||||
/*
|
||||
* Should 'se' preempt 'curr'.
|
||||
*
|
||||
* |s1
|
||||
* |s2
|
||||
* |s3
|
||||
* g
|
||||
* |<--->|c
|
||||
*
|
||||
* w(c, s1) = -1
|
||||
* w(c, s2) = 0
|
||||
* w(c, s3) = 1
|
||||
*
|
||||
*/
|
||||
static int
|
||||
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
|
||||
{
|
||||
s64 gran, vdiff = curr->vruntime - se->vruntime;
|
||||
|
||||
if (vdiff < 0)
|
||||
return -1;
|
||||
|
||||
gran = wakeup_gran(curr);
|
||||
if (vdiff > gran)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* return depth at which a sched entity is present in the hierarchy */
|
||||
static inline int depth_se(struct sched_entity *se)
|
||||
{
|
||||
int depth = 0;
|
||||
|
||||
for_each_sched_entity(se)
|
||||
depth++;
|
||||
|
||||
return depth;
|
||||
}
|
||||
|
||||
/*
|
||||
* Preempt the current task with a newly woken task if needed:
|
||||
*/
|
||||
static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
|
||||
static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
|
||||
{
|
||||
struct task_struct *curr = rq->curr;
|
||||
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
|
||||
struct sched_entity *se = &curr->se, *pse = &p->se;
|
||||
int se_depth, pse_depth;
|
||||
s64 delta_exec;
|
||||
|
||||
if (unlikely(rt_prio(p->prio))) {
|
||||
update_rq_clock(rq);
|
||||
@@ -1350,6 +1253,13 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
|
||||
|
||||
cfs_rq_of(pse)->next = pse;
|
||||
|
||||
/*
|
||||
* We can come here with TIF_NEED_RESCHED already set from new task
|
||||
* wake up path.
|
||||
*/
|
||||
if (test_tsk_need_resched(curr))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Batch tasks do not preempt (their preemption is driven by
|
||||
* the tick):
|
||||
@@ -1360,33 +1270,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
|
||||
if (!sched_feat(WAKEUP_PREEMPT))
|
||||
return;
|
||||
|
||||
/*
|
||||
* preemption test can be made between sibling entities who are in the
|
||||
* same cfs_rq i.e who have a common parent. Walk up the hierarchy of
|
||||
* both tasks until we find their ancestors who are siblings of common
|
||||
* parent.
|
||||
*/
|
||||
|
||||
/* First walk up until both entities are at same depth */
|
||||
se_depth = depth_se(se);
|
||||
pse_depth = depth_se(pse);
|
||||
|
||||
while (se_depth > pse_depth) {
|
||||
se_depth--;
|
||||
se = parent_entity(se);
|
||||
if (sched_feat(WAKEUP_OVERLAP) && (sync ||
|
||||
(se->avg_overlap < sysctl_sched_migration_cost &&
|
||||
pse->avg_overlap < sysctl_sched_migration_cost))) {
|
||||
resched_task(curr);
|
||||
return;
|
||||
}
|
||||
|
||||
while (pse_depth > se_depth) {
|
||||
pse_depth--;
|
||||
pse = parent_entity(pse);
|
||||
}
|
||||
|
||||
while (!is_same_group(se, pse)) {
|
||||
se = parent_entity(se);
|
||||
pse = parent_entity(pse);
|
||||
}
|
||||
|
||||
if (wakeup_preempt_entity(se, pse) == 1)
|
||||
delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
|
||||
if (delta_exec > wakeup_gran(pse))
|
||||
resched_task(curr);
|
||||
}
|
||||
|
||||
@@ -1445,19 +1337,9 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next)
|
||||
if (next == &cfs_rq->tasks)
|
||||
return NULL;
|
||||
|
||||
/* Skip over entities that are not tasks */
|
||||
do {
|
||||
se = list_entry(next, struct sched_entity, group_node);
|
||||
next = next->next;
|
||||
} while (next != &cfs_rq->tasks && !entity_is_task(se));
|
||||
|
||||
if (next == &cfs_rq->tasks)
|
||||
return NULL;
|
||||
|
||||
cfs_rq->balance_iterator = next;
|
||||
|
||||
if (entity_is_task(se))
|
||||
p = task_of(se);
|
||||
se = list_entry(next, struct sched_entity, group_node);
|
||||
p = task_of(se);
|
||||
cfs_rq->balance_iterator = next->next;
|
||||
|
||||
return p;
|
||||
}
|
||||
@@ -1507,7 +1389,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
rcu_read_lock();
|
||||
update_h_load(busiest_cpu);
|
||||
|
||||
list_for_each_entry(tg, &task_groups, list) {
|
||||
list_for_each_entry_rcu(tg, &task_groups, list) {
|
||||
struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
|
||||
unsigned long busiest_h_load = busiest_cfs_rq->h_load;
|
||||
unsigned long busiest_weight = busiest_cfs_rq->load.weight;
|
||||
@@ -1620,10 +1502,10 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
|
||||
* 'current' within the tree based on its new key value.
|
||||
*/
|
||||
swap(curr->vruntime, se->vruntime);
|
||||
resched_task(rq->curr);
|
||||
}
|
||||
|
||||
enqueue_task_fair(rq, p, 0);
|
||||
resched_task(rq->curr);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1642,7 +1524,7 @@ static void prio_changed_fair(struct rq *rq, struct task_struct *p,
|
||||
if (p->prio > oldprio)
|
||||
resched_task(rq->curr);
|
||||
} else
|
||||
check_preempt_curr(rq, p);
|
||||
check_preempt_curr(rq, p, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1659,7 +1541,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p,
|
||||
if (running)
|
||||
resched_task(rq->curr);
|
||||
else
|
||||
check_preempt_curr(rq, p);
|
||||
check_preempt_curr(rq, p, 0);
|
||||
}
|
||||
|
||||
/* Account for a task changing its policy or group.
|
||||
|
@@ -11,3 +11,4 @@ SCHED_FEAT(ASYM_GRAN, 1)
|
||||
SCHED_FEAT(LB_BIAS, 1)
|
||||
SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
|
||||
SCHED_FEAT(ASYM_EFF_LOAD, 1)
|
||||
SCHED_FEAT(WAKEUP_OVERLAP, 0)
|
||||
|
@@ -14,7 +14,7 @@ static int select_task_rq_idle(struct task_struct *p, int sync)
|
||||
/*
|
||||
* Idle tasks are unconditionally rescheduled:
|
||||
*/
|
||||
static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
|
||||
static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sync)
|
||||
{
|
||||
resched_task(rq->idle);
|
||||
}
|
||||
@@ -76,7 +76,7 @@ static void switched_to_idle(struct rq *rq, struct task_struct *p,
|
||||
if (running)
|
||||
resched_task(rq->curr);
|
||||
else
|
||||
check_preempt_curr(rq, p);
|
||||
check_preempt_curr(rq, p, 0);
|
||||
}
|
||||
|
||||
static void prio_changed_idle(struct rq *rq, struct task_struct *p,
|
||||
@@ -93,7 +93,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
|
||||
if (p->prio > oldprio)
|
||||
resched_task(rq->curr);
|
||||
} else
|
||||
check_preempt_curr(rq, p);
|
||||
check_preempt_curr(rq, p, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -102,12 +102,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
|
||||
|
||||
static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
|
||||
{
|
||||
struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
|
||||
struct sched_rt_entity *rt_se = rt_rq->rt_se;
|
||||
|
||||
if (rt_se && !on_rt_rq(rt_se) && rt_rq->rt_nr_running) {
|
||||
struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
|
||||
|
||||
enqueue_rt_entity(rt_se);
|
||||
if (rt_rq->rt_nr_running) {
|
||||
if (rt_se && !on_rt_rq(rt_se))
|
||||
enqueue_rt_entity(rt_se);
|
||||
if (rt_rq->highest_prio < curr->prio)
|
||||
resched_task(curr);
|
||||
}
|
||||
@@ -231,6 +231,9 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
|
||||
#endif /* CONFIG_RT_GROUP_SCHED */
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* We ran out of runtime, see if we can borrow some from our neighbours.
|
||||
*/
|
||||
static int do_balance_runtime(struct rt_rq *rt_rq)
|
||||
{
|
||||
struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
|
||||
@@ -250,9 +253,18 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
|
||||
continue;
|
||||
|
||||
spin_lock(&iter->rt_runtime_lock);
|
||||
/*
|
||||
* Either all rqs have inf runtime and there's nothing to steal
|
||||
* or __disable_runtime() below sets a specific rq to inf to
|
||||
* indicate it's been disabled and disallow stealing.
|
||||
*/
|
||||
if (iter->rt_runtime == RUNTIME_INF)
|
||||
goto next;
|
||||
|
||||
/*
|
||||
* From runqueues with spare time, take 1/n part of their
|
||||
* spare time, but no more than our period.
|
||||
*/
|
||||
diff = iter->rt_runtime - iter->rt_time;
|
||||
if (diff > 0) {
|
||||
diff = div_u64((u64)diff, weight);
|
||||
@@ -274,6 +286,9 @@ next:
|
||||
return more;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure this RQ takes back all the runtime it lent to its neighbours.
|
||||
*/
|
||||
static void __disable_runtime(struct rq *rq)
|
||||
{
|
||||
struct root_domain *rd = rq->rd;
|
||||
@@ -289,17 +304,33 @@ static void __disable_runtime(struct rq *rq)
|
||||
|
||||
spin_lock(&rt_b->rt_runtime_lock);
|
||||
spin_lock(&rt_rq->rt_runtime_lock);
|
||||
/*
|
||||
* Either we're all inf and nobody needs to borrow, or we're
|
||||
* already disabled and thus have nothing to do, or we have
|
||||
* exactly the right amount of runtime to take out.
|
||||
*/
|
||||
if (rt_rq->rt_runtime == RUNTIME_INF ||
|
||||
rt_rq->rt_runtime == rt_b->rt_runtime)
|
||||
goto balanced;
|
||||
spin_unlock(&rt_rq->rt_runtime_lock);
|
||||
|
||||
/*
|
||||
* Calculate the difference between what we started out with
|
||||
* and what we current have, that's the amount of runtime
|
||||
* we lend and now have to reclaim.
|
||||
*/
|
||||
want = rt_b->rt_runtime - rt_rq->rt_runtime;
|
||||
|
||||
/*
|
||||
* Greedy reclaim, take back as much as we can.
|
||||
*/
|
||||
for_each_cpu_mask(i, rd->span) {
|
||||
struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
|
||||
s64 diff;
|
||||
|
||||
/*
|
||||
* Can't reclaim from ourselves or disabled runqueues.
|
||||
*/
|
||||
if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
|
||||
continue;
|
||||
|
||||
@@ -319,8 +350,16 @@ static void __disable_runtime(struct rq *rq)
|
||||
}
|
||||
|
||||
spin_lock(&rt_rq->rt_runtime_lock);
|
||||
/*
|
||||
* We cannot be left wanting - that would mean some runtime
|
||||
* leaked out of the system.
|
||||
*/
|
||||
BUG_ON(want);
|
||||
balanced:
|
||||
/*
|
||||
* Disable all the borrow logic by pretending we have inf
|
||||
* runtime - in which case borrowing doesn't make sense.
|
||||
*/
|
||||
rt_rq->rt_runtime = RUNTIME_INF;
|
||||
spin_unlock(&rt_rq->rt_runtime_lock);
|
||||
spin_unlock(&rt_b->rt_runtime_lock);
|
||||
@@ -343,6 +382,9 @@ static void __enable_runtime(struct rq *rq)
|
||||
if (unlikely(!scheduler_running))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Reset each runqueue's bandwidth settings
|
||||
*/
|
||||
for_each_leaf_rt_rq(rt_rq, rq) {
|
||||
struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
|
||||
|
||||
@@ -350,6 +392,7 @@ static void __enable_runtime(struct rq *rq)
|
||||
spin_lock(&rt_rq->rt_runtime_lock);
|
||||
rt_rq->rt_runtime = rt_b->rt_runtime;
|
||||
rt_rq->rt_time = 0;
|
||||
rt_rq->rt_throttled = 0;
|
||||
spin_unlock(&rt_rq->rt_runtime_lock);
|
||||
spin_unlock(&rt_b->rt_runtime_lock);
|
||||
}
|
||||
@@ -388,7 +431,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
|
||||
int i, idle = 1;
|
||||
cpumask_t span;
|
||||
|
||||
if (rt_b->rt_runtime == RUNTIME_INF)
|
||||
if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
|
||||
return 1;
|
||||
|
||||
span = sched_rt_period_mask();
|
||||
@@ -486,6 +529,9 @@ static void update_curr_rt(struct rq *rq)
|
||||
curr->se.exec_start = rq->clock;
|
||||
cpuacct_charge(curr, delta_exec);
|
||||
|
||||
if (!rt_bandwidth_enabled())
|
||||
return;
|
||||
|
||||
for_each_sched_rt_entity(rt_se) {
|
||||
rt_rq = rt_rq_of_se(rt_se);
|
||||
|
||||
@@ -783,7 +829,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
|
||||
/*
|
||||
* Preempt the current task with a newly woken task if needed:
|
||||
*/
|
||||
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
|
||||
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync)
|
||||
{
|
||||
if (p->prio < rq->curr->prio) {
|
||||
resched_task(rq->curr);
|
||||
|
@@ -46,7 +46,7 @@ irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
|
||||
EXPORT_SYMBOL(irq_stat);
|
||||
#endif
|
||||
|
||||
static struct softirq_action softirq_vec[32] __cacheline_aligned_in_smp;
|
||||
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
|
||||
|
||||
static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
|
||||
|
||||
@@ -205,7 +205,18 @@ restart:
|
||||
|
||||
do {
|
||||
if (pending & 1) {
|
||||
int prev_count = preempt_count();
|
||||
|
||||
h->action(h);
|
||||
|
||||
if (unlikely(prev_count != preempt_count())) {
|
||||
printk(KERN_ERR "huh, entered softirq %td %p"
|
||||
"with preempt_count %08x,"
|
||||
" exited with %08x?\n", h - softirq_vec,
|
||||
h->action, prev_count, preempt_count());
|
||||
preempt_count() = prev_count;
|
||||
}
|
||||
|
||||
rcu_bh_qsctr_inc(cpu);
|
||||
}
|
||||
h++;
|
||||
|
@@ -226,7 +226,7 @@ static void check_hung_uninterruptible_tasks(int this_cpu)
|
||||
* If the system crashed already then all bets are off,
|
||||
* do not report extra hung tasks:
|
||||
*/
|
||||
if ((tainted & TAINT_DIE) || did_panic)
|
||||
if (test_taint(TAINT_DIE) || did_panic)
|
||||
return;
|
||||
|
||||
read_lock(&tasklist_lock);
|
kernel/sys.c
@@ -1060,9 +1060,7 @@ asmlinkage long sys_setsid(void)
|
||||
group_leader->signal->leader = 1;
|
||||
__set_special_pids(sid);
|
||||
|
||||
spin_lock(&group_leader->sighand->siglock);
|
||||
group_leader->signal->tty = NULL;
|
||||
spin_unlock(&group_leader->sighand->siglock);
|
||||
proc_clear_tty(group_leader);
|
||||
|
||||
err = session;
|
||||
out:
|
||||
@@ -1351,8 +1349,10 @@ asmlinkage long sys_sethostname(char __user *name, int len)
|
||||
down_write(&uts_sem);
|
||||
errno = -EFAULT;
|
||||
if (!copy_from_user(tmp, name, len)) {
|
||||
memcpy(utsname()->nodename, tmp, len);
|
||||
utsname()->nodename[len] = 0;
|
||||
struct new_utsname *u = utsname();
|
||||
|
||||
memcpy(u->nodename, tmp, len);
|
||||
memset(u->nodename + len, 0, sizeof(u->nodename) - len);
|
||||
errno = 0;
|
||||
}
|
||||
up_write(&uts_sem);
|
||||
@@ -1364,15 +1364,17 @@ asmlinkage long sys_sethostname(char __user *name, int len)
|
||||
asmlinkage long sys_gethostname(char __user *name, int len)
|
||||
{
|
||||
int i, errno;
|
||||
struct new_utsname *u;
|
||||
|
||||
if (len < 0)
|
||||
return -EINVAL;
|
||||
down_read(&uts_sem);
|
||||
i = 1 + strlen(utsname()->nodename);
|
||||
u = utsname();
|
||||
i = 1 + strlen(u->nodename);
|
||||
if (i > len)
|
||||
i = len;
|
||||
errno = 0;
|
||||
if (copy_to_user(name, utsname()->nodename, i))
|
||||
if (copy_to_user(name, u->nodename, i))
|
||||
errno = -EFAULT;
|
||||
up_read(&uts_sem);
|
||||
return errno;
|
||||
@@ -1397,8 +1399,10 @@ asmlinkage long sys_setdomainname(char __user *name, int len)
|
||||
down_write(&uts_sem);
|
||||
errno = -EFAULT;
|
||||
if (!copy_from_user(tmp, name, len)) {
|
||||
memcpy(utsname()->domainname, tmp, len);
|
||||
utsname()->domainname[len] = 0;
|
||||
struct new_utsname *u = utsname();
|
||||
|
||||
memcpy(u->domainname, tmp, len);
|
||||
memset(u->domainname + len, 0, sizeof(u->domainname) - len);
|
||||
errno = 0;
|
||||
}
|
||||
up_write(&uts_sem);
|
||||
@@ -1452,14 +1456,22 @@ asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim)
|
||||
return -EINVAL;
|
||||
if (copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
|
||||
return -EFAULT;
|
||||
if (new_rlim.rlim_cur > new_rlim.rlim_max)
|
||||
return -EINVAL;
|
||||
old_rlim = current->signal->rlim + resource;
|
||||
if ((new_rlim.rlim_max > old_rlim->rlim_max) &&
|
||||
!capable(CAP_SYS_RESOURCE))
|
||||
return -EPERM;
|
||||
if (resource == RLIMIT_NOFILE && new_rlim.rlim_max > sysctl_nr_open)
|
||||
return -EPERM;
|
||||
|
||||
if (resource == RLIMIT_NOFILE) {
|
||||
if (new_rlim.rlim_max == RLIM_INFINITY)
|
||||
new_rlim.rlim_max = sysctl_nr_open;
|
||||
if (new_rlim.rlim_cur == RLIM_INFINITY)
|
||||
new_rlim.rlim_cur = sysctl_nr_open;
|
||||
if (new_rlim.rlim_max > sysctl_nr_open)
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
if (new_rlim.rlim_cur > new_rlim.rlim_max)
|
||||
return -EINVAL;
|
||||
|
||||
retval = security_task_setrlimit(resource, &new_rlim);
|
||||
if (retval)
|
||||
|
@@ -125,6 +125,12 @@ cond_syscall(sys_vm86old);
|
||||
cond_syscall(sys_vm86);
|
||||
cond_syscall(compat_sys_ipc);
|
||||
cond_syscall(compat_sys_sysctl);
|
||||
cond_syscall(sys_flock);
|
||||
cond_syscall(sys_io_setup);
|
||||
cond_syscall(sys_io_destroy);
|
||||
cond_syscall(sys_io_submit);
|
||||
cond_syscall(sys_io_cancel);
|
||||
cond_syscall(sys_io_getevents);
|
||||
|
||||
/* arch-specific weak syscall entries */
|
||||
cond_syscall(sys_pciconfig_read);
|
kernel/sysctl.c
@@ -80,7 +80,6 @@ extern int pid_max_min, pid_max_max;
|
||||
extern int sysctl_drop_caches;
|
||||
extern int percpu_pagelist_fraction;
|
||||
extern int compat_log;
|
||||
extern int maps_protect;
|
||||
extern int latencytop_enabled;
|
||||
extern int sysctl_nr_open_min, sysctl_nr_open_max;
|
||||
#ifdef CONFIG_RCU_TORTURE_TEST
|
||||
@@ -97,7 +96,7 @@ static int sixty = 60;
|
||||
static int neg_one = -1;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
#if defined(CONFIG_MMU) && defined(CONFIG_FILE_LOCKING)
|
||||
static int two = 2;
|
||||
#endif
|
||||
|
||||
@@ -118,10 +117,8 @@ extern char modprobe_path[];
|
||||
extern int sg_big_buff;
|
||||
#endif
|
||||
|
||||
#ifdef __sparc__
|
||||
extern char reboot_command [];
|
||||
extern int stop_a_enabled;
|
||||
extern int scons_pwroff;
|
||||
#ifdef CONFIG_SPARC
|
||||
#include <asm/system.h>
|
||||
#endif
|
||||
|
||||
#ifdef __hppa__
|
||||
@@ -152,7 +149,7 @@ extern int max_lock_depth;
|
||||
#ifdef CONFIG_PROC_SYSCTL
|
||||
static int proc_do_cad_pid(struct ctl_table *table, int write, struct file *filp,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos);
|
||||
static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp,
|
||||
static int proc_taint(struct ctl_table *table, int write, struct file *filp,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos);
|
||||
#endif
|
||||
|
||||
@@ -382,10 +379,9 @@ static struct ctl_table kern_table[] = {
|
||||
#ifdef CONFIG_PROC_SYSCTL
|
||||
{
|
||||
.procname = "tainted",
|
||||
.data = &tainted,
|
||||
.maxlen = sizeof(int),
|
||||
.maxlen = sizeof(long),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec_taint,
|
||||
.proc_handler = &proc_taint,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_LATENCYTOP
|
||||
@@ -415,7 +411,7 @@ static struct ctl_table kern_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
#ifdef __sparc__
|
||||
#ifdef CONFIG_SPARC
|
||||
{
|
||||
.ctl_name = KERN_SPARC_REBOOT,
|
||||
.procname = "reboot-cmd",
|
||||
@@ -809,16 +805,6 @@ static struct ctl_table kern_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_PROC_FS
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
.procname = "maps_protect",
|
||||
.data = &maps_protect,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
#endif
|
||||
{
|
||||
.ctl_name = CTL_UNNUMBERED,
|
||||
@@ -1261,6 +1247,7 @@ static struct ctl_table fs_table[] = {
|
||||
.extra1 = &minolduid,
|
||||
.extra2 = &maxolduid,
|
||||
},
|
||||
#ifdef CONFIG_FILE_LOCKING
|
||||
{
|
||||
.ctl_name = FS_LEASES,
|
||||
.procname = "leases-enable",
|
||||
@@ -1269,6 +1256,7 @@ static struct ctl_table fs_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_DNOTIFY
|
||||
{
|
||||
.ctl_name = FS_DIR_NOTIFY,
|
||||
@@ -1280,6 +1268,7 @@ static struct ctl_table fs_table[] = {
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_MMU
|
||||
#ifdef CONFIG_FILE_LOCKING
|
||||
{
|
||||
.ctl_name = FS_LEASE_TIME,
|
||||
.procname = "lease-break-time",
|
||||
@@ -1291,6 +1280,8 @@ static struct ctl_table fs_table[] = {
|
||||
.extra1 = &zero,
|
||||
.extra2 = &two,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_AIO
|
||||
{
|
||||
.procname = "aio-nr",
|
||||
.data = &aio_nr,
|
||||
@@ -1305,6 +1296,7 @@ static struct ctl_table fs_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_doulongvec_minmax,
|
||||
},
|
||||
#endif /* CONFIG_AIO */
|
||||
#ifdef CONFIG_INOTIFY_USER
|
||||
{
|
||||
.ctl_name = FS_INOTIFY,
|
||||
@@ -1510,7 +1502,6 @@ void register_sysctl_root(struct ctl_table_root *root)
|
||||
/* Perform the actual read/write of a sysctl table entry. */
|
||||
static int do_sysctl_strategy(struct ctl_table_root *root,
|
||||
struct ctl_table *table,
|
||||
int __user *name, int nlen,
|
||||
void __user *oldval, size_t __user *oldlenp,
|
||||
void __user *newval, size_t newlen)
|
||||
{
|
||||
@@ -1524,8 +1515,7 @@ static int do_sysctl_strategy(struct ctl_table_root *root,
|
||||
return -EPERM;
|
||||
|
||||
if (table->strategy) {
|
||||
rc = table->strategy(table, name, nlen, oldval, oldlenp,
|
||||
newval, newlen);
|
||||
rc = table->strategy(table, oldval, oldlenp, newval, newlen);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
if (rc > 0)
|
||||
@@ -1535,8 +1525,7 @@ static int do_sysctl_strategy(struct ctl_table_root *root,
|
||||
/* If there is no strategy routine, or if the strategy returns
|
||||
* zero, proceed with automatic r/w */
|
||||
if (table->data && table->maxlen) {
|
||||
rc = sysctl_data(table, name, nlen, oldval, oldlenp,
|
||||
newval, newlen);
|
||||
rc = sysctl_data(table, oldval, oldlenp, newval, newlen);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
}
|
||||
@@ -1568,7 +1557,7 @@ repeat:
|
||||
table = table->child;
|
||||
goto repeat;
|
||||
}
|
||||
error = do_sysctl_strategy(root, table, name, nlen,
|
||||
error = do_sysctl_strategy(root, table,
|
||||
oldval, oldlenp,
|
||||
newval, newlen);
|
||||
return error;
|
||||
@@ -2237,49 +2226,39 @@ int proc_dointvec(struct ctl_table *table, int write, struct file *filp,
|
||||
NULL,NULL);
|
||||
}
|
||||
|
||||
#define OP_SET 0
|
||||
#define OP_AND 1
|
||||
#define OP_OR 2
|
||||
|
||||
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
|
||||
int *valp,
|
||||
int write, void *data)
|
||||
{
|
||||
int op = *(int *)data;
|
||||
if (write) {
|
||||
int val = *negp ? -*lvalp : *lvalp;
|
||||
switch(op) {
|
||||
case OP_SET: *valp = val; break;
|
||||
case OP_AND: *valp &= val; break;
|
||||
case OP_OR: *valp |= val; break;
|
||||
}
|
||||
} else {
|
||||
int val = *valp;
|
||||
if (val < 0) {
|
||||
*negp = -1;
|
||||
*lvalp = (unsigned long)-val;
|
||||
} else {
|
||||
*negp = 0;
|
||||
*lvalp = (unsigned long)val;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Taint values can only be increased
|
||||
* Taint values can only be increased
|
||||
* This means we can safely use a temporary.
|
||||
*/
|
||||
static int proc_dointvec_taint(struct ctl_table *table, int write, struct file *filp,
|
||||
static int proc_taint(struct ctl_table *table, int write, struct file *filp,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
int op;
|
||||
struct ctl_table t;
|
||||
unsigned long tmptaint = get_taint();
|
||||
int err;
|
||||
|
||||
if (write && !capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
op = OP_OR;
|
||||
return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
|
||||
do_proc_dointvec_bset_conv,&op);
|
||||
t = *table;
|
||||
t.data = &tmptaint;
|
||||
err = proc_doulongvec_minmax(&t, write, filp, buffer, lenp, ppos);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
if (write) {
|
||||
/*
|
||||
* Poor man's atomic or. Not worth adding a primitive
|
||||
* to everyone's atomic.h for this
|
||||
*/
|
||||
int i;
|
||||
for (i = 0; i < BITS_PER_LONG && tmptaint >> i; i++) {
|
||||
if ((tmptaint >> i) & 1)
|
||||
add_taint(i);
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
struct do_proc_dointvec_minmax_conv_param {
|
||||
@@ -2727,7 +2706,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
|
||||
*/
|
||||
|
||||
/* The generic sysctl data routine (used if no strategy routine supplied) */
|
||||
int sysctl_data(struct ctl_table *table, int __user *name, int nlen,
|
||||
int sysctl_data(struct ctl_table *table,
|
||||
void __user *oldval, size_t __user *oldlenp,
|
||||
void __user *newval, size_t newlen)
|
||||
{
|
||||
@@ -2761,7 +2740,7 @@ int sysctl_data(struct ctl_table *table, int __user *name, int nlen,
|
||||
}
|
||||
|
||||
/* The generic string strategy routine: */
|
||||
int sysctl_string(struct ctl_table *table, int __user *name, int nlen,
|
||||
int sysctl_string(struct ctl_table *table,
|
||||
void __user *oldval, size_t __user *oldlenp,
|
||||
void __user *newval, size_t newlen)
|
||||
{
|
||||
@@ -2807,7 +2786,7 @@ int sysctl_string(struct ctl_table *table, int __user *name, int nlen,
|
||||
* are between the minimum and maximum values given in the arrays
|
||||
* table->extra1 and table->extra2, respectively.
|
||||
*/
|
||||
int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen,
|
||||
int sysctl_intvec(struct ctl_table *table,
|
||||
void __user *oldval, size_t __user *oldlenp,
|
||||
void __user *newval, size_t newlen)
|
||||
{
|
||||
@@ -2843,7 +2822,7 @@ int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen,
|
||||
}
|
||||
|
||||
/* Strategy function to convert jiffies to seconds */
|
||||
int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen,
|
||||
int sysctl_jiffies(struct ctl_table *table,
|
||||
void __user *oldval, size_t __user *oldlenp,
|
||||
void __user *newval, size_t newlen)
|
||||
{
|
||||
@@ -2877,7 +2856,7 @@ int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen,
|
||||
}
|
||||
|
||||
/* Strategy function to convert jiffies to seconds */
|
||||
int sysctl_ms_jiffies(struct ctl_table *table, int __user *name, int nlen,
|
||||
int sysctl_ms_jiffies(struct ctl_table *table,
|
||||
void __user *oldval, size_t __user *oldlenp,
|
||||
void __user *newval, size_t newlen)
|
||||
{
|
||||
@@ -2932,35 +2911,35 @@ asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
|
||||
return error;
|
||||
}
|
||||
|
||||
int sysctl_data(struct ctl_table *table, int __user *name, int nlen,
|
||||
int sysctl_data(struct ctl_table *table,
|
||||
void __user *oldval, size_t __user *oldlenp,
|
||||
void __user *newval, size_t newlen)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
int sysctl_string(struct ctl_table *table, int __user *name, int nlen,
|
||||
int sysctl_string(struct ctl_table *table,
|
||||
void __user *oldval, size_t __user *oldlenp,
|
||||
void __user *newval, size_t newlen)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
int sysctl_intvec(struct ctl_table *table, int __user *name, int nlen,
|
||||
int sysctl_intvec(struct ctl_table *table,
|
||||
void __user *oldval, size_t __user *oldlenp,
|
||||
void __user *newval, size_t newlen)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
int sysctl_jiffies(struct ctl_table *table, int __user *name, int nlen,
|
||||
int sysctl_jiffies(struct ctl_table *table,
|
||||
void __user *oldval, size_t __user *oldlenp,
|
||||
void __user *newval, size_t newlen)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
int sysctl_ms_jiffies(struct ctl_table *table, int __user *name, int nlen,
|
||||
int sysctl_ms_jiffies(struct ctl_table *table,
|
||||
void __user *oldval, size_t __user *oldlenp,
|
||||
void __user *newval, size_t newlen)
|
||||
{
|
||||
|
@@ -3,7 +3,6 @@
|
||||
#
|
||||
config TICK_ONESHOT
|
||||
bool
|
||||
default n
|
||||
|
||||
config NO_HZ
|
||||
bool "Tickless System (Dynamic Ticks)"
|
||||
|
@@ -71,6 +71,16 @@ void clockevents_set_mode(struct clock_event_device *dev,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* clockevents_shutdown - shutdown the device and clear next_event
|
||||
* @dev: device to shutdown
|
||||
*/
|
||||
void clockevents_shutdown(struct clock_event_device *dev)
|
||||
{
|
||||
clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
|
||||
dev->next_event.tv64 = KTIME_MAX;
|
||||
}
|
||||
|
||||
/**
|
||||
* clockevents_program_event - Reprogram the clock event device.
|
||||
* @expires: absolute expiry time (monotonic clock)
|
||||
@@ -177,7 +187,7 @@ void clockevents_register_device(struct clock_event_device *dev)
|
||||
/*
|
||||
* Noop handler when we shut down an event device
|
||||
*/
|
||||
static void clockevents_handle_noop(struct clock_event_device *dev)
|
||||
void clockevents_handle_noop(struct clock_event_device *dev)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -199,7 +209,6 @@ void clockevents_exchange_device(struct clock_event_device *old,
|
||||
* released list and do a notify add later.
|
||||
*/
|
||||
if (old) {
|
||||
old->event_handler = clockevents_handle_noop;
|
||||
clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
|
||||
list_del(&old->list);
|
||||
list_add(&old->list, &clockevents_released);
|
||||
@@ -207,7 +216,7 @@ void clockevents_exchange_device(struct clock_event_device *old,
|
||||
|
||||
if (new) {
|
||||
BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED);
|
||||
clockevents_set_mode(new, CLOCK_EVT_MODE_SHUTDOWN);
|
||||
clockevents_shutdown(new);
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
@@ -244,7 +244,7 @@ static void sync_cmos_clock(unsigned long dummy)
|
||||
if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2)
|
||||
fail = update_persistent_clock(now);
|
||||
|
||||
next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec;
|
||||
next.tv_nsec = (NSEC_PER_SEC / 2) - now.tv_nsec - (TICK_NSEC / 2);
|
||||
if (next.tv_nsec <= 0)
|
||||
next.tv_nsec += NSEC_PER_SEC;
|
||||
|
||||
|
@@ -175,6 +175,8 @@ static void tick_do_periodic_broadcast(void)
|
||||
*/
|
||||
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
|
||||
{
|
||||
ktime_t next;
|
||||
|
||||
tick_do_periodic_broadcast();
|
||||
|
||||
/*
|
||||
@@ -185,10 +187,13 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
|
||||
|
||||
/*
|
||||
* Setup the next period for devices, which do not have
|
||||
* periodic mode:
|
||||
* periodic mode. We read dev->next_event first and add to it
|
||||
* when the event already expired. clockevents_program_event()
|
||||
* sets dev->next_event only when the event is really
|
||||
* programmed to the device.
|
||||
*/
|
||||
for (;;) {
|
||||
ktime_t next = ktime_add(dev->next_event, tick_period);
|
||||
for (next = dev->next_event; ;) {
|
||||
next = ktime_add(next, tick_period);
|
||||
|
||||
if (!clockevents_program_event(dev, next, ktime_get()))
|
||||
return;
|
||||
@@ -205,7 +210,7 @@ static void tick_do_broadcast_on_off(void *why)
|
||||
struct clock_event_device *bc, *dev;
|
||||
struct tick_device *td;
|
||||
unsigned long flags, *reason = why;
|
||||
int cpu;
|
||||
int cpu, bc_stopped;
|
||||
|
||||
spin_lock_irqsave(&tick_broadcast_lock, flags);
|
||||
|
||||
@@ -223,14 +228,16 @@ static void tick_do_broadcast_on_off(void *why)
|
||||
if (!tick_device_is_functional(dev))
|
||||
goto out;
|
||||
|
||||
bc_stopped = cpus_empty(tick_broadcast_mask);
|
||||
|
||||
switch (*reason) {
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_ON:
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
|
||||
if (!cpu_isset(cpu, tick_broadcast_mask)) {
|
||||
cpu_set(cpu, tick_broadcast_mask);
|
||||
if (td->mode == TICKDEV_MODE_PERIODIC)
|
||||
clockevents_set_mode(dev,
|
||||
CLOCK_EVT_MODE_SHUTDOWN);
|
||||
if (tick_broadcast_device.mode ==
|
||||
TICKDEV_MODE_PERIODIC)
|
||||
clockevents_shutdown(dev);
|
||||
}
|
||||
if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
|
||||
tick_broadcast_force = 1;
|
||||
@@ -239,15 +246,17 @@ static void tick_do_broadcast_on_off(void *why)
|
||||
if (!tick_broadcast_force &&
|
||||
cpu_isset(cpu, tick_broadcast_mask)) {
|
||||
cpu_clear(cpu, tick_broadcast_mask);
|
||||
if (td->mode == TICKDEV_MODE_PERIODIC)
|
||||
if (tick_broadcast_device.mode ==
|
||||
TICKDEV_MODE_PERIODIC)
|
||||
tick_setup_periodic(dev, 0);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (cpus_empty(tick_broadcast_mask))
|
||||
clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
|
||||
else {
|
||||
if (cpus_empty(tick_broadcast_mask)) {
|
||||
if (!bc_stopped)
|
||||
clockevents_shutdown(bc);
|
||||
} else if (bc_stopped) {
|
||||
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
|
||||
tick_broadcast_start_periodic(bc);
|
||||
else
|
||||
@@ -298,7 +307,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
|
||||
|
||||
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
|
||||
if (bc && cpus_empty(tick_broadcast_mask))
|
||||
clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
|
||||
clockevents_shutdown(bc);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
|
||||
@@ -313,7 +322,7 @@ void tick_suspend_broadcast(void)
|
||||
|
||||
bc = tick_broadcast_device.evtdev;
|
||||
if (bc)
|
||||
clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
|
||||
clockevents_shutdown(bc);
|
||||
|
||||
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
|
||||
}
|
||||
@@ -364,16 +373,8 @@ cpumask_t *tick_get_broadcast_oneshot_mask(void)
|
||||
static int tick_broadcast_set_event(ktime_t expires, int force)
|
||||
{
|
||||
struct clock_event_device *bc = tick_broadcast_device.evtdev;
|
||||
ktime_t now = ktime_get();
|
||||
int res;
|
||||
|
||||
for(;;) {
|
||||
res = clockevents_program_event(bc, expires, now);
|
||||
if (!res || !force)
|
||||
return res;
|
||||
now = ktime_get();
|
||||
expires = ktime_add(now, ktime_set(0, bc->min_delta_ns));
|
||||
}
|
||||
return tick_dev_program_event(bc, expires, force);
|
||||
}
|
||||
|
||||
int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
|
||||
@@ -491,14 +492,52 @@ static void tick_broadcast_clear_oneshot(int cpu)
|
||||
cpu_clear(cpu, tick_broadcast_oneshot_mask);
|
||||
}
|
||||
|
||||
static void tick_broadcast_init_next_event(cpumask_t *mask, ktime_t expires)
|
||||
{
|
||||
struct tick_device *td;
|
||||
int cpu;
|
||||
|
||||
for_each_cpu_mask_nr(cpu, *mask) {
|
||||
td = &per_cpu(tick_cpu_device, cpu);
|
||||
if (td->evtdev)
|
||||
td->evtdev->next_event = expires;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* tick_broadcast_setup_oneshot - setup the broadcast device
|
||||
*/
|
||||
void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
|
||||
{
|
||||
bc->event_handler = tick_handle_oneshot_broadcast;
|
||||
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
|
||||
bc->next_event.tv64 = KTIME_MAX;
|
||||
/* Set it up only once ! */
|
||||
if (bc->event_handler != tick_handle_oneshot_broadcast) {
|
||||
int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
|
||||
int cpu = smp_processor_id();
|
||||
cpumask_t mask;
|
||||
|
||||
bc->event_handler = tick_handle_oneshot_broadcast;
|
||||
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
|
||||
|
||||
/* Take the do_timer update */
|
||||
tick_do_timer_cpu = cpu;
|
||||
|
||||
/*
|
||||
* We must be careful here. There might be other CPUs
|
||||
* waiting for periodic broadcast. We need to set the
|
||||
* oneshot_mask bits for those and program the
|
||||
* broadcast device to fire.
|
||||
*/
|
||||
mask = tick_broadcast_mask;
|
||||
cpu_clear(cpu, mask);
|
||||
cpus_or(tick_broadcast_oneshot_mask,
|
||||
tick_broadcast_oneshot_mask, mask);
|
||||
|
||||
if (was_periodic && !cpus_empty(mask)) {
|
||||
tick_broadcast_init_next_event(&mask, tick_next_period);
|
||||
tick_broadcast_set_event(tick_next_period, 1);
|
||||
} else
|
||||
bc->next_event.tv64 = KTIME_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -538,4 +577,12 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
|
||||
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check, whether the broadcast device is in one shot mode
|
||||
*/
|
||||
int tick_broadcast_oneshot_active(void)
|
||||
{
|
||||
return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
|
||||
*/
|
||||
ktime_t tick_next_period;
|
||||
ktime_t tick_period;
|
||||
int tick_do_timer_cpu __read_mostly = -1;
|
||||
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
|
||||
DEFINE_SPINLOCK(tick_device_lock);
|
||||
|
||||
/*
|
||||
@@ -109,7 +109,8 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
|
||||
if (!tick_device_is_functional(dev))
|
||||
return;
|
||||
|
||||
if (dev->features & CLOCK_EVT_FEAT_PERIODIC) {
|
||||
if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
|
||||
!tick_broadcast_oneshot_active()) {
|
||||
clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
|
||||
} else {
|
||||
unsigned long seq;
|
||||
@@ -148,7 +149,7 @@ static void tick_setup_device(struct tick_device *td,
|
||||
* If no cpu took the do_timer update, assign it to
|
||||
* this cpu:
|
||||
*/
|
||||
if (tick_do_timer_cpu == -1) {
|
||||
if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
|
||||
tick_do_timer_cpu = cpu;
|
||||
tick_next_period = ktime_get();
|
||||
tick_period = ktime_set(0, NSEC_PER_SEC / HZ);
|
||||
@@ -161,6 +162,7 @@ static void tick_setup_device(struct tick_device *td,
|
||||
} else {
|
||||
handler = td->evtdev->event_handler;
|
||||
next_event = td->evtdev->next_event;
|
||||
td->evtdev->event_handler = clockevents_handle_noop;
|
||||
}
|
||||
|
||||
td->evtdev = newdev;
|
||||
@@ -248,7 +250,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
|
||||
* not give it back to the clockevents layer !
|
||||
*/
|
||||
if (tick_is_broadcast_device(curdev)) {
|
||||
clockevents_set_mode(curdev, CLOCK_EVT_MODE_SHUTDOWN);
|
||||
clockevents_shutdown(curdev);
|
||||
curdev = NULL;
|
||||
}
|
||||
clockevents_exchange_device(curdev, newdev);
|
||||
@@ -299,7 +301,8 @@ static void tick_shutdown(unsigned int *cpup)
|
||||
if (*cpup == tick_do_timer_cpu) {
|
||||
int cpu = first_cpu(cpu_online_map);
|
||||
|
||||
tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu : -1;
|
||||
tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu :
|
||||
TICK_DO_TIMER_NONE;
|
||||
}
|
||||
spin_unlock_irqrestore(&tick_device_lock, flags);
|
||||
}
|
||||
@@ -310,7 +313,7 @@ static void tick_suspend(void)
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&tick_device_lock, flags);
|
||||
clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
|
||||
clockevents_shutdown(td->evtdev);
|
||||
spin_unlock_irqrestore(&tick_device_lock, flags);
|
||||
}
|
||||
|
||||
|
@@ -1,6 +1,10 @@
|
||||
/*
|
||||
* tick internal variable and functions used by low/high res code
|
||||
*/
|
||||
|
||||
#define TICK_DO_TIMER_NONE -1
|
||||
#define TICK_DO_TIMER_BOOT -2
|
||||
|
||||
DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
|
||||
extern spinlock_t tick_device_lock;
|
||||
extern ktime_t tick_next_period;
|
||||
@@ -10,6 +14,8 @@ extern int tick_do_timer_cpu __read_mostly;
|
||||
extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
|
||||
extern void tick_handle_periodic(struct clock_event_device *dev);
|
||||
|
||||
extern void clockevents_shutdown(struct clock_event_device *dev);
|
||||
|
||||
/*
|
||||
* NO_HZ / high resolution timer shared code
|
||||
*/
|
||||
@@ -17,6 +23,8 @@ extern void tick_handle_periodic(struct clock_event_device *dev);
|
||||
extern void tick_setup_oneshot(struct clock_event_device *newdev,
|
||||
void (*handler)(struct clock_event_device *),
|
||||
ktime_t nextevt);
|
||||
extern int tick_dev_program_event(struct clock_event_device *dev,
|
||||
ktime_t expires, int force);
|
||||
extern int tick_program_event(ktime_t expires, int force);
|
||||
extern void tick_oneshot_notify(void);
|
||||
extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
|
||||
@@ -27,6 +35,7 @@ extern void tick_broadcast_oneshot_control(unsigned long reason);
|
||||
extern void tick_broadcast_switch_to_oneshot(void);
|
||||
extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
|
||||
extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
|
||||
extern int tick_broadcast_oneshot_active(void);
|
||||
# else /* BROADCAST */
|
||||
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
|
||||
{
|
||||
@@ -35,6 +44,7 @@ static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
|
||||
static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
|
||||
static inline void tick_broadcast_switch_to_oneshot(void) { }
|
||||
static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
|
||||
static inline int tick_broadcast_oneshot_active(void) { return 0; }
|
||||
# endif /* !BROADCAST */
|
||||
|
||||
#else /* !ONESHOT */
|
||||
@@ -64,6 +74,7 @@ static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline int tick_broadcast_oneshot_active(void) { return 0; }
|
||||
#endif /* !TICK_ONESHOT */
|
||||
|
||||
/*
|
||||
|
@@ -22,22 +22,54 @@
|
||||
|
||||
#include "tick-internal.h"
|
||||
|
||||
/**
|
||||
* tick_program_event internal worker function
|
||||
*/
|
||||
int tick_dev_program_event(struct clock_event_device *dev, ktime_t expires,
|
||||
int force)
|
||||
{
|
||||
ktime_t now = ktime_get();
|
||||
int i;
|
||||
|
||||
for (i = 0;;) {
|
||||
int ret = clockevents_program_event(dev, expires, now);
|
||||
|
||||
if (!ret || !force)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* We tried 2 times to program the device with the given
|
||||
* min_delta_ns. If that's not working then we double it
|
||||
* and emit a warning.
|
||||
*/
|
||||
if (++i > 2) {
|
||||
/* Increase the min. delta and try again */
|
||||
if (!dev->min_delta_ns)
|
||||
dev->min_delta_ns = 5000;
|
||||
else
|
||||
dev->min_delta_ns += dev->min_delta_ns >> 1;
|
||||
|
||||
printk(KERN_WARNING
|
||||
"CE: %s increasing min_delta_ns to %lu nsec\n",
|
||||
dev->name ? dev->name : "?",
|
||||
dev->min_delta_ns << 1);
|
||||
|
||||
i = 0;
|
||||
}
|
||||
|
||||
now = ktime_get();
|
||||
expires = ktime_add_ns(now, dev->min_delta_ns);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* tick_program_event
|
||||
*/
|
||||
int tick_program_event(ktime_t expires, int force)
|
||||
{
|
||||
struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
|
||||
ktime_t now = ktime_get();
|
||||
|
||||
while (1) {
|
||||
int ret = clockevents_program_event(dev, expires, now);
|
||||
|
||||
if (!ret || !force)
|
||||
return ret;
|
||||
now = ktime_get();
|
||||
expires = ktime_add(now, ktime_set(0, dev->min_delta_ns));
|
||||
}
|
||||
return tick_dev_program_event(dev, expires, force);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -61,7 +93,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev,
|
||||
{
|
||||
newdev->event_handler = handler;
|
||||
clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
|
||||
clockevents_program_event(newdev, next_event, ktime_get());
|
||||
tick_dev_program_event(newdev, next_event, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -20,6 +20,7 @@
|
||||
#include <linux/profile.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <asm/irq_regs.h>
|
||||
|
||||
@@ -75,6 +76,9 @@ static void tick_do_update_jiffies64(ktime_t now)
|
||||
incr * ticks);
|
||||
}
|
||||
do_timer(++ticks);
|
||||
|
||||
/* Keep the tick_next_period variable up to date */
|
||||
tick_next_period = ktime_add(last_jiffies_update, tick_period);
|
||||
}
|
||||
write_sequnlock(&xtime_lock);
|
||||
}
|
||||
@@ -187,9 +191,17 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
|
||||
{
|
||||
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
|
||||
|
||||
*last_update_time = ktime_to_us(ts->idle_lastupdate);
|
||||
if (!tick_nohz_enabled)
|
||||
return -1;
|
||||
|
||||
if (ts->idle_active)
|
||||
*last_update_time = ktime_to_us(ts->idle_lastupdate);
|
||||
else
|
||||
*last_update_time = ktime_to_us(ktime_get());
|
||||
|
||||
return ktime_to_us(ts->idle_sleeptime);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
|
||||
|
||||
/**
|
||||
* tick_nohz_stop_sched_tick - stop the idle tick from the idle task
|
||||
@@ -221,7 +233,7 @@ void tick_nohz_stop_sched_tick(int inidle)
|
||||
*/
|
||||
if (unlikely(!cpu_online(cpu))) {
|
||||
if (cpu == tick_do_timer_cpu)
|
||||
tick_do_timer_cpu = -1;
|
||||
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
|
||||
}
|
||||
|
||||
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
|
||||
@@ -258,7 +270,7 @@ void tick_nohz_stop_sched_tick(int inidle)
|
||||
next_jiffies = get_next_timer_interrupt(last_jiffies);
|
||||
delta_jiffies = next_jiffies - last_jiffies;
|
||||
|
||||
if (rcu_needs_cpu(cpu))
|
||||
if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu))
|
||||
delta_jiffies = 1;
|
||||
/*
|
||||
* Do not stop the tick, if we are only one off
|
||||
@@ -303,7 +315,7 @@ void tick_nohz_stop_sched_tick(int inidle)
|
||||
* invoked.
|
||||
*/
|
||||
if (cpu == tick_do_timer_cpu)
|
||||
tick_do_timer_cpu = -1;
|
||||
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
|
||||
|
||||
ts->idle_sleeps++;
|
||||
|
||||
@@ -468,7 +480,7 @@ static void tick_nohz_handler(struct clock_event_device *dev)
|
||||
* this duty, then the jiffies update is still serialized by
|
||||
* xtime_lock.
|
||||
*/
|
||||
if (unlikely(tick_do_timer_cpu == -1))
|
||||
if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
|
||||
tick_do_timer_cpu = cpu;
|
||||
|
||||
/* Check, if the jiffies need an update */
|
||||
@@ -570,7 +582,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
|
||||
* this duty, then the jiffies update is still serialized by
|
||||
* xtime_lock.
|
||||
*/
|
||||
if (unlikely(tick_do_timer_cpu == -1))
|
||||
if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
|
||||
tick_do_timer_cpu = cpu;
|
||||
#endif
|
||||
|
||||
@@ -622,7 +634,7 @@ void tick_setup_sched_timer(void)
|
||||
*/
|
||||
hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
|
||||
ts->sched_timer.function = tick_sched_timer;
|
||||
ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
|
||||
ts->sched_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
|
||||
|
||||
/* Get the next period (per cpu) */
|
||||
hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
|
||||
|
@@ -978,6 +978,7 @@ void update_process_times(int user_tick)
|
||||
run_local_timers();
|
||||
if (rcu_pending(cpu))
|
||||
rcu_check_callbacks(cpu, user_tick);
|
||||
printk_tick();
|
||||
scheduler_tick();
|
||||
run_posix_cpu_timers(p);
|
||||
}
|
||||
|
@@ -202,7 +202,7 @@ static void start_stack_timer(int cpu)
|
||||
|
||||
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
hrtimer->function = stack_trace_timer_fn;
|
||||
hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
|
||||
hrtimer->cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
|
||||
|
||||
hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
|
||||
}
|
||||
|
@@ -169,7 +169,7 @@ static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
|
||||
{
|
||||
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
|
||||
|
||||
return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg));
|
||||
return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg));
|
||||
}
|
||||
|
||||
static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
|
||||
@@ -180,7 +180,7 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
|
||||
unsigned long rt_runtime;
|
||||
int rc;
|
||||
|
||||
sscanf(buf, "%lu", &rt_runtime);
|
||||
sscanf(buf, "%ld", &rt_runtime);
|
||||
|
||||
rc = sched_group_set_rt_runtime(up->tg, rt_runtime);
|
||||
|
||||
|
@@ -60,7 +60,7 @@ static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
|
||||
|
||||
#ifdef CONFIG_SYSCTL_SYSCALL
|
||||
/* The generic string strategy routine: */
|
||||
static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
|
||||
static int sysctl_uts_string(ctl_table *table,
|
||||
void __user *oldval, size_t __user *oldlenp,
|
||||
void __user *newval, size_t newlen)
|
||||
{
|
||||
@@ -69,8 +69,7 @@ static int sysctl_uts_string(ctl_table *table, int __user *name, int nlen,
|
||||
write = newval && newlen;
|
||||
memcpy(&uts_table, table, sizeof(uts_table));
|
||||
uts_table.data = get_uts(table, write);
|
||||
r = sysctl_string(&uts_table, name, nlen,
|
||||
oldval, oldlenp, newval, newlen);
|
||||
r = sysctl_string(&uts_table, oldval, oldlenp, newval, newlen);
|
||||
put_uts(table, write, uts_table.data);
|
||||
return r;
|
||||
}
|
||||
|
@@ -72,12 +72,7 @@ prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state)
|
||||
spin_lock_irqsave(&q->lock, flags);
|
||||
if (list_empty(&wait->task_list))
|
||||
__add_wait_queue(q, wait);
|
||||
/*
|
||||
* don't alter the task state if this is just going to
|
||||
* queue an async wait queue callback
|
||||
*/
|
||||
if (is_sync_wait(wait))
|
||||
set_current_state(state);
|
||||
set_current_state(state);
|
||||
spin_unlock_irqrestore(&q->lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(prepare_to_wait);
|
||||
@@ -91,12 +86,7 @@ prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
|
||||
spin_lock_irqsave(&q->lock, flags);
|
||||
if (list_empty(&wait->task_list))
|
||||
__add_wait_queue_tail(q, wait);
|
||||
/*
|
||||
* don't alter the task state if this is just going to
|
||||
* queue an async wait queue callback
|
||||
*/
|
||||
if (is_sync_wait(wait))
|
||||
set_current_state(state);
|
||||
set_current_state(state);
|
||||
spin_unlock_irqrestore(&q->lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(prepare_to_wait_exclusive);
|
||||
|
@@ -9,7 +9,7 @@
|
||||
* Derived from the taskqueue/keventd code by:
|
||||
*
|
||||
* David Woodhouse <dwmw2@infradead.org>
|
||||
* Andrew Morton <andrewm@uow.edu.au>
|
||||
* Andrew Morton
|
||||
* Kai Petzke <wpp@marie.physik.tu-berlin.de>
|
||||
* Theodore Ts'o <tytso@mit.edu>
|
||||
*
|
||||
|