Merge branch 'core/percpu' into stackprotector

Conflicts:
	arch/x86/include/asm/pda.h
	arch/x86/include/asm/system.h

Also, moved include/asm-x86/stackprotector.h to arch/x86/include/asm.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Ingo Molnar
2009-01-18 18:37:14 +01:00
4925 changed files with 728361 additions and 108325 deletions

View File

@@ -9,7 +9,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o
notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
async.o
ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
@@ -39,7 +40,11 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
ifeq ($(CONFIG_USE_GENERIC_SMP_HELPERS),y)
obj-y += smp.o
else
obj-y += up.o
endif
obj-$(CONFIG_SMP) += spinlock.o
obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o

346
kernel/async.c Normal file
View File

@@ -0,0 +1,346 @@
/*
* async.c: Asynchronous function calls for boot performance
*
* (C) Copyright 2009 Intel Corporation
* Author: Arjan van de Ven <arjan@linux.intel.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
/*
Goals and Theory of Operation
The primary goal of this feature is to reduce the kernel boot time,
by doing various independent hardware delays and discovery operations
decoupled and not strictly serialized.
More specifically, the asynchronous function call concept allows
certain operations (primarily during system boot) to happen
asynchronously, out of order, while these operations still
have their externally visible parts happen sequentially and in-order.
(not unlike how out-of-order CPUs retire their instructions in order)
Key to the asynchronous function call implementation is the concept of
a "sequence cookie" (which, although it has an abstracted type, can be
thought of as a monotonically incrementing number).
The async core will assign each scheduled event such a sequence cookie and
pass this to the called functions.
The asynchronously called function should before doing a globally visible
operation, such as registering device numbers, call the
async_synchronize_cookie() function and pass in its own cookie. The
async_synchronize_cookie() function will make sure that all asynchronous
operations that were scheduled prior to the operation corresponding with the
cookie have completed.
Subsystem/driver initialization code that scheduled asynchronous probe
functions, but which shares global resources with other drivers/subsystems
that do not use the asynchronous call feature, need to do a full
synchronization with the async_synchronize_full() function, before returning
from their init function. This is to maintain strict ordering between the
asynchronous and synchronous parts of the kernel.
*/
#include <linux/async.h>
#include <linux/module.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <asm/atomic.h>
static async_cookie_t next_cookie = 1;
#define MAX_THREADS 256
#define MAX_WORK 32768
static LIST_HEAD(async_pending);
static LIST_HEAD(async_running);
static DEFINE_SPINLOCK(async_lock);
static int async_enabled = 0;
struct async_entry {
struct list_head list;
async_cookie_t cookie;
async_func_ptr *func;
void *data;
struct list_head *running;
};
static DECLARE_WAIT_QUEUE_HEAD(async_done);
static DECLARE_WAIT_QUEUE_HEAD(async_new);
static atomic_t entry_count;
static atomic_t thread_count;
extern int initcall_debug;
/*
* MUST be called with the lock held!
*/
static async_cookie_t __lowest_in_progress(struct list_head *running)
{
struct async_entry *entry;
if (!list_empty(running)) {
entry = list_first_entry(running,
struct async_entry, list);
return entry->cookie;
} else if (!list_empty(&async_pending)) {
entry = list_first_entry(&async_pending,
struct async_entry, list);
return entry->cookie;
} else {
/* nothing in progress... next_cookie is "infinity" */
return next_cookie;
}
}
static async_cookie_t lowest_in_progress(struct list_head *running)
{
unsigned long flags;
async_cookie_t ret;
spin_lock_irqsave(&async_lock, flags);
ret = __lowest_in_progress(running);
spin_unlock_irqrestore(&async_lock, flags);
return ret;
}
/*
* pick the first pending entry and run it
*/
static void run_one_entry(void)
{
unsigned long flags;
struct async_entry *entry;
ktime_t calltime, delta, rettime;
/* 1) pick one task from the pending queue */
spin_lock_irqsave(&async_lock, flags);
if (list_empty(&async_pending))
goto out;
entry = list_first_entry(&async_pending, struct async_entry, list);
/* 2) move it to the running queue */
list_del(&entry->list);
list_add_tail(&entry->list, &async_running);
spin_unlock_irqrestore(&async_lock, flags);
/* 3) run it (and print duration)*/
if (initcall_debug && system_state == SYSTEM_BOOTING) {
printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current));
calltime = ktime_get();
}
entry->func(entry->data, entry->cookie);
if (initcall_debug && system_state == SYSTEM_BOOTING) {
rettime = ktime_get();
delta = ktime_sub(rettime, calltime);
printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie,
entry->func, ktime_to_ns(delta) >> 10);
}
/* 4) remove it from the running queue */
spin_lock_irqsave(&async_lock, flags);
list_del(&entry->list);
/* 5) free the entry */
kfree(entry);
atomic_dec(&entry_count);
spin_unlock_irqrestore(&async_lock, flags);
/* 6) wake up any waiters. */
wake_up(&async_done);
return;
out:
spin_unlock_irqrestore(&async_lock, flags);
}
static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct list_head *running)
{
struct async_entry *entry;
unsigned long flags;
async_cookie_t newcookie;
/* allow irq-off callers */
entry = kzalloc(sizeof(struct async_entry), GFP_ATOMIC);
/*
* If we're out of memory or if there's too much work
* pending already, we execute synchronously.
*/
if (!async_enabled || !entry || atomic_read(&entry_count) > MAX_WORK) {
kfree(entry);
spin_lock_irqsave(&async_lock, flags);
newcookie = next_cookie++;
spin_unlock_irqrestore(&async_lock, flags);
/* low on memory.. run synchronously */
ptr(data, newcookie);
return newcookie;
}
entry->func = ptr;
entry->data = data;
entry->running = running;
spin_lock_irqsave(&async_lock, flags);
newcookie = entry->cookie = next_cookie++;
list_add_tail(&entry->list, &async_pending);
atomic_inc(&entry_count);
spin_unlock_irqrestore(&async_lock, flags);
wake_up(&async_new);
return newcookie;
}
async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
{
return __async_schedule(ptr, data, &async_pending);
}
EXPORT_SYMBOL_GPL(async_schedule);
async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running)
{
return __async_schedule(ptr, data, running);
}
EXPORT_SYMBOL_GPL(async_schedule_special);
void async_synchronize_full(void)
{
do {
async_synchronize_cookie(next_cookie);
} while (!list_empty(&async_running) || !list_empty(&async_pending));
}
EXPORT_SYMBOL_GPL(async_synchronize_full);
void async_synchronize_full_special(struct list_head *list)
{
async_synchronize_cookie_special(next_cookie, list);
}
EXPORT_SYMBOL_GPL(async_synchronize_full_special);
void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running)
{
ktime_t starttime, delta, endtime;
if (initcall_debug && system_state == SYSTEM_BOOTING) {
printk("async_waiting @ %i\n", task_pid_nr(current));
starttime = ktime_get();
}
wait_event(async_done, lowest_in_progress(running) >= cookie);
if (initcall_debug && system_state == SYSTEM_BOOTING) {
endtime = ktime_get();
delta = ktime_sub(endtime, starttime);
printk("async_continuing @ %i after %lli usec\n",
task_pid_nr(current), ktime_to_ns(delta) >> 10);
}
}
EXPORT_SYMBOL_GPL(async_synchronize_cookie_special);
void async_synchronize_cookie(async_cookie_t cookie)
{
async_synchronize_cookie_special(cookie, &async_running);
}
EXPORT_SYMBOL_GPL(async_synchronize_cookie);
static int async_thread(void *unused)
{
DECLARE_WAITQUEUE(wq, current);
add_wait_queue(&async_new, &wq);
while (!kthread_should_stop()) {
int ret = HZ;
set_current_state(TASK_INTERRUPTIBLE);
/*
* check the list head without lock.. false positives
* are dealt with inside run_one_entry() while holding
* the lock.
*/
rmb();
if (!list_empty(&async_pending))
run_one_entry();
else
ret = schedule_timeout(HZ);
if (ret == 0) {
/*
* we timed out, this means we as thread are redundant.
* we sign off and die, but we to avoid any races there
* is a last-straw check to see if work snuck in.
*/
atomic_dec(&thread_count);
wmb(); /* manager must see our departure first */
if (list_empty(&async_pending))
break;
/*
* woops work came in between us timing out and us
* signing off; we need to stay alive and keep working.
*/
atomic_inc(&thread_count);
}
}
remove_wait_queue(&async_new, &wq);
return 0;
}
static int async_manager_thread(void *unused)
{
DECLARE_WAITQUEUE(wq, current);
add_wait_queue(&async_new, &wq);
while (!kthread_should_stop()) {
int tc, ec;
set_current_state(TASK_INTERRUPTIBLE);
tc = atomic_read(&thread_count);
rmb();
ec = atomic_read(&entry_count);
while (tc < ec && tc < MAX_THREADS) {
kthread_run(async_thread, NULL, "async/%i", tc);
atomic_inc(&thread_count);
tc++;
}
schedule();
}
remove_wait_queue(&async_new, &wq);
return 0;
}
static int __init async_init(void)
{
if (async_enabled)
kthread_run(async_manager_thread, NULL, "async/mgr");
return 0;
}
static int __init setup_async(char *str)
{
async_enabled = 1;
return 1;
}
__setup("fastboot", setup_async);
core_initcall(async_init);

View File

@@ -159,11 +159,8 @@ static inline int audit_signal_info(int sig, struct task_struct *t)
return __audit_signal_info(sig, t);
return 0;
}
extern enum audit_state audit_filter_inodes(struct task_struct *,
struct audit_context *);
extern void audit_set_auditable(struct audit_context *);
extern void audit_filter_inodes(struct task_struct *, struct audit_context *);
#else
#define audit_signal_info(s,t) AUDIT_DISABLED
#define audit_filter_inodes(t,c) AUDIT_DISABLED
#define audit_set_auditable(c)
#endif

View File

@@ -450,6 +450,7 @@ static void kill_rules(struct audit_tree *tree)
audit_log_end(ab);
rule->tree = NULL;
list_del_rcu(&entry->list);
list_del(&entry->rule.list);
call_rcu(&entry->rcu, audit_free_rule_rcu);
}
}
@@ -617,7 +618,7 @@ int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
if (pathname[0] != '/' ||
rule->listnr != AUDIT_FILTER_EXIT ||
op & ~AUDIT_EQUAL ||
op != Audit_equal ||
rule->inode_f || rule->watch || rule->tree)
return -EINVAL;
rule->tree = alloc_tree(pathname);

View File

@@ -86,6 +86,14 @@ struct list_head audit_filter_list[AUDIT_NR_FILTERS] = {
#error Fix audit_filter_list initialiser
#endif
};
static struct list_head audit_rules_list[AUDIT_NR_FILTERS] = {
LIST_HEAD_INIT(audit_rules_list[0]),
LIST_HEAD_INIT(audit_rules_list[1]),
LIST_HEAD_INIT(audit_rules_list[2]),
LIST_HEAD_INIT(audit_rules_list[3]),
LIST_HEAD_INIT(audit_rules_list[4]),
LIST_HEAD_INIT(audit_rules_list[5]),
};
DEFINE_MUTEX(audit_filter_mutex);
@@ -244,7 +252,8 @@ static inline int audit_to_inode(struct audit_krule *krule,
struct audit_field *f)
{
if (krule->listnr != AUDIT_FILTER_EXIT ||
krule->watch || krule->inode_f || krule->tree)
krule->watch || krule->inode_f || krule->tree ||
(f->op != Audit_equal && f->op != Audit_not_equal))
return -EINVAL;
krule->inode_f = f;
@@ -262,7 +271,7 @@ static int audit_to_watch(struct audit_krule *krule, char *path, int len,
if (path[0] != '/' || path[len-1] == '/' ||
krule->listnr != AUDIT_FILTER_EXIT ||
op & ~AUDIT_EQUAL ||
op != Audit_equal ||
krule->inode_f || krule->watch || krule->tree)
return -EINVAL;
@@ -412,12 +421,32 @@ exit_err:
return ERR_PTR(err);
}
static u32 audit_ops[] =
{
[Audit_equal] = AUDIT_EQUAL,
[Audit_not_equal] = AUDIT_NOT_EQUAL,
[Audit_bitmask] = AUDIT_BIT_MASK,
[Audit_bittest] = AUDIT_BIT_TEST,
[Audit_lt] = AUDIT_LESS_THAN,
[Audit_gt] = AUDIT_GREATER_THAN,
[Audit_le] = AUDIT_LESS_THAN_OR_EQUAL,
[Audit_ge] = AUDIT_GREATER_THAN_OR_EQUAL,
};
static u32 audit_to_op(u32 op)
{
u32 n;
for (n = Audit_equal; n < Audit_bad && audit_ops[n] != op; n++)
;
return n;
}
/* Translate struct audit_rule to kernel's rule respresentation.
* Exists for backward compatibility with userspace. */
static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
{
struct audit_entry *entry;
struct audit_field *ino_f;
int err = 0;
int i;
@@ -427,12 +456,28 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
for (i = 0; i < rule->field_count; i++) {
struct audit_field *f = &entry->rule.fields[i];
u32 n;
n = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS);
/* Support for legacy operators where
* AUDIT_NEGATE bit signifies != and otherwise assumes == */
if (n & AUDIT_NEGATE)
f->op = Audit_not_equal;
else if (!n)
f->op = Audit_equal;
else
f->op = audit_to_op(n);
entry->rule.vers_ops = (n & AUDIT_OPERATORS) ? 2 : 1;
f->op = rule->fields[i] & (AUDIT_NEGATE|AUDIT_OPERATORS);
f->type = rule->fields[i] & ~(AUDIT_NEGATE|AUDIT_OPERATORS);
f->val = rule->values[i];
err = -EINVAL;
if (f->op == Audit_bad)
goto exit_free;
switch(f->type) {
default:
goto exit_free;
@@ -454,11 +499,8 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
case AUDIT_EXIT:
case AUDIT_SUCCESS:
/* bit ops are only useful on syscall args */
if (f->op == AUDIT_BIT_MASK ||
f->op == AUDIT_BIT_TEST) {
err = -EINVAL;
if (f->op == Audit_bitmask || f->op == Audit_bittest)
goto exit_free;
}
break;
case AUDIT_ARG0:
case AUDIT_ARG1:
@@ -467,11 +509,8 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
break;
/* arch is only allowed to be = or != */
case AUDIT_ARCH:
if ((f->op != AUDIT_NOT_EQUAL) && (f->op != AUDIT_EQUAL)
&& (f->op != AUDIT_NEGATE) && (f->op)) {
err = -EINVAL;
if (f->op != Audit_not_equal && f->op != Audit_equal)
goto exit_free;
}
entry->rule.arch_f = f;
break;
case AUDIT_PERM:
@@ -488,33 +527,10 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
goto exit_free;
break;
}
entry->rule.vers_ops = (f->op & AUDIT_OPERATORS) ? 2 : 1;
/* Support for legacy operators where
* AUDIT_NEGATE bit signifies != and otherwise assumes == */
if (f->op & AUDIT_NEGATE)
f->op = AUDIT_NOT_EQUAL;
else if (!f->op)
f->op = AUDIT_EQUAL;
else if (f->op == AUDIT_OPERATORS) {
err = -EINVAL;
goto exit_free;
}
}
ino_f = entry->rule.inode_f;
if (ino_f) {
switch(ino_f->op) {
case AUDIT_NOT_EQUAL:
entry->rule.inode_f = NULL;
case AUDIT_EQUAL:
break;
default:
err = -EINVAL;
goto exit_free;
}
}
if (entry->rule.inode_f && entry->rule.inode_f->op == Audit_not_equal)
entry->rule.inode_f = NULL;
exit_nofree:
return entry;
@@ -530,7 +546,6 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
{
int err = 0;
struct audit_entry *entry;
struct audit_field *ino_f;
void *bufp;
size_t remain = datasz - sizeof(struct audit_rule_data);
int i;
@@ -546,11 +561,11 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
struct audit_field *f = &entry->rule.fields[i];
err = -EINVAL;
if (!(data->fieldflags[i] & AUDIT_OPERATORS) ||
data->fieldflags[i] & ~AUDIT_OPERATORS)
f->op = audit_to_op(data->fieldflags[i]);
if (f->op == Audit_bad)
goto exit_free;
f->op = data->fieldflags[i] & AUDIT_OPERATORS;
f->type = data->fields[i];
f->val = data->values[i];
f->lsm_str = NULL;
@@ -662,18 +677,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
}
}
ino_f = entry->rule.inode_f;
if (ino_f) {
switch(ino_f->op) {
case AUDIT_NOT_EQUAL:
entry->rule.inode_f = NULL;
case AUDIT_EQUAL:
break;
default:
err = -EINVAL;
goto exit_free;
}
}
if (entry->rule.inode_f && entry->rule.inode_f->op == Audit_not_equal)
entry->rule.inode_f = NULL;
exit_nofree:
return entry;
@@ -713,10 +718,10 @@ static struct audit_rule *audit_krule_to_rule(struct audit_krule *krule)
rule->fields[i] = krule->fields[i].type;
if (krule->vers_ops == 1) {
if (krule->fields[i].op & AUDIT_NOT_EQUAL)
if (krule->fields[i].op == Audit_not_equal)
rule->fields[i] |= AUDIT_NEGATE;
} else {
rule->fields[i] |= krule->fields[i].op;
rule->fields[i] |= audit_ops[krule->fields[i].op];
}
}
for (i = 0; i < AUDIT_BITMASK_SIZE; i++) rule->mask[i] = krule->mask[i];
@@ -744,7 +749,7 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
struct audit_field *f = &krule->fields[i];
data->fields[i] = f->type;
data->fieldflags[i] = f->op;
data->fieldflags[i] = audit_ops[f->op];
switch(f->type) {
case AUDIT_SUBJ_USER:
case AUDIT_SUBJ_ROLE:
@@ -919,6 +924,7 @@ static struct audit_entry *audit_dupe_rule(struct audit_krule *old,
new->action = old->action;
for (i = 0; i < AUDIT_BITMASK_SIZE; i++)
new->mask[i] = old->mask[i];
new->prio = old->prio;
new->buflen = old->buflen;
new->inode_f = old->inode_f;
new->watch = NULL;
@@ -987,9 +993,8 @@ static void audit_update_watch(struct audit_parent *parent,
/* If the update involves invalidating rules, do the inode-based
* filtering now, so we don't omit records. */
if (invalidating && current->audit_context &&
audit_filter_inodes(current, current->audit_context) == AUDIT_RECORD_CONTEXT)
audit_set_auditable(current->audit_context);
if (invalidating && current->audit_context)
audit_filter_inodes(current, current->audit_context);
nwatch = audit_dupe_watch(owatch);
if (IS_ERR(nwatch)) {
@@ -1007,12 +1012,15 @@ static void audit_update_watch(struct audit_parent *parent,
list_del_rcu(&oentry->list);
nentry = audit_dupe_rule(&oentry->rule, nwatch);
if (IS_ERR(nentry))
if (IS_ERR(nentry)) {
list_del(&oentry->rule.list);
audit_panic("error updating watch, removing");
else {
} else {
int h = audit_hash_ino((u32)ino);
list_add(&nentry->rule.rlist, &nwatch->rules);
list_add_rcu(&nentry->list, &audit_inode_hash[h]);
list_replace(&oentry->rule.list,
&nentry->rule.list);
}
call_rcu(&oentry->rcu, audit_free_rule_rcu);
@@ -1077,6 +1085,7 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
audit_log_end(ab);
}
list_del(&r->rlist);
list_del(&r->list);
list_del_rcu(&e->list);
call_rcu(&e->rcu, audit_free_rule_rcu);
}
@@ -1102,12 +1111,16 @@ static void audit_inotify_unregister(struct list_head *in_list)
/* Find an existing audit rule.
* Caller must hold audit_filter_mutex to prevent stale rule data. */
static struct audit_entry *audit_find_rule(struct audit_entry *entry,
struct list_head *list)
struct list_head **p)
{
struct audit_entry *e, *found = NULL;
struct list_head *list;
int h;
if (entry->rule.watch) {
if (entry->rule.inode_f) {
h = audit_hash_ino(entry->rule.inode_f->val);
*p = list = &audit_inode_hash[h];
} else if (entry->rule.watch) {
/* we don't know the inode number, so must walk entire hash */
for (h = 0; h < AUDIT_INODE_BUCKETS; h++) {
list = &audit_inode_hash[h];
@@ -1118,6 +1131,8 @@ static struct audit_entry *audit_find_rule(struct audit_entry *entry,
}
}
goto out;
} else {
*p = list = &audit_filter_list[entry->rule.listnr];
}
list_for_each_entry(e, list, list)
@@ -1258,15 +1273,17 @@ static int audit_add_watch(struct audit_krule *krule, struct nameidata *ndp,
return ret;
}
static u64 prio_low = ~0ULL/2;
static u64 prio_high = ~0ULL/2 - 1;
/* Add rule to given filterlist if not a duplicate. */
static inline int audit_add_rule(struct audit_entry *entry,
struct list_head *list)
static inline int audit_add_rule(struct audit_entry *entry)
{
struct audit_entry *e;
struct audit_field *inode_f = entry->rule.inode_f;
struct audit_watch *watch = entry->rule.watch;
struct audit_tree *tree = entry->rule.tree;
struct nameidata *ndp = NULL, *ndw = NULL;
struct list_head *list;
int h, err;
#ifdef CONFIG_AUDITSYSCALL
int dont_count = 0;
@@ -1277,13 +1294,8 @@ static inline int audit_add_rule(struct audit_entry *entry,
dont_count = 1;
#endif
if (inode_f) {
h = audit_hash_ino(inode_f->val);
list = &audit_inode_hash[h];
}
mutex_lock(&audit_filter_mutex);
e = audit_find_rule(entry, list);
e = audit_find_rule(entry, &list);
mutex_unlock(&audit_filter_mutex);
if (e) {
err = -EEXIST;
@@ -1319,10 +1331,22 @@ static inline int audit_add_rule(struct audit_entry *entry,
}
}
entry->rule.prio = ~0ULL;
if (entry->rule.listnr == AUDIT_FILTER_EXIT) {
if (entry->rule.flags & AUDIT_FILTER_PREPEND)
entry->rule.prio = ++prio_high;
else
entry->rule.prio = --prio_low;
}
if (entry->rule.flags & AUDIT_FILTER_PREPEND) {
list_add(&entry->rule.list,
&audit_rules_list[entry->rule.listnr]);
list_add_rcu(&entry->list, list);
entry->rule.flags &= ~AUDIT_FILTER_PREPEND;
} else {
list_add_tail(&entry->rule.list,
&audit_rules_list[entry->rule.listnr]);
list_add_tail_rcu(&entry->list, list);
}
#ifdef CONFIG_AUDITSYSCALL
@@ -1345,15 +1369,14 @@ error:
}
/* Remove an existing rule from filterlist. */
static inline int audit_del_rule(struct audit_entry *entry,
struct list_head *list)
static inline int audit_del_rule(struct audit_entry *entry)
{
struct audit_entry *e;
struct audit_field *inode_f = entry->rule.inode_f;
struct audit_watch *watch, *tmp_watch = entry->rule.watch;
struct audit_tree *tree = entry->rule.tree;
struct list_head *list;
LIST_HEAD(inotify_list);
int h, ret = 0;
int ret = 0;
#ifdef CONFIG_AUDITSYSCALL
int dont_count = 0;
@@ -1363,13 +1386,8 @@ static inline int audit_del_rule(struct audit_entry *entry,
dont_count = 1;
#endif
if (inode_f) {
h = audit_hash_ino(inode_f->val);
list = &audit_inode_hash[h];
}
mutex_lock(&audit_filter_mutex);
e = audit_find_rule(entry, list);
e = audit_find_rule(entry, &list);
if (!e) {
mutex_unlock(&audit_filter_mutex);
ret = -ENOENT;
@@ -1404,6 +1422,7 @@ static inline int audit_del_rule(struct audit_entry *entry,
audit_remove_tree_rule(&e->rule);
list_del_rcu(&e->list);
list_del(&e->rule.list);
call_rcu(&e->rcu, audit_free_rule_rcu);
#ifdef CONFIG_AUDITSYSCALL
@@ -1432,30 +1451,16 @@ out:
static void audit_list(int pid, int seq, struct sk_buff_head *q)
{
struct sk_buff *skb;
struct audit_entry *entry;
struct audit_krule *r;
int i;
/* This is a blocking read, so use audit_filter_mutex instead of rcu
* iterator to sync with list writers. */
for (i=0; i<AUDIT_NR_FILTERS; i++) {
list_for_each_entry(entry, &audit_filter_list[i], list) {
list_for_each_entry(r, &audit_rules_list[i], list) {
struct audit_rule *rule;
rule = audit_krule_to_rule(&entry->rule);
if (unlikely(!rule))
break;
skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1,
rule, sizeof(*rule));
if (skb)
skb_queue_tail(q, skb);
kfree(rule);
}
}
for (i = 0; i < AUDIT_INODE_BUCKETS; i++) {
list_for_each_entry(entry, &audit_inode_hash[i], list) {
struct audit_rule *rule;
rule = audit_krule_to_rule(&entry->rule);
rule = audit_krule_to_rule(r);
if (unlikely(!rule))
break;
skb = audit_make_reply(pid, seq, AUDIT_LIST, 0, 1,
@@ -1474,30 +1479,16 @@ static void audit_list(int pid, int seq, struct sk_buff_head *q)
static void audit_list_rules(int pid, int seq, struct sk_buff_head *q)
{
struct sk_buff *skb;
struct audit_entry *e;
struct audit_krule *r;
int i;
/* This is a blocking read, so use audit_filter_mutex instead of rcu
* iterator to sync with list writers. */
for (i=0; i<AUDIT_NR_FILTERS; i++) {
list_for_each_entry(e, &audit_filter_list[i], list) {
list_for_each_entry(r, &audit_rules_list[i], list) {
struct audit_rule_data *data;
data = audit_krule_to_data(&e->rule);
if (unlikely(!data))
break;
skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1,
data, sizeof(*data) + data->buflen);
if (skb)
skb_queue_tail(q, skb);
kfree(data);
}
}
for (i=0; i< AUDIT_INODE_BUCKETS; i++) {
list_for_each_entry(e, &audit_inode_hash[i], list) {
struct audit_rule_data *data;
data = audit_krule_to_data(&e->rule);
data = audit_krule_to_data(r);
if (unlikely(!data))
break;
skb = audit_make_reply(pid, seq, AUDIT_LIST_RULES, 0, 1,
@@ -1603,8 +1594,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
if (IS_ERR(entry))
return PTR_ERR(entry);
err = audit_add_rule(entry,
&audit_filter_list[entry->rule.listnr]);
err = audit_add_rule(entry);
audit_log_rule_change(loginuid, sessionid, sid, "add",
&entry->rule, !err);
@@ -1620,8 +1610,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
if (IS_ERR(entry))
return PTR_ERR(entry);
err = audit_del_rule(entry,
&audit_filter_list[entry->rule.listnr]);
err = audit_del_rule(entry);
audit_log_rule_change(loginuid, sessionid, sid, "remove",
&entry->rule, !err);
@@ -1634,28 +1623,29 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data,
return err;
}
int audit_comparator(const u32 left, const u32 op, const u32 right)
int audit_comparator(u32 left, u32 op, u32 right)
{
switch (op) {
case AUDIT_EQUAL:
case Audit_equal:
return (left == right);
case AUDIT_NOT_EQUAL:
case Audit_not_equal:
return (left != right);
case AUDIT_LESS_THAN:
case Audit_lt:
return (left < right);
case AUDIT_LESS_THAN_OR_EQUAL:
case Audit_le:
return (left <= right);
case AUDIT_GREATER_THAN:
case Audit_gt:
return (left > right);
case AUDIT_GREATER_THAN_OR_EQUAL:
case Audit_ge:
return (left >= right);
case AUDIT_BIT_MASK:
case Audit_bitmask:
return (left & right);
case AUDIT_BIT_TEST:
case Audit_bittest:
return ((left & right) == right);
default:
BUG();
return 0;
}
BUG();
return 0;
}
/* Compare given dentry name with last component in given path,
@@ -1778,6 +1768,43 @@ unlock_and_return:
return result;
}
static int update_lsm_rule(struct audit_krule *r)
{
struct audit_entry *entry = container_of(r, struct audit_entry, rule);
struct audit_entry *nentry;
struct audit_watch *watch;
struct audit_tree *tree;
int err = 0;
if (!security_audit_rule_known(r))
return 0;
watch = r->watch;
tree = r->tree;
nentry = audit_dupe_rule(r, watch);
if (IS_ERR(nentry)) {
/* save the first error encountered for the
* return value */
err = PTR_ERR(nentry);
audit_panic("error updating LSM filters");
if (watch)
list_del(&r->rlist);
list_del_rcu(&entry->list);
list_del(&r->list);
} else {
if (watch) {
list_add(&nentry->rule.rlist, &watch->rules);
list_del(&r->rlist);
} else if (tree)
list_replace_init(&r->rlist, &nentry->rule.rlist);
list_replace_rcu(&entry->list, &nentry->list);
list_replace(&r->list, &nentry->rule.list);
}
call_rcu(&entry->rcu, audit_free_rule_rcu);
return err;
}
/* This function will re-initialize the lsm_rule field of all applicable rules.
* It will traverse the filter lists serarching for rules that contain LSM
* specific filter fields. When such a rule is found, it is copied, the
@@ -1785,45 +1812,19 @@ unlock_and_return:
* updated rule. */
int audit_update_lsm_rules(void)
{
struct audit_entry *entry, *n, *nentry;
struct audit_watch *watch;
struct audit_tree *tree;
struct audit_krule *r, *n;
int i, err = 0;
/* audit_filter_mutex synchronizes the writers */
mutex_lock(&audit_filter_mutex);
for (i = 0; i < AUDIT_NR_FILTERS; i++) {
list_for_each_entry_safe(entry, n, &audit_filter_list[i], list) {
if (!security_audit_rule_known(&entry->rule))
continue;
watch = entry->rule.watch;
tree = entry->rule.tree;
nentry = audit_dupe_rule(&entry->rule, watch);
if (IS_ERR(nentry)) {
/* save the first error encountered for the
* return value */
if (!err)
err = PTR_ERR(nentry);
audit_panic("error updating LSM filters");
if (watch)
list_del(&entry->rule.rlist);
list_del_rcu(&entry->list);
} else {
if (watch) {
list_add(&nentry->rule.rlist,
&watch->rules);
list_del(&entry->rule.rlist);
} else if (tree)
list_replace_init(&entry->rule.rlist,
&nentry->rule.rlist);
list_replace_rcu(&entry->list, &nentry->list);
}
call_rcu(&entry->rcu, audit_free_rule_rcu);
list_for_each_entry_safe(r, n, &audit_rules_list[i], list) {
int res = update_lsm_rule(r);
if (!err)
err = res;
}
}
mutex_unlock(&audit_filter_mutex);
return err;

File diff suppressed because it is too large Load Diff

View File

@@ -280,9 +280,7 @@ asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
if (ret < 0)
goto error;
ret = audit_log_capset(pid, new, current_cred());
if (ret < 0)
return ret;
audit_log_capset(pid, new, current_cred());
return commit_creds(new);
@@ -308,7 +306,7 @@ int capable(int cap)
BUG();
}
if (has_capability(current, cap)) {
if (security_capable(cap) == 0) {
current->flags |= PF_SUPERPRIV;
return 1;
}

View File

@@ -84,7 +84,7 @@ struct cgroupfs_root {
/* Tracks how many cgroups are currently defined in hierarchy.*/
int number_of_cgroups;
/* A list running through the mounted hierarchies */
/* A list running through the active hierarchies */
struct list_head root_list;
/* Hierarchy-specific flags */
@@ -116,7 +116,6 @@ static int root_count;
* be called.
*/
static int need_forkexit_callback __read_mostly;
static int need_mm_owner_callback __read_mostly;
/* convenient tests for these bits */
inline int cgroup_is_removed(const struct cgroup *cgrp)
@@ -149,8 +148,8 @@ static int notify_on_release(const struct cgroup *cgrp)
#define for_each_subsys(_root, _ss) \
list_for_each_entry(_ss, &_root->subsys_list, sibling)
/* for_each_root() allows you to iterate across the active hierarchies */
#define for_each_root(_root) \
/* for_each_active_root() allows you to iterate across the active hierarchies */
#define for_each_active_root(_root) \
list_for_each_entry(_root, &roots, root_list)
/* the list of cgroups eligible for automatic release. Protected by
@@ -272,7 +271,7 @@ static void __put_css_set(struct css_set *cg, int taskexit)
rcu_read_lock();
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup *cgrp = cg->subsys[i]->cgroup;
struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup);
if (atomic_dec_and_test(&cgrp->count) &&
notify_on_release(cgrp)) {
if (taskexit)
@@ -385,6 +384,25 @@ static int allocate_cg_links(int count, struct list_head *tmp)
return 0;
}
/**
* link_css_set - a helper function to link a css_set to a cgroup
* @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links()
* @cg: the css_set to be linked
* @cgrp: the destination cgroup
*/
static void link_css_set(struct list_head *tmp_cg_links,
struct css_set *cg, struct cgroup *cgrp)
{
struct cg_cgroup_link *link;
BUG_ON(list_empty(tmp_cg_links));
link = list_first_entry(tmp_cg_links, struct cg_cgroup_link,
cgrp_link_list);
link->cg = cg;
list_move(&link->cgrp_link_list, &cgrp->css_sets);
list_add(&link->cg_link_list, &cg->cg_links);
}
/*
* find_css_set() takes an existing cgroup group and a
* cgroup object, and returns a css_set object that's
@@ -400,7 +418,6 @@ static struct css_set *find_css_set(
int i;
struct list_head tmp_cg_links;
struct cg_cgroup_link *link;
struct hlist_head *hhead;
@@ -445,26 +462,11 @@ static struct css_set *find_css_set(
* only do it for the first subsystem in each
* hierarchy
*/
if (ss->root->subsys_list.next == &ss->sibling) {
BUG_ON(list_empty(&tmp_cg_links));
link = list_entry(tmp_cg_links.next,
struct cg_cgroup_link,
cgrp_link_list);
list_del(&link->cgrp_link_list);
list_add(&link->cgrp_link_list, &cgrp->css_sets);
link->cg = res;
list_add(&link->cg_link_list, &res->cg_links);
}
}
if (list_empty(&rootnode.subsys_list)) {
link = list_entry(tmp_cg_links.next,
struct cg_cgroup_link,
cgrp_link_list);
list_del(&link->cgrp_link_list);
list_add(&link->cgrp_link_list, &dummytop->css_sets);
link->cg = res;
list_add(&link->cg_link_list, &res->cg_links);
if (ss->root->subsys_list.next == &ss->sibling)
link_css_set(&tmp_cg_links, res, cgrp);
}
if (list_empty(&rootnode.subsys_list))
link_css_set(&tmp_cg_links, res, dummytop);
BUG_ON(!list_empty(&tmp_cg_links));
@@ -573,7 +575,6 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_blocks = 0;
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info;
}
@@ -588,11 +589,18 @@ static void cgroup_call_pre_destroy(struct cgroup *cgrp)
{
struct cgroup_subsys *ss;
for_each_subsys(cgrp->root, ss)
if (ss->pre_destroy && cgrp->subsys[ss->subsys_id])
if (ss->pre_destroy)
ss->pre_destroy(ss, cgrp);
return;
}
static void free_cgroup_rcu(struct rcu_head *obj)
{
struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head);
kfree(cgrp);
}
static void cgroup_diput(struct dentry *dentry, struct inode *inode)
{
/* is dentry a directory ? if so, kfree() associated cgroup */
@@ -612,19 +620,19 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
/*
* Release the subsystem state objects.
*/
for_each_subsys(cgrp->root, ss) {
if (cgrp->subsys[ss->subsys_id])
ss->destroy(ss, cgrp);
}
for_each_subsys(cgrp->root, ss)
ss->destroy(ss, cgrp);
cgrp->root->number_of_cgroups--;
mutex_unlock(&cgroup_mutex);
/* Drop the active superblock reference that we took when we
* created the cgroup */
/*
* Drop the active superblock reference that we took when we
* created the cgroup
*/
deactivate_super(cgrp->root->sb);
kfree(cgrp);
call_rcu(&cgrp->rcu_head, free_cgroup_rcu);
}
iput(inode);
}
@@ -714,23 +722,26 @@ static int rebind_subsystems(struct cgroupfs_root *root,
BUG_ON(cgrp->subsys[i]);
BUG_ON(!dummytop->subsys[i]);
BUG_ON(dummytop->subsys[i]->cgroup != dummytop);
mutex_lock(&ss->hierarchy_mutex);
cgrp->subsys[i] = dummytop->subsys[i];
cgrp->subsys[i]->cgroup = cgrp;
list_add(&ss->sibling, &root->subsys_list);
rcu_assign_pointer(ss->root, root);
list_move(&ss->sibling, &root->subsys_list);
ss->root = root;
if (ss->bind)
ss->bind(ss, cgrp);
mutex_unlock(&ss->hierarchy_mutex);
} else if (bit & removed_bits) {
/* We're removing this subsystem */
BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]);
BUG_ON(cgrp->subsys[i]->cgroup != cgrp);
mutex_lock(&ss->hierarchy_mutex);
if (ss->bind)
ss->bind(ss, dummytop);
dummytop->subsys[i]->cgroup = dummytop;
cgrp->subsys[i] = NULL;
rcu_assign_pointer(subsys[i]->root, &rootnode);
list_del(&ss->sibling);
subsys[i]->root = &rootnode;
list_move(&ss->sibling, &rootnode.subsys_list);
mutex_unlock(&ss->hierarchy_mutex);
} else if (bit & final_bits) {
/* Subsystem state should already exist */
BUG_ON(!cgrp->subsys[i]);
@@ -992,7 +1003,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
root = NULL;
} else {
/* New superblock */
struct cgroup *cgrp = &root->top_cgroup;
struct cgroup *root_cgrp = &root->top_cgroup;
struct inode *inode;
int i;
@@ -1033,7 +1044,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
list_add(&root->root_list, &roots);
root_count++;
sb->s_root->d_fsdata = &root->top_cgroup;
sb->s_root->d_fsdata = root_cgrp;
root->top_cgroup.dentry = sb->s_root;
/* Link the top cgroup in this hierarchy into all
@@ -1044,29 +1055,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
struct hlist_node *node;
struct css_set *cg;
hlist_for_each_entry(cg, node, hhead, hlist) {
struct cg_cgroup_link *link;
BUG_ON(list_empty(&tmp_cg_links));
link = list_entry(tmp_cg_links.next,
struct cg_cgroup_link,
cgrp_link_list);
list_del(&link->cgrp_link_list);
link->cg = cg;
list_add(&link->cgrp_link_list,
&root->top_cgroup.css_sets);
list_add(&link->cg_link_list, &cg->cg_links);
}
hlist_for_each_entry(cg, node, hhead, hlist)
link_css_set(&tmp_cg_links, cg, root_cgrp);
}
write_unlock(&css_set_lock);
free_cg_links(&tmp_cg_links);
BUG_ON(!list_empty(&cgrp->sibling));
BUG_ON(!list_empty(&cgrp->children));
BUG_ON(!list_empty(&root_cgrp->sibling));
BUG_ON(!list_empty(&root_cgrp->children));
BUG_ON(root->number_of_cgroups != 1);
cgroup_populate_dir(cgrp);
cgroup_populate_dir(root_cgrp);
mutex_unlock(&inode->i_mutex);
mutex_unlock(&cgroup_mutex);
}
@@ -1115,10 +1115,9 @@ static void cgroup_kill_sb(struct super_block *sb) {
}
write_unlock(&css_set_lock);
if (!list_empty(&root->root_list)) {
list_del(&root->root_list);
root_count--;
}
list_del(&root->root_list);
root_count--;
mutex_unlock(&cgroup_mutex);
kfree(root);
@@ -1147,14 +1146,16 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
* @buf: the buffer to write the path into
* @buflen: the length of the buffer
*
* Called with cgroup_mutex held. Writes path of cgroup into buf.
* Returns 0 on success, -errno on error.
* Called with cgroup_mutex held or else with an RCU-protected cgroup
* reference. Writes path of cgroup into buf. Returns 0 on success,
* -errno on error.
*/
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
char *start;
struct dentry *dentry = rcu_dereference(cgrp->dentry);
if (cgrp == dummytop) {
if (!dentry || cgrp == dummytop) {
/*
* Inactive subsystems have no dentry for their root
* cgroup
@@ -1167,13 +1168,14 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
*--start = '\0';
for (;;) {
int len = cgrp->dentry->d_name.len;
int len = dentry->d_name.len;
if ((start -= len) < buf)
return -ENAMETOOLONG;
memcpy(start, cgrp->dentry->d_name.name, len);
cgrp = cgrp->parent;
if (!cgrp)
break;
dentry = rcu_dereference(cgrp->dentry);
if (!cgrp->parent)
continue;
if (--start < buf)
@@ -1218,7 +1220,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
int retval = 0;
struct cgroup_subsys *ss;
struct cgroup *oldcgrp;
struct css_set *cg = tsk->cgroups;
struct css_set *cg;
struct css_set *newcg;
struct cgroupfs_root *root = cgrp->root;
int subsys_id;
@@ -1238,11 +1240,16 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
}
}
task_lock(tsk);
cg = tsk->cgroups;
get_css_set(cg);
task_unlock(tsk);
/*
* Locate or allocate a new css_set for this task,
* based on its final set of cgroups
*/
newcg = find_css_set(cg, cgrp);
put_css_set(cg);
if (!newcg)
return -ENOMEM;
@@ -1447,7 +1454,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf,
struct cftype *cft = __d_cft(file->f_dentry);
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
if (!cft || cgroup_is_removed(cgrp))
if (cgroup_is_removed(cgrp))
return -ENODEV;
if (cft->write)
return cft->write(cgrp, cft, file, buf, nbytes, ppos);
@@ -1492,7 +1499,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf,
struct cftype *cft = __d_cft(file->f_dentry);
struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);
if (!cft || cgroup_is_removed(cgrp))
if (cgroup_is_removed(cgrp))
return -ENODEV;
if (cft->read)
@@ -1556,10 +1563,8 @@ static int cgroup_file_open(struct inode *inode, struct file *file)
err = generic_file_open(inode, file);
if (err)
return err;
cft = __d_cft(file->f_dentry);
if (!cft)
return -ENODEV;
if (cft->read_map || cft->read_seq_string) {
struct cgroup_seqfile_state *state =
kzalloc(sizeof(*state), GFP_USER);
@@ -1673,7 +1678,7 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry,
if (!error) {
dentry->d_fsdata = cgrp;
inc_nlink(parent->d_inode);
cgrp->dentry = dentry;
rcu_assign_pointer(cgrp->dentry, dentry);
dget(dentry);
}
dput(dentry);
@@ -1814,6 +1819,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
{
struct task_struct *res;
struct list_head *l = it->task;
struct cg_cgroup_link *link;
/* If the iterator cg is NULL, we have no tasks */
if (!it->cg_link)
@@ -1821,7 +1827,8 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
res = list_entry(l, struct task_struct, cg_list);
/* Advance iterator to find next entry */
l = l->next;
if (l == &res->cgroups->tasks) {
link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list);
if (l == &link->cg->tasks) {
/* We reached the end of this task list - move on to
* the next cg_cgroup_link */
cgroup_advance_iter(cgrp, it);
@@ -2015,14 +2022,16 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan)
*/
static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp)
{
int n = 0;
int n = 0, pid;
struct cgroup_iter it;
struct task_struct *tsk;
cgroup_iter_start(cgrp, &it);
while ((tsk = cgroup_iter_next(cgrp, &it))) {
if (unlikely(n == npids))
break;
pidarray[n++] = task_pid_vnr(tsk);
pid = task_pid_vnr(tsk);
if (pid > 0)
pidarray[n++] = pid;
}
cgroup_iter_end(cgrp, &it);
return n;
@@ -2054,7 +2063,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
ret = 0;
cgrp = dentry->d_fsdata;
rcu_read_lock();
cgroup_iter_start(cgrp, &it);
while ((tsk = cgroup_iter_next(cgrp, &it))) {
@@ -2079,7 +2087,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
}
cgroup_iter_end(cgrp, &it);
rcu_read_unlock();
err:
return ret;
}
@@ -2326,7 +2333,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
struct cgroup *cgrp)
{
css->cgroup = cgrp;
atomic_set(&css->refcnt, 0);
atomic_set(&css->refcnt, 1);
css->flags = 0;
if (cgrp == dummytop)
set_bit(CSS_ROOT, &css->flags);
@@ -2334,6 +2341,29 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
cgrp->subsys[ss->subsys_id] = css;
}
static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
{
/* We need to take each hierarchy_mutex in a consistent order */
int i;
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
if (ss->root == root)
mutex_lock_nested(&ss->hierarchy_mutex, i);
}
}
static void cgroup_unlock_hierarchy(struct cgroupfs_root *root)
{
int i;
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
if (ss->root == root)
mutex_unlock(&ss->hierarchy_mutex);
}
}
/*
* cgroup_create - create a cgroup
* @parent: cgroup that will be parent of the new cgroup
@@ -2382,7 +2412,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
init_cgroup_css(css, ss, cgrp);
}
cgroup_lock_hierarchy(root);
list_add(&cgrp->sibling, &cgrp->parent->children);
cgroup_unlock_hierarchy(root);
root->number_of_cgroups++;
err = cgroup_create_dir(cgrp, dentry, mode);
@@ -2433,7 +2465,7 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
{
/* Check the reference count on each subsystem. Since we
* already established that there are no tasks in the
* cgroup, if the css refcount is also 0, then there should
* cgroup, if the css refcount is also 1, then there should
* be no outstanding references, so the subsystem is safe to
* destroy. We scan across all subsystems rather than using
* the per-hierarchy linked list of mounted subsystems since
@@ -2454,19 +2486,67 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
* matter, since it can only happen if the cgroup
* has been deleted and hence no longer needs the
* release agent to be called anyway. */
if (css && atomic_read(&css->refcnt))
if (css && (atomic_read(&css->refcnt) > 1))
return 1;
}
return 0;
}
/*
* Atomically mark all (or else none) of the cgroup's CSS objects as
* CSS_REMOVED. Return true on success, or false if the cgroup has
* busy subsystems. Call with cgroup_mutex held
*/
static int cgroup_clear_css_refs(struct cgroup *cgrp)
{
struct cgroup_subsys *ss;
unsigned long flags;
bool failed = false;
local_irq_save(flags);
for_each_subsys(cgrp->root, ss) {
struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
int refcnt;
do {
/* We can only remove a CSS with a refcnt==1 */
refcnt = atomic_read(&css->refcnt);
if (refcnt > 1) {
failed = true;
goto done;
}
BUG_ON(!refcnt);
/*
* Drop the refcnt to 0 while we check other
* subsystems. This will cause any racing
* css_tryget() to spin until we set the
* CSS_REMOVED bits or abort
*/
} while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt);
}
done:
for_each_subsys(cgrp->root, ss) {
struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
if (failed) {
/*
* Restore old refcnt if we previously managed
* to clear it from 1 to 0
*/
if (!atomic_read(&css->refcnt))
atomic_set(&css->refcnt, 1);
} else {
/* Commit the fact that the CSS is removed */
set_bit(CSS_REMOVED, &css->flags);
}
}
local_irq_restore(flags);
return !failed;
}
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
{
struct cgroup *cgrp = dentry->d_fsdata;
struct dentry *d;
struct cgroup *parent;
struct super_block *sb;
struct cgroupfs_root *root;
/* the vfs holds both inode->i_mutex already */
@@ -2489,12 +2569,10 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
mutex_lock(&cgroup_mutex);
parent = cgrp->parent;
root = cgrp->root;
sb = root->sb;
if (atomic_read(&cgrp->count)
|| !list_empty(&cgrp->children)
|| cgroup_has_css_refs(cgrp)) {
|| !cgroup_clear_css_refs(cgrp)) {
mutex_unlock(&cgroup_mutex);
return -EBUSY;
}
@@ -2504,8 +2582,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
if (!list_empty(&cgrp->release_list))
list_del(&cgrp->release_list);
spin_unlock(&release_list_lock);
/* delete my sibling from parent->children */
cgroup_lock_hierarchy(cgrp->root);
/* delete this cgroup from parent->children */
list_del(&cgrp->sibling);
cgroup_unlock_hierarchy(cgrp->root);
spin_lock(&cgrp->dentry->d_lock);
d = dget(cgrp->dentry);
spin_unlock(&d->d_lock);
@@ -2527,6 +2609,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
/* Create the top cgroup state for this subsystem */
list_add(&ss->sibling, &rootnode.subsys_list);
ss->root = &rootnode;
css = ss->create(ss, dummytop);
/* We don't handle early failures gracefully */
@@ -2540,13 +2623,13 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
need_forkexit_callback |= ss->fork || ss->exit;
need_mm_owner_callback |= !!ss->mm_owner_changed;
/* At system boot, before all subsystems have been
* registered, no tasks have been forked, so we don't
* need to invoke fork callbacks here. */
BUG_ON(!list_empty(&init_task.tasks));
mutex_init(&ss->hierarchy_mutex);
ss->active = 1;
}
@@ -2565,7 +2648,6 @@ int __init cgroup_init_early(void)
INIT_HLIST_NODE(&init_css_set.hlist);
css_set_count = 1;
init_cgroup_root(&rootnode);
list_add(&rootnode.root_list, &roots);
root_count = 1;
init_task.cgroups = &init_css_set;
@@ -2672,15 +2754,12 @@ static int proc_cgroup_show(struct seq_file *m, void *v)
mutex_lock(&cgroup_mutex);
for_each_root(root) {
for_each_active_root(root) {
struct cgroup_subsys *ss;
struct cgroup *cgrp;
int subsys_id;
int count = 0;
/* Skip this hierarchy if it has no active subsystems */
if (!root->actual_subsys_bits)
continue;
seq_printf(m, "%lu:", root->subsys_bits);
for_each_subsys(root, ss)
seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
@@ -2790,37 +2869,6 @@ void cgroup_fork_callbacks(struct task_struct *child)
}
}
#ifdef CONFIG_MM_OWNER
/**
* cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes
* @p: the new owner
*
* Called on every change to mm->owner. mm_init_owner() does not
* invoke this routine, since it assigns the mm->owner the first time
* and does not change it.
*
* The callbacks are invoked with mmap_sem held in read mode.
*/
void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
{
struct cgroup *oldcgrp, *newcgrp = NULL;
if (need_mm_owner_callback) {
int i;
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
struct cgroup_subsys *ss = subsys[i];
oldcgrp = task_cgroup(old, ss->subsys_id);
if (new)
newcgrp = task_cgroup(new, ss->subsys_id);
if (oldcgrp == newcgrp)
continue;
if (ss->mm_owner_changed)
ss->mm_owner_changed(ss, oldcgrp, newcgrp, new);
}
}
}
#endif /* CONFIG_MM_OWNER */
/**
* cgroup_post_fork - called on a new task after adding it to the task list
* @child: the task in question
@@ -2834,8 +2882,10 @@ void cgroup_post_fork(struct task_struct *child)
{
if (use_task_css_set_links) {
write_lock(&css_set_lock);
task_lock(child);
if (list_empty(&child->cg_list))
list_add(&child->cg_list, &child->cgroups->tasks);
task_unlock(child);
write_unlock(&css_set_lock);
}
}
@@ -2941,14 +2991,20 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
mutex_unlock(&cgroup_mutex);
return 0;
}
task_lock(tsk);
cg = tsk->cgroups;
parent = task_cgroup(tsk, subsys->subsys_id);
/* Pin the hierarchy */
atomic_inc(&parent->root->sb->s_active);
if (!atomic_inc_not_zero(&parent->root->sb->s_active)) {
/* We race with the final deactivate_super() */
mutex_unlock(&cgroup_mutex);
return 0;
}
/* Keep the cgroup alive */
get_css_set(cg);
task_unlock(tsk);
mutex_unlock(&cgroup_mutex);
/* Now do the VFS work to create a cgroup */
@@ -2967,7 +3023,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
}
/* Create the cgroup directory, which also creates the cgroup */
ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
ret = vfs_mkdir(inode, dentry, 0755);
child = __d_cgrp(dentry);
dput(dentry);
if (ret) {
@@ -2977,13 +3033,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
goto out_release;
}
if (!child) {
printk(KERN_INFO
"Couldn't find new cgroup %s\n", nodename);
ret = -ENOMEM;
goto out_release;
}
/* The cgroup now exists. Retake cgroup_mutex and check
* that we're still in the same state that we thought we
* were. */
@@ -3079,7 +3128,8 @@ void __css_put(struct cgroup_subsys_state *css)
{
struct cgroup *cgrp = css->cgroup;
rcu_read_lock();
if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) {
if ((atomic_dec_return(&css->refcnt) == 1) &&
notify_on_release(cgrp)) {
set_bit(CGRP_RELEASABLE, &cgrp->flags);
check_for_release(cgrp);
}

View File

@@ -24,6 +24,7 @@
#include <linux/migrate.h>
#include <linux/posix-timers.h>
#include <linux/times.h>
#include <linux/ptrace.h>
#include <asm/uaccess.h>
@@ -229,6 +230,7 @@ asmlinkage long compat_sys_times(struct compat_tms __user *tbuf)
if (copy_to_user(tbuf, &tmp, sizeof(tmp)))
return -EFAULT;
}
force_successful_syscall_return();
return compat_jiffies_to_clock_t(jiffies);
}
@@ -454,16 +456,16 @@ asmlinkage long compat_sys_waitid(int which, compat_pid_t pid,
}
static int compat_get_user_cpu_mask(compat_ulong_t __user *user_mask_ptr,
unsigned len, cpumask_t *new_mask)
unsigned len, struct cpumask *new_mask)
{
unsigned long *k;
if (len < sizeof(cpumask_t))
memset(new_mask, 0, sizeof(cpumask_t));
else if (len > sizeof(cpumask_t))
len = sizeof(cpumask_t);
if (len < cpumask_size())
memset(new_mask, 0, cpumask_size());
else if (len > cpumask_size())
len = cpumask_size();
k = cpus_addr(*new_mask);
k = cpumask_bits(new_mask);
return compat_get_bitmap(k, user_mask_ptr, len * 8);
}
@@ -471,40 +473,51 @@ asmlinkage long compat_sys_sched_setaffinity(compat_pid_t pid,
unsigned int len,
compat_ulong_t __user *user_mask_ptr)
{
cpumask_t new_mask;
cpumask_var_t new_mask;
int retval;
retval = compat_get_user_cpu_mask(user_mask_ptr, len, &new_mask);
if (retval)
return retval;
if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
return -ENOMEM;
return sched_setaffinity(pid, &new_mask);
retval = compat_get_user_cpu_mask(user_mask_ptr, len, new_mask);
if (retval)
goto out;
retval = sched_setaffinity(pid, new_mask);
out:
free_cpumask_var(new_mask);
return retval;
}
asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid, unsigned int len,
compat_ulong_t __user *user_mask_ptr)
{
int ret;
cpumask_t mask;
cpumask_var_t mask;
unsigned long *k;
unsigned int min_length = sizeof(cpumask_t);
unsigned int min_length = cpumask_size();
if (NR_CPUS <= BITS_PER_COMPAT_LONG)
if (nr_cpu_ids <= BITS_PER_COMPAT_LONG)
min_length = sizeof(compat_ulong_t);
if (len < min_length)
return -EINVAL;
ret = sched_getaffinity(pid, &mask);
if (!alloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
ret = sched_getaffinity(pid, mask);
if (ret < 0)
return ret;
goto out;
k = cpus_addr(mask);
k = cpumask_bits(mask);
ret = compat_put_bitmap(user_mask_ptr, k, min_length * 8);
if (ret)
return ret;
if (ret == 0)
ret = min_length;
return min_length;
out:
free_cpumask_var(mask);
return ret;
}
int get_compat_itimerspec(struct itimerspec *dst,
@@ -883,8 +896,9 @@ asmlinkage long compat_sys_time(compat_time_t __user * tloc)
if (tloc) {
if (put_user(i,tloc))
i = -EFAULT;
return -EFAULT;
}
force_successful_syscall_return();
return i;
}

View File

@@ -15,29 +15,8 @@
#include <linux/stop_machine.h>
#include <linux/mutex.h>
/*
* Represents all cpu's present in the system
* In systems capable of hotplug, this map could dynamically grow
* as new cpu's are detected in the system via any platform specific
* method, such as ACPI for e.g.
*/
cpumask_t cpu_present_map __read_mostly;
EXPORT_SYMBOL(cpu_present_map);
#ifndef CONFIG_SMP
/*
* Represents all cpu's that are currently online.
*/
cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
EXPORT_SYMBOL(cpu_online_map);
cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
EXPORT_SYMBOL(cpu_possible_map);
#else /* CONFIG_SMP */
/* Serializes the updates to cpu_online_map, cpu_present_map */
#ifdef CONFIG_SMP
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
@@ -64,8 +43,6 @@ void __init cpu_hotplug_init(void)
cpu_hotplug.refcount = 0;
}
cpumask_t cpu_active_map;
#ifdef CONFIG_HOTPLUG_CPU
void get_online_cpus(void)
@@ -96,7 +73,7 @@ EXPORT_SYMBOL_GPL(put_online_cpus);
/*
* The following two API's must be used when attempting
* to serialize the updates to cpu_online_map, cpu_present_map.
* to serialize the updates to cpu_online_mask, cpu_present_mask.
*/
void cpu_maps_update_begin(void)
{
@@ -217,7 +194,7 @@ static int __ref take_cpu_down(void *_param)
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
int err, nr_calls = 0;
cpumask_t old_allowed, tmp;
cpumask_var_t old_allowed;
void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
struct take_cpu_down_param tcd_param = {
@@ -231,6 +208,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
if (!cpu_online(cpu))
return -EINVAL;
if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
return -ENOMEM;
cpu_hotplug_begin();
err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
hcpu, -1, &nr_calls);
@@ -245,13 +225,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
}
/* Ensure that we are not runnable on dying cpu */
old_allowed = current->cpus_allowed;
cpus_setall(tmp);
cpu_clear(cpu, tmp);
set_cpus_allowed_ptr(current, &tmp);
tmp = cpumask_of_cpu(cpu);
cpumask_copy(old_allowed, &current->cpus_allowed);
set_cpus_allowed_ptr(current,
cpumask_of(cpumask_any_but(cpu_online_mask, cpu)));
err = __stop_machine(take_cpu_down, &tcd_param, &tmp);
err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
if (err) {
/* CPU didn't die: tell everyone. Can't complain. */
if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
@@ -277,7 +255,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
check_for_tasks(cpu);
out_allowed:
set_cpus_allowed_ptr(current, &old_allowed);
set_cpus_allowed_ptr(current, old_allowed);
out_release:
cpu_hotplug_done();
if (!err) {
@@ -285,13 +263,17 @@ out_release:
hcpu) == NOTIFY_BAD)
BUG();
}
free_cpumask_var(old_allowed);
return err;
}
int __ref cpu_down(unsigned int cpu)
{
int err = 0;
int err;
err = stop_machine_create();
if (err)
return err;
cpu_maps_update_begin();
if (cpu_hotplug_disabled) {
@@ -303,7 +285,7 @@ int __ref cpu_down(unsigned int cpu)
/*
* Make sure the all cpus did the reschedule and are not
* using stale version of the cpu_active_map.
* using stale version of the cpu_active_mask.
* This is not strictly necessary becuase stop_machine()
* that we run down the line already provides the required
* synchronization. But it's really a side effect and we do not
@@ -318,6 +300,7 @@ int __ref cpu_down(unsigned int cpu)
out:
cpu_maps_update_done();
stop_machine_destroy();
return err;
}
EXPORT_SYMBOL(cpu_down);
@@ -367,7 +350,7 @@ out_notify:
int __cpuinit cpu_up(unsigned int cpu)
{
int err = 0;
if (!cpu_isset(cpu, cpu_possible_map)) {
if (!cpu_possible(cpu)) {
printk(KERN_ERR "can't online cpu %d because it is not "
"configured as may-hotadd at boot time\n", cpu);
#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
@@ -392,25 +375,28 @@ out:
}
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_t frozen_cpus;
static cpumask_var_t frozen_cpus;
int disable_nonboot_cpus(void)
{
int cpu, first_cpu, error = 0;
int cpu, first_cpu, error;
error = stop_machine_create();
if (error)
return error;
cpu_maps_update_begin();
first_cpu = first_cpu(cpu_online_map);
first_cpu = cpumask_first(cpu_online_mask);
/* We take down all of the non-boot CPUs in one shot to avoid races
* with the userspace trying to use the CPU hotplug at the same time
*/
cpus_clear(frozen_cpus);
cpumask_clear(frozen_cpus);
printk("Disabling non-boot CPUs ...\n");
for_each_online_cpu(cpu) {
if (cpu == first_cpu)
continue;
error = _cpu_down(cpu, 1);
if (!error) {
cpu_set(cpu, frozen_cpus);
cpumask_set_cpu(cpu, frozen_cpus);
printk("CPU%d is down\n", cpu);
} else {
printk(KERN_ERR "Error taking CPU%d down: %d\n",
@@ -426,6 +412,7 @@ int disable_nonboot_cpus(void)
printk(KERN_ERR "Non-boot CPUs are not disabled\n");
}
cpu_maps_update_done();
stop_machine_destroy();
return error;
}
@@ -436,11 +423,11 @@ void __ref enable_nonboot_cpus(void)
/* Allow everyone to use the CPU hotplug again */
cpu_maps_update_begin();
cpu_hotplug_disabled = 0;
if (cpus_empty(frozen_cpus))
if (cpumask_empty(frozen_cpus))
goto out;
printk("Enabling non-boot CPUs ...\n");
for_each_cpu_mask_nr(cpu, frozen_cpus) {
for_each_cpu(cpu, frozen_cpus) {
error = _cpu_up(cpu, 1);
if (!error) {
printk("CPU%d is up\n", cpu);
@@ -448,10 +435,18 @@ void __ref enable_nonboot_cpus(void)
}
printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
}
cpus_clear(frozen_cpus);
cpumask_clear(frozen_cpus);
out:
cpu_maps_update_done();
}
static int alloc_frozen_cpus(void)
{
if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
return -ENOMEM;
return 0;
}
core_initcall(alloc_frozen_cpus);
#endif /* CONFIG_PM_SLEEP_SMP */
/**
@@ -467,7 +462,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu)
unsigned long val = CPU_STARTING;
#ifdef CONFIG_PM_SLEEP_SMP
if (cpu_isset(cpu, frozen_cpus))
if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
val = CPU_STARTING_FROZEN;
#endif /* CONFIG_PM_SLEEP_SMP */
raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
@@ -479,7 +474,7 @@ void __cpuinit notify_cpu_starting(unsigned int cpu)
* cpu_bit_bitmap[] is a special, "compressed" data structure that
* represents all NR_CPUS bits binary values of 1<<nr.
*
* It is used by cpumask_of_cpu() to get a constant address to a CPU
* It is used by cpumask_of() to get a constant address to a CPU
* mask value that has a single bit set only.
*/
@@ -502,3 +497,71 @@ EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
#ifdef CONFIG_INIT_ALL_POSSIBLE
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
= CPU_BITS_ALL;
#else
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
#endif
const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
EXPORT_SYMBOL(cpu_possible_mask);
static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
EXPORT_SYMBOL(cpu_online_mask);
static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
EXPORT_SYMBOL(cpu_present_mask);
static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
EXPORT_SYMBOL(cpu_active_mask);
void set_cpu_possible(unsigned int cpu, bool possible)
{
if (possible)
cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits));
else
cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits));
}
void set_cpu_present(unsigned int cpu, bool present)
{
if (present)
cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits));
else
cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits));
}
void set_cpu_online(unsigned int cpu, bool online)
{
if (online)
cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
else
cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
}
void set_cpu_active(unsigned int cpu, bool active)
{
if (active)
cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
else
cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits));
}
void init_cpu_present(const struct cpumask *src)
{
cpumask_copy(to_cpumask(cpu_present_bits), src);
}
void init_cpu_possible(const struct cpumask *src)
{
cpumask_copy(to_cpumask(cpu_possible_bits), src);
}
void init_cpu_online(const struct cpumask *src)
{
cpumask_copy(to_cpumask(cpu_online_bits), src);
}

View File

@@ -84,7 +84,7 @@ struct cpuset {
struct cgroup_subsys_state css;
unsigned long flags; /* "unsigned long" so bitops work */
cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */
struct cpuset *parent; /* my parent */
@@ -195,8 +195,6 @@ static int cpuset_mems_generation;
static struct cpuset top_cpuset = {
.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
.cpus_allowed = CPU_MASK_ALL,
.mems_allowed = NODE_MASK_ALL,
};
/*
@@ -239,6 +237,17 @@ static struct cpuset top_cpuset = {
static DEFINE_MUTEX(callback_mutex);
/*
* cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist
* buffers. They are statically allocated to prevent using excess stack
* when calling cpuset_print_task_mems_allowed().
*/
#define CPUSET_NAME_LEN (128)
#define CPUSET_NODELIST_LEN (256)
static char cpuset_name[CPUSET_NAME_LEN];
static char cpuset_nodelist[CPUSET_NODELIST_LEN];
static DEFINE_SPINLOCK(cpuset_buffer_lock);
/*
* This is ugly, but preserves the userspace API for existing cpuset
* users. If someone tries to mount the "cpuset" filesystem, we
@@ -267,7 +276,7 @@ static struct file_system_type cpuset_fs_type = {
};
/*
* Return in *pmask the portion of a cpusets's cpus_allowed that
* Return in pmask the portion of a cpusets's cpus_allowed that
* are online. If none are online, walk up the cpuset hierarchy
* until we find one that does have some online cpus. If we get
* all the way to the top and still haven't found any online cpus,
@@ -280,15 +289,16 @@ static struct file_system_type cpuset_fs_type = {
* Call with callback_mutex held.
*/
static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
static void guarantee_online_cpus(const struct cpuset *cs,
struct cpumask *pmask)
{
while (cs && !cpus_intersects(cs->cpus_allowed, cpu_online_map))
while (cs && !cpumask_intersects(cs->cpus_allowed, cpu_online_mask))
cs = cs->parent;
if (cs)
cpus_and(*pmask, cs->cpus_allowed, cpu_online_map);
cpumask_and(pmask, cs->cpus_allowed, cpu_online_mask);
else
*pmask = cpu_online_map;
BUG_ON(!cpus_intersects(*pmask, cpu_online_map));
cpumask_copy(pmask, cpu_online_mask);
BUG_ON(!cpumask_intersects(pmask, cpu_online_mask));
}
/*
@@ -364,14 +374,9 @@ void cpuset_update_task_memory_state(void)
struct task_struct *tsk = current;
struct cpuset *cs;
if (task_cs(tsk) == &top_cpuset) {
/* Don't need rcu for top_cpuset. It's never freed. */
my_cpusets_mem_gen = top_cpuset.mems_generation;
} else {
rcu_read_lock();
my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
rcu_read_unlock();
}
rcu_read_lock();
my_cpusets_mem_gen = task_cs(tsk)->mems_generation;
rcu_read_unlock();
if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
mutex_lock(&callback_mutex);
@@ -403,12 +408,43 @@ void cpuset_update_task_memory_state(void)
static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
{
return cpus_subset(p->cpus_allowed, q->cpus_allowed) &&
return cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
nodes_subset(p->mems_allowed, q->mems_allowed) &&
is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
is_mem_exclusive(p) <= is_mem_exclusive(q);
}
/**
* alloc_trial_cpuset - allocate a trial cpuset
* @cs: the cpuset that the trial cpuset duplicates
*/
static struct cpuset *alloc_trial_cpuset(const struct cpuset *cs)
{
struct cpuset *trial;
trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
if (!trial)
return NULL;
if (!alloc_cpumask_var(&trial->cpus_allowed, GFP_KERNEL)) {
kfree(trial);
return NULL;
}
cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
return trial;
}
/**
* free_trial_cpuset - free the trial cpuset
* @trial: the trial cpuset to be freed
*/
static void free_trial_cpuset(struct cpuset *trial)
{
free_cpumask_var(trial->cpus_allowed);
kfree(trial);
}
/*
* validate_change() - Used to validate that any proposed cpuset change
* follows the structural rules for cpusets.
@@ -458,7 +494,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
c = cgroup_cs(cont);
if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
c != cur &&
cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
return -EINVAL;
if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
c != cur &&
@@ -468,7 +504,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
/* Cpusets with tasks can't have empty cpus_allowed or mems_allowed */
if (cgroup_task_count(cur->css.cgroup)) {
if (cpus_empty(trial->cpus_allowed) ||
if (cpumask_empty(trial->cpus_allowed) ||
nodes_empty(trial->mems_allowed)) {
return -ENOSPC;
}
@@ -483,7 +519,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
*/
static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
{
return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
return cpumask_intersects(a->cpus_allowed, b->cpus_allowed);
}
static void
@@ -508,7 +544,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
cp = list_first_entry(&q, struct cpuset, stack_list);
list_del(q.next);
if (cpus_empty(cp->cpus_allowed))
if (cpumask_empty(cp->cpus_allowed))
continue;
if (is_sched_load_balance(cp))
@@ -575,7 +611,8 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
* element of the partition (one sched domain) to be passed to
* partition_sched_domains().
*/
static int generate_sched_domains(cpumask_t **domains,
/* FIXME: see the FIXME in partition_sched_domains() */
static int generate_sched_domains(struct cpumask **domains,
struct sched_domain_attr **attributes)
{
LIST_HEAD(q); /* queue of cpusets to be scanned */
@@ -583,10 +620,10 @@ static int generate_sched_domains(cpumask_t **domains,
struct cpuset **csa; /* array of all cpuset ptrs */
int csn; /* how many cpuset ptrs in csa so far */
int i, j, k; /* indices for partition finding loops */
cpumask_t *doms; /* resulting partition; i.e. sched domains */
struct cpumask *doms; /* resulting partition; i.e. sched domains */
struct sched_domain_attr *dattr; /* attributes for custom domains */
int ndoms = 0; /* number of sched domains in result */
int nslot; /* next empty doms[] cpumask_t slot */
int nslot; /* next empty doms[] struct cpumask slot */
doms = NULL;
dattr = NULL;
@@ -594,7 +631,7 @@ static int generate_sched_domains(cpumask_t **domains,
/* Special case for the 99% of systems with one, full, sched domain */
if (is_sched_load_balance(&top_cpuset)) {
doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
doms = kmalloc(cpumask_size(), GFP_KERNEL);
if (!doms)
goto done;
@@ -603,7 +640,7 @@ static int generate_sched_domains(cpumask_t **domains,
*dattr = SD_ATTR_INIT;
update_domain_attr_tree(dattr, &top_cpuset);
}
*doms = top_cpuset.cpus_allowed;
cpumask_copy(doms, top_cpuset.cpus_allowed);
ndoms = 1;
goto done;
@@ -622,7 +659,7 @@ static int generate_sched_domains(cpumask_t **domains,
cp = list_first_entry(&q, struct cpuset, stack_list);
list_del(q.next);
if (cpus_empty(cp->cpus_allowed))
if (cpumask_empty(cp->cpus_allowed))
continue;
/*
@@ -673,7 +710,7 @@ restart:
* Now we know how many domains to create.
* Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
*/
doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL);
if (!doms)
goto done;
@@ -685,7 +722,7 @@ restart:
for (nslot = 0, i = 0; i < csn; i++) {
struct cpuset *a = csa[i];
cpumask_t *dp;
struct cpumask *dp;
int apn = a->pn;
if (apn < 0) {
@@ -708,14 +745,14 @@ restart:
continue;
}
cpus_clear(*dp);
cpumask_clear(dp);
if (dattr)
*(dattr + nslot) = SD_ATTR_INIT;
for (j = i; j < csn; j++) {
struct cpuset *b = csa[j];
if (apn == b->pn) {
cpus_or(*dp, *dp, b->cpus_allowed);
cpumask_or(dp, dp, b->cpus_allowed);
if (dattr)
update_domain_attr_tree(dattr + nslot, b);
@@ -755,7 +792,7 @@ done:
static void do_rebuild_sched_domains(struct work_struct *unused)
{
struct sched_domain_attr *attr;
cpumask_t *doms;
struct cpumask *doms;
int ndoms;
get_online_cpus();
@@ -824,7 +861,7 @@ void rebuild_sched_domains(void)
static int cpuset_test_cpumask(struct task_struct *tsk,
struct cgroup_scanner *scan)
{
return !cpus_equal(tsk->cpus_allowed,
return !cpumask_equal(&tsk->cpus_allowed,
(cgroup_cs(scan->cg))->cpus_allowed);
}
@@ -842,7 +879,7 @@ static int cpuset_test_cpumask(struct task_struct *tsk,
static void cpuset_change_cpumask(struct task_struct *tsk,
struct cgroup_scanner *scan)
{
set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
set_cpus_allowed_ptr(tsk, ((cgroup_cs(scan->cg))->cpus_allowed));
}
/**
@@ -874,10 +911,10 @@ static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
* @cs: the cpuset to consider
* @buf: buffer of cpu numbers written to this cpuset
*/
static int update_cpumask(struct cpuset *cs, const char *buf)
static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
const char *buf)
{
struct ptr_heap heap;
struct cpuset trialcs;
int retval;
int is_load_balanced;
@@ -885,8 +922,6 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
if (cs == &top_cpuset)
return -EACCES;
trialcs = *cs;
/*
* An empty cpus_allowed is ok only if the cpuset has no tasks.
* Since cpulist_parse() fails on an empty mask, we special case
@@ -894,31 +929,31 @@ static int update_cpumask(struct cpuset *cs, const char *buf)
* with tasks have cpus.
*/
if (!*buf) {
cpus_clear(trialcs.cpus_allowed);
cpumask_clear(trialcs->cpus_allowed);
} else {
retval = cpulist_parse(buf, trialcs.cpus_allowed);
retval = cpulist_parse(buf, trialcs->cpus_allowed);
if (retval < 0)
return retval;
if (!cpus_subset(trialcs.cpus_allowed, cpu_online_map))
if (!cpumask_subset(trialcs->cpus_allowed, cpu_online_mask))
return -EINVAL;
}
retval = validate_change(cs, &trialcs);
retval = validate_change(cs, trialcs);
if (retval < 0)
return retval;
/* Nothing to do if the cpus didn't change */
if (cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed))
if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
return 0;
retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
if (retval)
return retval;
is_load_balanced = is_sched_load_balance(&trialcs);
is_load_balanced = is_sched_load_balance(trialcs);
mutex_lock(&callback_mutex);
cs->cpus_allowed = trialcs.cpus_allowed;
cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
mutex_unlock(&callback_mutex);
/*
@@ -1006,7 +1041,7 @@ static int update_tasks_nodemask(struct cpuset *cs, const nodemask_t *oldmem)
cpuset_being_rebound = cs; /* causes mpol_dup() rebind */
fudge = 10; /* spare mmarray[] slots */
fudge += cpus_weight(cs->cpus_allowed); /* imagine one fork-bomb/cpu */
fudge += cpumask_weight(cs->cpus_allowed);/* imagine 1 fork-bomb/cpu */
retval = -ENOMEM;
/*
@@ -1093,9 +1128,9 @@ done:
* lock each such tasks mm->mmap_sem, scan its vma's and rebind
* their mempolicies to the cpusets new mems_allowed.
*/
static int update_nodemask(struct cpuset *cs, const char *buf)
static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
const char *buf)
{
struct cpuset trialcs;
nodemask_t oldmem;
int retval;
@@ -1106,8 +1141,6 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
if (cs == &top_cpuset)
return -EACCES;
trialcs = *cs;
/*
* An empty mems_allowed is ok iff there are no tasks in the cpuset.
* Since nodelist_parse() fails on an empty mask, we special case
@@ -1115,27 +1148,27 @@ static int update_nodemask(struct cpuset *cs, const char *buf)
* with tasks have memory.
*/
if (!*buf) {
nodes_clear(trialcs.mems_allowed);
nodes_clear(trialcs->mems_allowed);
} else {
retval = nodelist_parse(buf, trialcs.mems_allowed);
retval = nodelist_parse(buf, trialcs->mems_allowed);
if (retval < 0)
goto done;
if (!nodes_subset(trialcs.mems_allowed,
if (!nodes_subset(trialcs->mems_allowed,
node_states[N_HIGH_MEMORY]))
return -EINVAL;
}
oldmem = cs->mems_allowed;
if (nodes_equal(oldmem, trialcs.mems_allowed)) {
if (nodes_equal(oldmem, trialcs->mems_allowed)) {
retval = 0; /* Too easy - nothing to do */
goto done;
}
retval = validate_change(cs, &trialcs);
retval = validate_change(cs, trialcs);
if (retval < 0)
goto done;
mutex_lock(&callback_mutex);
cs->mems_allowed = trialcs.mems_allowed;
cs->mems_allowed = trialcs->mems_allowed;
cs->mems_generation = cpuset_mems_generation++;
mutex_unlock(&callback_mutex);
@@ -1156,7 +1189,8 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
if (val != cs->relax_domain_level) {
cs->relax_domain_level = val;
if (!cpus_empty(cs->cpus_allowed) && is_sched_load_balance(cs))
if (!cpumask_empty(cs->cpus_allowed) &&
is_sched_load_balance(cs))
async_rebuild_sched_domains();
}
@@ -1175,31 +1209,36 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
int turning_on)
{
struct cpuset trialcs;
struct cpuset *trialcs;
int err;
int balance_flag_changed;
trialcs = *cs;
if (turning_on)
set_bit(bit, &trialcs.flags);
else
clear_bit(bit, &trialcs.flags);
trialcs = alloc_trial_cpuset(cs);
if (!trialcs)
return -ENOMEM;
err = validate_change(cs, &trialcs);
if (turning_on)
set_bit(bit, &trialcs->flags);
else
clear_bit(bit, &trialcs->flags);
err = validate_change(cs, trialcs);
if (err < 0)
return err;
goto out;
balance_flag_changed = (is_sched_load_balance(cs) !=
is_sched_load_balance(&trialcs));
is_sched_load_balance(trialcs));
mutex_lock(&callback_mutex);
cs->flags = trialcs.flags;
cs->flags = trialcs->flags;
mutex_unlock(&callback_mutex);
if (!cpus_empty(trialcs.cpus_allowed) && balance_flag_changed)
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
async_rebuild_sched_domains();
return 0;
out:
free_trial_cpuset(trialcs);
return err;
}
/*
@@ -1300,42 +1339,47 @@ static int fmeter_getrate(struct fmeter *fmp)
return val;
}
/* Protected by cgroup_lock */
static cpumask_var_t cpus_attach;
/* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
static int cpuset_can_attach(struct cgroup_subsys *ss,
struct cgroup *cont, struct task_struct *tsk)
{
struct cpuset *cs = cgroup_cs(cont);
int ret = 0;
if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
return -ENOSPC;
if (tsk->flags & PF_THREAD_BOUND) {
cpumask_t mask;
if (tsk->flags & PF_THREAD_BOUND) {
mutex_lock(&callback_mutex);
mask = cs->cpus_allowed;
if (!cpumask_equal(&tsk->cpus_allowed, cs->cpus_allowed))
ret = -EINVAL;
mutex_unlock(&callback_mutex);
if (!cpus_equal(tsk->cpus_allowed, mask))
return -EINVAL;
}
return security_task_setscheduler(tsk, 0, NULL);
return ret < 0 ? ret : security_task_setscheduler(tsk, 0, NULL);
}
static void cpuset_attach(struct cgroup_subsys *ss,
struct cgroup *cont, struct cgroup *oldcont,
struct task_struct *tsk)
{
cpumask_t cpus;
nodemask_t from, to;
struct mm_struct *mm;
struct cpuset *cs = cgroup_cs(cont);
struct cpuset *oldcs = cgroup_cs(oldcont);
int err;
mutex_lock(&callback_mutex);
guarantee_online_cpus(cs, &cpus);
err = set_cpus_allowed_ptr(tsk, &cpus);
mutex_unlock(&callback_mutex);
if (cs == &top_cpuset) {
cpumask_copy(cpus_attach, cpu_possible_mask);
} else {
mutex_lock(&callback_mutex);
guarantee_online_cpus(cs, cpus_attach);
mutex_unlock(&callback_mutex);
}
err = set_cpus_allowed_ptr(tsk, cpus_attach);
if (err)
return;
@@ -1348,7 +1392,6 @@ static void cpuset_attach(struct cgroup_subsys *ss,
cpuset_migrate_mm(mm, &from, &to);
mmput(mm);
}
}
/* The various types of files and directories in a cpuset file system */
@@ -1443,21 +1486,29 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
const char *buf)
{
int retval = 0;
struct cpuset *cs = cgroup_cs(cgrp);
struct cpuset *trialcs;
if (!cgroup_lock_live_group(cgrp))
return -ENODEV;
trialcs = alloc_trial_cpuset(cs);
if (!trialcs)
return -ENOMEM;
switch (cft->private) {
case FILE_CPULIST:
retval = update_cpumask(cgroup_cs(cgrp), buf);
retval = update_cpumask(cs, trialcs, buf);
break;
case FILE_MEMLIST:
retval = update_nodemask(cgroup_cs(cgrp), buf);
retval = update_nodemask(cs, trialcs, buf);
break;
default:
retval = -EINVAL;
break;
}
free_trial_cpuset(trialcs);
cgroup_unlock();
return retval;
}
@@ -1476,13 +1527,13 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
{
cpumask_t mask;
int ret;
mutex_lock(&callback_mutex);
mask = cs->cpus_allowed;
ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
mutex_unlock(&callback_mutex);
return cpulist_scnprintf(page, PAGE_SIZE, mask);
return ret;
}
static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
@@ -1718,7 +1769,7 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
parent_cs = cgroup_cs(parent);
cs->mems_allowed = parent_cs->mems_allowed;
cs->cpus_allowed = parent_cs->cpus_allowed;
cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
return;
}
@@ -1744,6 +1795,10 @@ static struct cgroup_subsys_state *cpuset_create(
cs = kmalloc(sizeof(*cs), GFP_KERNEL);
if (!cs)
return ERR_PTR(-ENOMEM);
if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) {
kfree(cs);
return ERR_PTR(-ENOMEM);
}
cpuset_update_task_memory_state();
cs->flags = 0;
@@ -1752,7 +1807,7 @@ static struct cgroup_subsys_state *cpuset_create(
if (is_spread_slab(parent))
set_bit(CS_SPREAD_SLAB, &cs->flags);
set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
cpus_clear(cs->cpus_allowed);
cpumask_clear(cs->cpus_allowed);
nodes_clear(cs->mems_allowed);
cs->mems_generation = cpuset_mems_generation++;
fmeter_init(&cs->fmeter);
@@ -1779,6 +1834,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
number_of_cpusets--;
free_cpumask_var(cs->cpus_allowed);
kfree(cs);
}
@@ -1802,6 +1858,8 @@ struct cgroup_subsys cpuset_subsys = {
int __init cpuset_init_early(void)
{
alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed);
top_cpuset.mems_generation = cpuset_mems_generation++;
return 0;
}
@@ -1817,7 +1875,7 @@ int __init cpuset_init(void)
{
int err = 0;
cpus_setall(top_cpuset.cpus_allowed);
cpumask_setall(top_cpuset.cpus_allowed);
nodes_setall(top_cpuset.mems_allowed);
fmeter_init(&top_cpuset.fmeter);
@@ -1829,6 +1887,9 @@ int __init cpuset_init(void)
if (err < 0)
return err;
if (!alloc_cpumask_var(&cpus_attach, GFP_KERNEL))
BUG();
number_of_cpusets = 1;
return 0;
}
@@ -1903,7 +1964,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
* has online cpus, so can't be empty).
*/
parent = cs->parent;
while (cpus_empty(parent->cpus_allowed) ||
while (cpumask_empty(parent->cpus_allowed) ||
nodes_empty(parent->mems_allowed))
parent = parent->parent;
@@ -1944,7 +2005,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
}
/* Continue past cpusets with all cpus, mems online */
if (cpus_subset(cp->cpus_allowed, cpu_online_map) &&
if (cpumask_subset(cp->cpus_allowed, cpu_online_mask) &&
nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
continue;
@@ -1952,13 +2013,14 @@ static void scan_for_empty_cpusets(struct cpuset *root)
/* Remove offline cpus and mems from this cpuset. */
mutex_lock(&callback_mutex);
cpus_and(cp->cpus_allowed, cp->cpus_allowed, cpu_online_map);
cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
cpu_online_mask);
nodes_and(cp->mems_allowed, cp->mems_allowed,
node_states[N_HIGH_MEMORY]);
mutex_unlock(&callback_mutex);
/* Move tasks from the empty cpuset to a parent */
if (cpus_empty(cp->cpus_allowed) ||
if (cpumask_empty(cp->cpus_allowed) ||
nodes_empty(cp->mems_allowed))
remove_tasks_in_empty_cpuset(cp);
else {
@@ -1984,7 +2046,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
unsigned long phase, void *unused_cpu)
{
struct sched_domain_attr *attr;
cpumask_t *doms;
struct cpumask *doms;
int ndoms;
switch (phase) {
@@ -1999,7 +2061,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
}
cgroup_lock();
top_cpuset.cpus_allowed = cpu_online_map;
cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
scan_for_empty_cpusets(&top_cpuset);
ndoms = generate_sched_domains(&doms, &attr);
cgroup_unlock();
@@ -2044,7 +2106,7 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
void __init cpuset_init_smp(void)
{
top_cpuset.cpus_allowed = cpu_online_map;
cpumask_copy(top_cpuset.cpus_allowed, cpu_online_mask);
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
hotcpu_notifier(cpuset_track_online_cpus, 0);
@@ -2054,15 +2116,15 @@ void __init cpuset_init_smp(void)
/**
* cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
* @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
* @pmask: pointer to cpumask_t variable to receive cpus_allowed set.
* @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
*
* Description: Returns the cpumask_t cpus_allowed of the cpuset
* Description: Returns the cpumask_var_t cpus_allowed of the cpuset
* attached to the specified @tsk. Guaranteed to return some non-empty
* subset of cpu_online_map, even if this means going outside the
* tasks cpuset.
**/
void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
{
mutex_lock(&callback_mutex);
cpuset_cpus_allowed_locked(tsk, pmask);
@@ -2073,7 +2135,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
* cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
* Must be called with callback_mutex held.
**/
void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask)
void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
{
task_lock(tsk);
guarantee_online_cpus(task_cs(tsk), pmask);
@@ -2356,6 +2418,29 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
}
/**
* cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
* @task: pointer to task_struct of some task.
*
* Description: Prints @task's name, cpuset name, and cached copy of its
* mems_allowed to the kernel log. Must hold task_lock(task) to allow
* dereferencing task_cs(task).
*/
void cpuset_print_task_mems_allowed(struct task_struct *tsk)
{
struct dentry *dentry;
dentry = task_cs(tsk)->css.cgroup->dentry;
spin_lock(&cpuset_buffer_lock);
snprintf(cpuset_name, CPUSET_NAME_LEN,
dentry ? (const char *)dentry->d_name.name : "/");
nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
tsk->mems_allowed);
printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
tsk->comm, cpuset_name, cpuset_nodelist);
spin_unlock(&cpuset_buffer_lock);
}
/*
* Collection of memory_pressure is suppressed unless
* this flag is enabled by writing "1" to the special

View File

@@ -372,7 +372,8 @@ int commit_creds(struct cred *new)
old->fsuid != new->fsuid ||
old->fsgid != new->fsgid ||
!cap_issubset(new->cap_permitted, old->cap_permitted)) {
set_dumpable(task->mm, suid_dumpable);
if (task->mm)
set_dumpable(task->mm, suid_dumpable);
task->pdeath_signal = 0;
smp_wmb();
}
@@ -506,6 +507,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
else
old = get_cred(&init_cred);
*new = *old;
get_uid(new->user);
get_group_info(new->group_info);
@@ -529,6 +531,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
error:
put_cred(new);
put_cred(old);
return NULL;
}
EXPORT_SYMBOL(prepare_kernel_cred);

View File

@@ -109,20 +109,40 @@ EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
int dma_alloc_from_coherent(struct device *dev, ssize_t size,
dma_addr_t *dma_handle, void **ret)
{
struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
struct dma_coherent_mem *mem;
int order = get_order(size);
int pageno;
if (mem) {
int page = bitmap_find_free_region(mem->bitmap, mem->size,
order);
if (page >= 0) {
*dma_handle = mem->device_base + (page << PAGE_SHIFT);
*ret = mem->virt_base + (page << PAGE_SHIFT);
memset(*ret, 0, size);
} else if (mem->flags & DMA_MEMORY_EXCLUSIVE)
*ret = NULL;
if (!dev)
return 0;
mem = dev->dma_mem;
if (!mem)
return 0;
if (unlikely(size > mem->size))
return 0;
pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
if (pageno >= 0) {
/*
* Memory was found in the per-device arena.
*/
*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
*ret = mem->virt_base + (pageno << PAGE_SHIFT);
memset(*ret, 0, size);
} else if (mem->flags & DMA_MEMORY_EXCLUSIVE) {
/*
* The per-device arena is exhausted and we are not
* permitted to fall back to generic memory.
*/
*ret = NULL;
} else {
/*
* The per-device arena is exhausted and we are
* permitted to fall back to generic memory.
*/
return 0;
}
return (mem != NULL);
return 1;
}
EXPORT_SYMBOL(dma_alloc_from_coherent);

View File

@@ -642,35 +642,31 @@ retry:
/*
* We found no owner yet mm_users > 1: this implies that we are
* most likely racing with swapoff (try_to_unuse()) or /proc or
* ptrace or page migration (get_task_mm()). Mark owner as NULL,
* so that subsystems can understand the callback and take action.
* ptrace or page migration (get_task_mm()). Mark owner as NULL.
*/
down_write(&mm->mmap_sem);
cgroup_mm_owner_callbacks(mm->owner, NULL);
mm->owner = NULL;
up_write(&mm->mmap_sem);
return;
assign_new_owner:
BUG_ON(c == p);
get_task_struct(c);
read_unlock(&tasklist_lock);
down_write(&mm->mmap_sem);
/*
* The task_lock protects c->mm from changing.
* We always want mm->owner->mm == mm
*/
task_lock(c);
/*
* Delay read_unlock() till we have the task_lock()
* to ensure that c does not slip away underneath us
*/
read_unlock(&tasklist_lock);
if (c->mm != mm) {
task_unlock(c);
up_write(&mm->mmap_sem);
put_task_struct(c);
goto retry;
}
cgroup_mm_owner_callbacks(mm->owner, c);
mm->owner = c;
task_unlock(c);
up_write(&mm->mmap_sem);
put_task_struct(c);
}
#endif /* CONFIG_MM_OWNER */
@@ -1052,10 +1048,7 @@ NORET_TYPE void do_exit(long code)
preempt_count());
acct_update_integrals(tsk);
if (tsk->mm) {
update_hiwater_rss(tsk->mm);
update_hiwater_vm(tsk->mm);
}
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
hrtimer_cancel(&tsk->signal->real_timer);

View File

@@ -405,6 +405,18 @@ __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
#define allocate_mm() (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
#define free_mm(mm) (kmem_cache_free(mm_cachep, (mm)))
static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
static int __init coredump_filter_setup(char *s)
{
default_dump_filter =
(simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) &
MMF_DUMP_FILTER_MASK;
return 1;
}
__setup("coredump_filter=", coredump_filter_setup);
#include <linux/init_task.h>
static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
@@ -413,8 +425,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
atomic_set(&mm->mm_count, 1);
init_rwsem(&mm->mmap_sem);
INIT_LIST_HEAD(&mm->mmlist);
mm->flags = (current->mm) ? current->mm->flags
: MMF_DUMP_FILTER_DEFAULT;
mm->flags = (current->mm) ? current->mm->flags : default_dump_filter;
mm->core_state = NULL;
mm->nr_ptes = 0;
set_mm_counter(mm, file_rss, 0);
@@ -763,7 +774,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
{
struct sighand_struct *sig;
if (clone_flags & (CLONE_SIGHAND | CLONE_THREAD)) {
if (clone_flags & CLONE_SIGHAND) {
atomic_inc(&current->sighand->count);
return 0;
}
@@ -1120,12 +1131,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (pid != &init_struct_pid) {
retval = -ENOMEM;
pid = alloc_pid(task_active_pid_ns(p));
pid = alloc_pid(p->nsproxy->pid_ns);
if (!pid)
goto bad_fork_cleanup_io;
if (clone_flags & CLONE_NEWPID) {
retval = pid_ns_prepare_proc(task_active_pid_ns(p));
retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
if (retval < 0)
goto bad_fork_free_pid;
}
@@ -1475,12 +1486,10 @@ void __init proc_caches_init(void)
fs_cachep = kmem_cache_create("fs_cache",
sizeof(struct fs_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
vm_area_cachep = kmem_cache_create("vm_area_struct",
sizeof(struct vm_area_struct), 0,
SLAB_PANIC, NULL);
mm_cachep = kmem_cache_create("mm_struct",
sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
mmap_init();
}
/*

View File

@@ -170,8 +170,11 @@ static void get_futex_key_refs(union futex_key *key)
*/
static void drop_futex_key_refs(union futex_key *key)
{
if (!key->both.ptr)
if (!key->both.ptr) {
/* If we're here then we tried to put a key we failed to get */
WARN_ON_ONCE(1);
return;
}
switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
@@ -730,8 +733,8 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
}
spin_unlock(&hb->lock);
out:
put_futex_key(fshared, &key);
out:
return ret;
}
@@ -755,7 +758,7 @@ retryfull:
goto out;
ret = get_futex_key(uaddr2, fshared, &key2);
if (unlikely(ret != 0))
goto out;
goto out_put_key1;
hb1 = hash_futex(&key1);
hb2 = hash_futex(&key2);
@@ -777,12 +780,12 @@ retry:
* but we might get them from range checking
*/
ret = op_ret;
goto out;
goto out_put_keys;
#endif
if (unlikely(op_ret != -EFAULT)) {
ret = op_ret;
goto out;
goto out_put_keys;
}
/*
@@ -796,7 +799,7 @@ retry:
ret = futex_handle_fault((unsigned long)uaddr2,
attempt);
if (ret)
goto out;
goto out_put_keys;
goto retry;
}
@@ -834,10 +837,11 @@ retry:
spin_unlock(&hb1->lock);
if (hb1 != hb2)
spin_unlock(&hb2->lock);
out:
out_put_keys:
put_futex_key(fshared, &key2);
out_put_key1:
put_futex_key(fshared, &key1);
out:
return ret;
}
@@ -854,13 +858,13 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
struct futex_q *this, *next;
int ret, drop_count = 0;
retry:
retry:
ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0))
goto out;
ret = get_futex_key(uaddr2, fshared, &key2);
if (unlikely(ret != 0))
goto out;
goto out_put_key1;
hb1 = hash_futex(&key1);
hb2 = hash_futex(&key2);
@@ -882,7 +886,7 @@ static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
if (!ret)
goto retry;
return ret;
goto out_put_keys;
}
if (curval != *cmpval) {
ret = -EAGAIN;
@@ -927,9 +931,11 @@ out_unlock:
while (--drop_count >= 0)
drop_futex_key_refs(&key1);
out:
out_put_keys:
put_futex_key(fshared, &key2);
out_put_key1:
put_futex_key(fshared, &key1);
out:
return ret;
}
@@ -990,7 +996,7 @@ static int unqueue_me(struct futex_q *q)
int ret = 0;
/* In the common case we don't take the spinlock, which is nice. */
retry:
retry:
lock_ptr = q->lock_ptr;
barrier();
if (lock_ptr != NULL) {
@@ -1172,11 +1178,11 @@ static int futex_wait(u32 __user *uaddr, int fshared,
q.pi_state = NULL;
q.bitset = bitset;
retry:
retry:
q.key = FUTEX_KEY_INIT;
ret = get_futex_key(uaddr, fshared, &q.key);
if (unlikely(ret != 0))
goto out_release_sem;
goto out;
hb = queue_lock(&q);
@@ -1204,6 +1210,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
if (unlikely(ret)) {
queue_unlock(&q, hb);
put_futex_key(fshared, &q.key);
ret = get_user(uval, uaddr);
@@ -1213,7 +1220,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
}
ret = -EWOULDBLOCK;
if (uval != val)
goto out_unlock_release_sem;
goto out_unlock_put_key;
/* Only actually queue if *uaddr contained val. */
queue_me(&q, hb);
@@ -1305,11 +1312,11 @@ static int futex_wait(u32 __user *uaddr, int fshared,
return -ERESTART_RESTARTBLOCK;
}
out_unlock_release_sem:
out_unlock_put_key:
queue_unlock(&q, hb);
out_release_sem:
put_futex_key(fshared, &q.key);
out:
return ret;
}
@@ -1358,16 +1365,16 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
}
q.pi_state = NULL;
retry:
retry:
q.key = FUTEX_KEY_INIT;
ret = get_futex_key(uaddr, fshared, &q.key);
if (unlikely(ret != 0))
goto out_release_sem;
goto out;
retry_unlocked:
retry_unlocked:
hb = queue_lock(&q);
retry_locked:
retry_locked:
ret = lock_taken = 0;
/*
@@ -1388,14 +1395,14 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
*/
if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) {
ret = -EDEADLK;
goto out_unlock_release_sem;
goto out_unlock_put_key;
}
/*
* Surprise - we got the lock. Just return to userspace:
*/
if (unlikely(!curval))
goto out_unlock_release_sem;
goto out_unlock_put_key;
uval = curval;
@@ -1431,7 +1438,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
* We took the lock due to owner died take over.
*/
if (unlikely(lock_taken))
goto out_unlock_release_sem;
goto out_unlock_put_key;
/*
* We dont have the lock. Look up the PI state (or create it if
@@ -1470,7 +1477,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
goto retry_locked;
}
default:
goto out_unlock_release_sem;
goto out_unlock_put_key;
}
}
@@ -1561,16 +1568,17 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
destroy_hrtimer_on_stack(&to->timer);
return ret != -EINTR ? ret : -ERESTARTNOINTR;
out_unlock_release_sem:
out_unlock_put_key:
queue_unlock(&q, hb);
out_release_sem:
out_put_key:
put_futex_key(fshared, &q.key);
out:
if (to)
destroy_hrtimer_on_stack(&to->timer);
return ret;
uaddr_faulted:
uaddr_faulted:
/*
* We have to r/w *(int __user *)uaddr, and we have to modify it
* atomically. Therefore, if we continue to fault after get_user()
@@ -1583,7 +1591,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr, attempt);
if (ret)
goto out_release_sem;
goto out_put_key;
goto retry_unlocked;
}
@@ -1675,9 +1683,9 @@ retry_unlocked:
out_unlock:
spin_unlock(&hb->lock);
out:
put_futex_key(fshared, &key);
out:
return ret;
pi_faulted:

View File

@@ -32,7 +32,6 @@
*/
#include <linux/cpu.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
@@ -635,7 +634,6 @@ static inline void hrtimer_init_timer_hres(struct hrtimer *timer)
{
}
static void __run_hrtimer(struct hrtimer *timer);
/*
* When High resolution timers are active, try to reprogram. Note, that in case
@@ -647,13 +645,9 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
struct hrtimer_clock_base *base)
{
if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) {
/*
* XXX: recursion check?
* hrtimer_forward() should round up with timer granularity
* so that we never get into inf recursion here,
* it doesn't do that though
*/
__run_hrtimer(timer);
spin_unlock(&base->cpu_base->lock);
raise_softirq_irqoff(HRTIMER_SOFTIRQ);
spin_lock(&base->cpu_base->lock);
return 1;
}
return 0;
@@ -706,11 +700,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
}
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { }
static inline int hrtimer_reprogram(struct hrtimer *timer,
struct hrtimer_clock_base *base)
{
return 0;
}
#endif /* CONFIG_HIGH_RES_TIMERS */
@@ -781,9 +770,11 @@ EXPORT_SYMBOL_GPL(hrtimer_forward);
*
* The timer is inserted in expiry order. Insertion into the
* red black tree is O(log(n)). Must hold the base lock.
*
* Returns 1 when the new timer is the leftmost timer in the tree.
*/
static void enqueue_hrtimer(struct hrtimer *timer,
struct hrtimer_clock_base *base, int reprogram)
static int enqueue_hrtimer(struct hrtimer *timer,
struct hrtimer_clock_base *base)
{
struct rb_node **link = &base->active.rb_node;
struct rb_node *parent = NULL;
@@ -815,20 +806,8 @@ static void enqueue_hrtimer(struct hrtimer *timer,
* Insert the timer to the rbtree and check whether it
* replaces the first pending timer
*/
if (leftmost) {
/*
* Reprogram the clock event device. When the timer is already
* expired hrtimer_enqueue_reprogram has either called the
* callback or added it to the pending list and raised the
* softirq.
*
* This is a NOP for !HIGHRES
*/
if (reprogram && hrtimer_enqueue_reprogram(timer, base))
return;
if (leftmost)
base->first = &timer->node;
}
rb_link_node(&timer->node, parent, link);
rb_insert_color(&timer->node, &base->active);
@@ -837,6 +816,8 @@ static void enqueue_hrtimer(struct hrtimer *timer,
* state of a possibly running callback.
*/
timer->state |= HRTIMER_STATE_ENQUEUED;
return leftmost;
}
/*
@@ -913,7 +894,7 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n
{
struct hrtimer_clock_base *base, *new_base;
unsigned long flags;
int ret;
int ret, leftmost;
base = lock_hrtimer_base(timer, &flags);
@@ -941,12 +922,16 @@ hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, unsigned long delta_n
timer_stats_hrtimer_set_start_info(timer);
leftmost = enqueue_hrtimer(timer, new_base);
/*
* Only allow reprogramming if the new base is on this CPU.
* (it might still be on another CPU if the timer was pending)
*
* XXX send_remote_softirq() ?
*/
enqueue_hrtimer(timer, new_base,
new_base->cpu_base == &__get_cpu_var(hrtimer_bases));
if (leftmost && new_base->cpu_base == &__get_cpu_var(hrtimer_bases))
hrtimer_enqueue_reprogram(timer, new_base);
unlock_hrtimer_base(timer, &flags);
@@ -1158,13 +1143,13 @@ static void __run_hrtimer(struct hrtimer *timer)
spin_lock(&cpu_base->lock);
/*
* Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid
* reprogramming of the event hardware. This happens at the end of this
* function anyway.
* Note: We clear the CALLBACK bit after enqueue_hrtimer and
* we do not reprogramm the event hardware. Happens either in
* hrtimer_start_range_ns() or in hrtimer_interrupt()
*/
if (restart != HRTIMER_NORESTART) {
BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
enqueue_hrtimer(timer, base, 0);
enqueue_hrtimer(timer, base);
}
timer->state &= ~HRTIMER_STATE_CALLBACK;
}
@@ -1244,6 +1229,22 @@ void hrtimer_interrupt(struct clock_event_device *dev)
}
}
/*
* local version of hrtimer_peek_ahead_timers() called with interrupts
* disabled.
*/
static void __hrtimer_peek_ahead_timers(void)
{
struct tick_device *td;
if (!hrtimer_hres_active())
return;
td = &__get_cpu_var(tick_cpu_device);
if (td && td->evtdev)
hrtimer_interrupt(td->evtdev);
}
/**
* hrtimer_peek_ahead_timers -- run soft-expired timers now
*
@@ -1255,20 +1256,23 @@ void hrtimer_interrupt(struct clock_event_device *dev)
*/
void hrtimer_peek_ahead_timers(void)
{
struct tick_device *td;
unsigned long flags;
if (!hrtimer_hres_active())
return;
local_irq_save(flags);
td = &__get_cpu_var(tick_cpu_device);
if (td && td->evtdev)
hrtimer_interrupt(td->evtdev);
__hrtimer_peek_ahead_timers();
local_irq_restore(flags);
}
#endif /* CONFIG_HIGH_RES_TIMERS */
static void run_hrtimer_softirq(struct softirq_action *h)
{
hrtimer_peek_ahead_timers();
}
#else /* CONFIG_HIGH_RES_TIMERS */
static inline void __hrtimer_peek_ahead_timers(void) { }
#endif /* !CONFIG_HIGH_RES_TIMERS */
/*
* Called from timer softirq every jiffy, expire hrtimers:
@@ -1514,39 +1518,36 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
__remove_hrtimer(timer, old_base, HRTIMER_STATE_MIGRATE, 0);
timer->base = new_base;
/*
* Enqueue the timers on the new cpu, but do not reprogram
* the timer as that would enable a deadlock between
* hrtimer_enqueue_reprogramm() running the timer and us still
* holding a nested base lock.
*
* Instead we tickle the hrtimer interrupt after the migration
* is done, which will run all expired timers and re-programm
* the timer device.
* Enqueue the timers on the new cpu. This does not
* reprogram the event device in case the timer
* expires before the earliest on this CPU, but we run
* hrtimer_interrupt after we migrated everything to
* sort out already expired timers and reprogram the
* event device.
*/
enqueue_hrtimer(timer, new_base, 0);
enqueue_hrtimer(timer, new_base);
/* Clear the migration state bit */
timer->state &= ~HRTIMER_STATE_MIGRATE;
}
}
static int migrate_hrtimers(int scpu)
static void migrate_hrtimers(int scpu)
{
struct hrtimer_cpu_base *old_base, *new_base;
int dcpu, i;
int i;
BUG_ON(cpu_online(scpu));
old_base = &per_cpu(hrtimer_bases, scpu);
new_base = &get_cpu_var(hrtimer_bases);
dcpu = smp_processor_id();
tick_cancel_sched_timer(scpu);
local_irq_disable();
old_base = &per_cpu(hrtimer_bases, scpu);
new_base = &__get_cpu_var(hrtimer_bases);
/*
* The caller is globally serialized and nobody else
* takes two locks at once, deadlock is not possible.
*/
spin_lock_irq(&new_base->lock);
spin_lock(&new_base->lock);
spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
@@ -1555,15 +1556,11 @@ static int migrate_hrtimers(int scpu)
}
spin_unlock(&old_base->lock);
spin_unlock_irq(&new_base->lock);
put_cpu_var(hrtimer_bases);
spin_unlock(&new_base->lock);
return dcpu;
}
static void tickle_timers(void *arg)
{
hrtimer_peek_ahead_timers();
/* Check, if we got expired work to do */
__hrtimer_peek_ahead_timers();
local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */
@@ -1584,11 +1581,8 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
case CPU_DEAD:
case CPU_DEAD_FROZEN:
{
int dcpu;
clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu);
dcpu = migrate_hrtimers(scpu);
smp_call_function_single(dcpu, tickle_timers, NULL, 0);
migrate_hrtimers(scpu);
break;
}
#endif
@@ -1609,6 +1603,9 @@ void __init hrtimers_init(void)
hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
register_cpu_notifier(&hrtimers_nb);
#ifdef CONFIG_HIGH_RES_TIMERS
open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq);
#endif
}
/**

View File

@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/async.h>
#include "internals.h"
@@ -34,15 +35,16 @@ unsigned long probe_irq_on(void)
unsigned int status;
int i;
/*
* quiesce the kernel, or at least the asynchronous portion
*/
async_synchronize_full();
mutex_lock(&probing_active);
/*
* something may have generated an irq long ago and we want to
* flush such a longstanding irq before considering it as spurious.
*/
for_each_irq_desc_reverse(i, desc) {
if (!desc)
continue;
spin_lock_irq(&desc->lock);
if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
/*
@@ -71,9 +73,6 @@ unsigned long probe_irq_on(void)
* happened in the previous stage, it may have masked itself)
*/
for_each_irq_desc_reverse(i, desc) {
if (!desc)
continue;
spin_lock_irq(&desc->lock);
if (!desc->action && !(desc->status & IRQ_NOPROBE)) {
desc->status |= IRQ_AUTODETECT | IRQ_WAITING;
@@ -92,9 +91,6 @@ unsigned long probe_irq_on(void)
* Now filter out any obviously spurious interrupts
*/
for_each_irq_desc(i, desc) {
if (!desc)
continue;
spin_lock_irq(&desc->lock);
status = desc->status;
@@ -133,9 +129,6 @@ unsigned int probe_irq_mask(unsigned long val)
int i;
for_each_irq_desc(i, desc) {
if (!desc)
continue;
spin_lock_irq(&desc->lock);
status = desc->status;
@@ -178,9 +171,6 @@ int probe_irq_off(unsigned long val)
unsigned int status;
for_each_irq_desc(i, desc) {
if (!desc)
continue;
spin_lock_irq(&desc->lock);
status = desc->status;

View File

@@ -46,7 +46,10 @@ void dynamic_irq_init(unsigned int irq)
desc->irq_count = 0;
desc->irqs_unhandled = 0;
#ifdef CONFIG_SMP
cpus_setall(desc->affinity);
cpumask_setall(desc->affinity);
#ifdef CONFIG_GENERIC_PENDING_IRQ
cpumask_clear(desc->pending_mask);
#endif
#endif
spin_unlock_irqrestore(&desc->lock, flags);
}

View File

@@ -17,6 +17,7 @@
#include <linux/kernel_stat.h>
#include <linux/rculist.h>
#include <linux/hash.h>
#include <linux/bootmem.h>
#include "internals.h"
@@ -56,11 +57,8 @@ void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
int nr_irqs = NR_IRQS;
EXPORT_SYMBOL_GPL(nr_irqs);
void __init __attribute__((weak)) arch_early_irq_init(void)
{
}
#ifdef CONFIG_SPARSE_IRQ
static struct irq_desc irq_desc_init = {
.irq = -1,
.status = IRQ_DISABLED,
@@ -68,9 +66,6 @@ static struct irq_desc irq_desc_init = {
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
#ifdef CONFIG_SMP
.affinity = CPU_MASK_ALL
#endif
};
void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
@@ -90,13 +85,11 @@ void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
desc->kstat_irqs = (unsigned int *)ptr;
}
void __attribute__((weak)) arch_init_chip_data(struct irq_desc *desc, int cpu)
{
}
static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
{
memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
spin_lock_init(&desc->lock);
desc->irq = irq;
#ifdef CONFIG_SMP
desc->cpu = cpu;
@@ -107,6 +100,10 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
printk(KERN_ERR "can not alloc kstat_irqs\n");
BUG_ON(1);
}
if (!init_alloc_desc_masks(desc, cpu, false)) {
printk(KERN_ERR "can not alloc irq_desc cpumasks\n");
BUG_ON(1);
}
arch_init_chip_data(desc, cpu);
}
@@ -115,7 +112,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
*/
DEFINE_SPINLOCK(sparse_irq_lock);
struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly;
struct irq_desc **irq_desc_ptrs __read_mostly;
static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = {
[0 ... NR_IRQS_LEGACY-1] = {
@@ -125,40 +122,52 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
#ifdef CONFIG_SMP
.affinity = CPU_MASK_ALL
#endif
}
};
/* FIXME: use bootmem alloc ...*/
static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS];
static unsigned int *kstat_irqs_legacy;
void __init early_irq_init(void)
int __init early_irq_init(void)
{
struct irq_desc *desc;
int legacy_count;
int i;
/* initialize nr_irqs based on nr_cpu_ids */
arch_probe_nr_irqs();
printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs);
desc = irq_desc_legacy;
legacy_count = ARRAY_SIZE(irq_desc_legacy);
/* allocate irq_desc_ptrs array based on nr_irqs */
irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *));
/* allocate based on nr_cpu_ids */
/* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */
kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids *
sizeof(int));
for (i = 0; i < legacy_count; i++) {
desc[i].irq = i;
desc[i].kstat_irqs = kstat_irqs_legacy[i];
desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
init_alloc_desc_masks(&desc[i], 0, true);
irq_desc_ptrs[i] = desc + i;
}
for (i = legacy_count; i < NR_IRQS; i++)
for (i = legacy_count; i < nr_irqs; i++)
irq_desc_ptrs[i] = NULL;
arch_early_irq_init();
return arch_early_irq_init();
}
struct irq_desc *irq_to_desc(unsigned int irq)
{
return (irq < NR_IRQS) ? irq_desc_ptrs[irq] : NULL;
if (irq_desc_ptrs && irq < nr_irqs)
return irq_desc_ptrs[irq];
return NULL;
}
struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
@@ -167,10 +176,9 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
unsigned long flags;
int node;
if (irq >= NR_IRQS) {
printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n",
irq, NR_IRQS);
WARN_ON(1);
if (irq >= nr_irqs) {
WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n",
irq, nr_irqs);
return NULL;
}
@@ -203,7 +211,7 @@ out_unlock:
return desc;
}
#else
#else /* !CONFIG_SPARSE_IRQ */
struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
[0 ... NR_IRQS-1] = {
@@ -212,13 +220,37 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
#ifdef CONFIG_SMP
.affinity = CPU_MASK_ALL
#endif
}
};
#endif
int __init early_irq_init(void)
{
struct irq_desc *desc;
int count;
int i;
printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS);
desc = irq_desc;
count = ARRAY_SIZE(irq_desc);
for (i = 0; i < count; i++) {
desc[i].irq = i;
init_alloc_desc_masks(&desc[i], 0, true);
}
return arch_early_irq_init();
}
struct irq_desc *irq_to_desc(unsigned int irq)
{
return (irq < NR_IRQS) ? irq_desc + irq : NULL;
}
struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
{
return irq_to_desc(irq);
}
#endif /* !CONFIG_SPARSE_IRQ */
/*
* What should we do if we get a hw irq event on an illegal vector?
@@ -428,9 +460,6 @@ void early_init_irq_lock_class(void)
int i;
for_each_irq_desc(i, desc) {
if (!desc)
continue;
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
}
}
@@ -439,7 +468,7 @@ void early_init_irq_lock_class(void)
unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
{
struct irq_desc *desc = irq_to_desc(irq);
return desc->kstat_irqs[cpu];
return desc ? desc->kstat_irqs[cpu] : 0;
}
#endif
EXPORT_SYMBOL(kstat_irqs_cpu);

View File

@@ -16,7 +16,14 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
extern struct lock_class_key irq_desc_lock_class;
extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
extern spinlock_t sparse_irq_lock;
#ifdef CONFIG_SPARSE_IRQ
/* irq_desc_ptrs allocated at boot time */
extern struct irq_desc **irq_desc_ptrs;
#else
/* irq_desc_ptrs is a fixed size array */
extern struct irq_desc *irq_desc_ptrs[NR_IRQS];
#endif
#ifdef CONFIG_PROC_FS
extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);

View File

@@ -16,8 +16,15 @@
#include "internals.h"
#ifdef CONFIG_SMP
cpumask_var_t irq_default_affinity;
cpumask_t irq_default_affinity = CPU_MASK_ALL;
static int init_irq_default_affinity(void)
{
alloc_cpumask_var(&irq_default_affinity, GFP_KERNEL);
cpumask_setall(irq_default_affinity);
return 0;
}
core_initcall(init_irq_default_affinity);
/**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
@@ -79,7 +86,7 @@ int irq_can_set_affinity(unsigned int irq)
* @cpumask: cpumask
*
*/
int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
{
struct irq_desc *desc = irq_to_desc(irq);
unsigned long flags;
@@ -91,14 +98,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
#ifdef CONFIG_GENERIC_PENDING_IRQ
if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
desc->affinity = cpumask;
cpumask_copy(desc->affinity, cpumask);
desc->chip->set_affinity(irq, cpumask);
} else {
desc->status |= IRQ_MOVE_PENDING;
desc->pending_mask = cpumask;
cpumask_copy(desc->pending_mask, cpumask);
}
#else
desc->affinity = cpumask;
cpumask_copy(desc->affinity, cpumask);
desc->chip->set_affinity(irq, cpumask);
#endif
desc->status |= IRQ_AFFINITY_SET;
@@ -112,26 +119,24 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
*/
int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
{
cpumask_t mask;
if (!irq_can_set_affinity(irq))
return 0;
cpus_and(mask, cpu_online_map, irq_default_affinity);
/*
* Preserve an userspace affinity setup, but make sure that
* one of the targets is online.
*/
if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
if (cpus_intersects(desc->affinity, cpu_online_map))
mask = desc->affinity;
if (cpumask_any_and(desc->affinity, cpu_online_mask)
< nr_cpu_ids)
goto set_affinity;
else
desc->status &= ~IRQ_AFFINITY_SET;
}
desc->affinity = mask;
desc->chip->set_affinity(irq, mask);
cpumask_and(desc->affinity, cpu_online_mask, irq_default_affinity);
set_affinity:
desc->chip->set_affinity(irq, desc->affinity);
return 0;
}

View File

@@ -4,7 +4,6 @@
void move_masked_irq(int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
cpumask_t tmp;
if (likely(!(desc->status & IRQ_MOVE_PENDING)))
return;
@@ -19,7 +18,7 @@ void move_masked_irq(int irq)
desc->status &= ~IRQ_MOVE_PENDING;
if (unlikely(cpus_empty(desc->pending_mask)))
if (unlikely(cpumask_empty(desc->pending_mask)))
return;
if (!desc->chip->set_affinity)
@@ -27,8 +26,6 @@ void move_masked_irq(int irq)
assert_spin_locked(&desc->lock);
cpus_and(tmp, desc->pending_mask, cpu_online_map);
/*
* If there was a valid mask to work with, please
* do the disable, re-program, enable sequence.
@@ -41,10 +38,13 @@ void move_masked_irq(int irq)
* For correct operation this depends on the caller
* masking the irqs.
*/
if (likely(!cpus_empty(tmp))) {
desc->chip->set_affinity(irq,tmp);
if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
< nr_cpu_ids)) {
cpumask_and(desc->affinity,
desc->pending_mask, cpu_online_mask);
desc->chip->set_affinity(irq, desc->affinity);
}
cpus_clear(desc->pending_mask);
cpumask_clear(desc->pending_mask);
}
void move_native_irq(int irq)

View File

@@ -38,14 +38,22 @@ static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
old_desc->kstat_irqs = NULL;
}
static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
struct irq_desc *desc, int cpu)
{
memcpy(desc, old_desc, sizeof(struct irq_desc));
if (!init_alloc_desc_masks(desc, cpu, false)) {
printk(KERN_ERR "irq %d: can not get new irq_desc cpumask "
"for migration.\n", irq);
return false;
}
spin_lock_init(&desc->lock);
desc->cpu = cpu;
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
init_copy_desc_masks(old_desc, desc);
arch_init_copy_chip_data(old_desc, desc, cpu);
return true;
}
static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
@@ -74,15 +82,19 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
node = cpu_to_node(cpu);
desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
printk(KERN_DEBUG " move irq_desc for %d to cpu %d node %d\n",
irq, cpu, node);
if (!desc) {
printk(KERN_ERR "can not get new irq_desc for moving\n");
printk(KERN_ERR "irq %d: can not get new irq_desc "
"for migration.\n", irq);
/* still use old one */
desc = old_desc;
goto out_unlock;
}
init_copy_one_irq_desc(irq, old_desc, desc, cpu);
if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) {
/* still use old one */
kfree(desc);
desc = old_desc;
goto out_unlock;
}
irq_desc_ptrs[irq] = desc;
@@ -106,8 +118,6 @@ struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
return desc;
old_cpu = desc->cpu;
printk(KERN_DEBUG
"try to move irq_desc from cpu %d to %d\n", old_cpu, cpu);
if (old_cpu != cpu) {
node = cpu_to_node(cpu);
old_node = cpu_to_node(old_cpu);

View File

@@ -20,11 +20,11 @@ static struct proc_dir_entry *root_irq_dir;
static int irq_affinity_proc_show(struct seq_file *m, void *v)
{
struct irq_desc *desc = irq_to_desc((long)m->private);
cpumask_t *mask = &desc->affinity;
const struct cpumask *mask = desc->affinity;
#ifdef CONFIG_GENERIC_PENDING_IRQ
if (desc->status & IRQ_MOVE_PENDING)
mask = &desc->pending_mask;
mask = desc->pending_mask;
#endif
seq_cpumask(m, mask);
seq_putc(m, '\n');
@@ -40,33 +40,42 @@ static ssize_t irq_affinity_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *pos)
{
unsigned int irq = (int)(long)PDE(file->f_path.dentry->d_inode)->data;
cpumask_t new_value;
cpumask_var_t new_value;
int err;
if (!irq_to_desc(irq)->chip->set_affinity || no_irq_affinity ||
irq_balancing_disabled(irq))
return -EIO;
if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;
err = cpumask_parse_user(buffer, count, new_value);
if (err)
return err;
goto free_cpumask;
if (!is_affinity_mask_valid(new_value))
return -EINVAL;
if (!is_affinity_mask_valid(new_value)) {
err = -EINVAL;
goto free_cpumask;
}
/*
* Do not allow disabling IRQs completely - it's a too easy
* way to make the system unusable accidentally :-) At least
* one online CPU still has to be targeted.
*/
if (!cpus_intersects(new_value, cpu_online_map))
if (!cpumask_intersects(new_value, cpu_online_mask)) {
/* Special case for empty set - allow the architecture
code to set default SMP affinity. */
return irq_select_affinity_usr(irq) ? -EINVAL : count;
err = irq_select_affinity_usr(irq) ? -EINVAL : count;
} else {
irq_set_affinity(irq, new_value);
err = count;
}
irq_set_affinity(irq, new_value);
return count;
free_cpumask:
free_cpumask_var(new_value);
return err;
}
static int irq_affinity_proc_open(struct inode *inode, struct file *file)
@@ -84,7 +93,7 @@ static const struct file_operations irq_affinity_proc_fops = {
static int default_affinity_show(struct seq_file *m, void *v)
{
seq_cpumask(m, &irq_default_affinity);
seq_cpumask(m, irq_default_affinity);
seq_putc(m, '\n');
return 0;
}
@@ -92,27 +101,37 @@ static int default_affinity_show(struct seq_file *m, void *v)
static ssize_t default_affinity_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
cpumask_t new_value;
cpumask_var_t new_value;
int err;
if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;
err = cpumask_parse_user(buffer, count, new_value);
if (err)
return err;
goto out;
if (!is_affinity_mask_valid(new_value))
return -EINVAL;
if (!is_affinity_mask_valid(new_value)) {
err = -EINVAL;
goto out;
}
/*
* Do not allow disabling IRQs completely - it's a too easy
* way to make the system unusable accidentally :-) At least
* one online CPU still has to be targeted.
*/
if (!cpus_intersects(new_value, cpu_online_map))
return -EINVAL;
if (!cpumask_intersects(new_value, cpu_online_mask)) {
err = -EINVAL;
goto out;
}
irq_default_affinity = new_value;
cpumask_copy(irq_default_affinity, new_value);
err = count;
return count;
out:
free_cpumask_var(new_value);
return err;
}
static int default_affinity_open(struct inode *inode, struct file *file)

View File

@@ -91,9 +91,6 @@ static int misrouted_irq(int irq)
int i, ok = 0;
for_each_irq_desc(i, desc) {
if (!desc)
continue;
if (!i)
continue;
@@ -115,8 +112,6 @@ static void poll_spurious_irqs(unsigned long dummy)
for_each_irq_desc(i, desc) {
unsigned int status;
if (!desc)
continue;
if (!i)
continue;

View File

@@ -1116,7 +1116,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu)
struct elf_prstatus prstatus;
u32 *buf;
if ((cpu < 0) || (cpu >= NR_CPUS))
if ((cpu < 0) || (cpu >= nr_cpu_ids))
return;
/* Using ELF notes here is opportunistic.

View File

@@ -51,8 +51,8 @@ char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
/**
* request_module - try to load a kernel module
* @fmt: printf style format string for the name of the module
* @varargs: arguements as specified in the format string
* @fmt: printf style format string for the name of the module
* @...: arguments as specified in the format string
*
* Load a module using the user mode module loader. The function returns
* zero on success or a negative errno code on failure. Note that a

View File

@@ -69,7 +69,7 @@ static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
/* NOTE: change this value only with kprobe_mutex held */
static bool kprobe_enabled;
DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
static struct {
spinlock_t lock ____cacheline_aligned_in_smp;
@@ -115,6 +115,7 @@ enum kprobe_slot_state {
SLOT_USED = 2,
};
static DEFINE_MUTEX(kprobe_insn_mutex); /* Protects kprobe_insn_pages */
static struct hlist_head kprobe_insn_pages;
static int kprobe_garbage_slots;
static int collect_garbage_slots(void);
@@ -144,10 +145,10 @@ loop_end:
}
/**
* get_insn_slot() - Find a slot on an executable page for an instruction.
* __get_insn_slot() - Find a slot on an executable page for an instruction.
* We allocate an executable page if there's no room on existing ones.
*/
kprobe_opcode_t __kprobes *get_insn_slot(void)
static kprobe_opcode_t __kprobes *__get_insn_slot(void)
{
struct kprobe_insn_page *kip;
struct hlist_node *pos;
@@ -196,6 +197,15 @@ kprobe_opcode_t __kprobes *get_insn_slot(void)
return kip->insns;
}
kprobe_opcode_t __kprobes *get_insn_slot(void)
{
kprobe_opcode_t *ret;
mutex_lock(&kprobe_insn_mutex);
ret = __get_insn_slot();
mutex_unlock(&kprobe_insn_mutex);
return ret;
}
/* Return 1 if all garbages are collected, otherwise 0. */
static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
{
@@ -226,9 +236,13 @@ static int __kprobes collect_garbage_slots(void)
{
struct kprobe_insn_page *kip;
struct hlist_node *pos, *next;
int safety;
/* Ensure no-one is preepmted on the garbages */
if (check_safety() != 0)
mutex_unlock(&kprobe_insn_mutex);
safety = check_safety();
mutex_lock(&kprobe_insn_mutex);
if (safety != 0)
return -EAGAIN;
hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
@@ -251,6 +265,7 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
struct kprobe_insn_page *kip;
struct hlist_node *pos;
mutex_lock(&kprobe_insn_mutex);
hlist_for_each_entry(kip, pos, &kprobe_insn_pages, hlist) {
if (kip->insns <= slot &&
slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
@@ -267,6 +282,8 @@ void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
if (dirty && ++kprobe_garbage_slots > INSNS_PER_PAGE)
collect_garbage_slots();
mutex_unlock(&kprobe_insn_mutex);
}
#endif
@@ -310,7 +327,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
struct kprobe *kp;
list_for_each_entry_rcu(kp, &p->list, list) {
if (kp->pre_handler) {
if (kp->pre_handler && !kprobe_gone(kp)) {
set_kprobe_instance(kp);
if (kp->pre_handler(kp, regs))
return 1;
@@ -326,7 +343,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
struct kprobe *kp;
list_for_each_entry_rcu(kp, &p->list, list) {
if (kp->post_handler) {
if (kp->post_handler && !kprobe_gone(kp)) {
set_kprobe_instance(kp);
kp->post_handler(kp, regs, flags);
reset_kprobe_instance();
@@ -393,7 +410,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
hlist_add_head(&ri->hlist, head);
}
void kretprobe_hash_lock(struct task_struct *tsk,
void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
struct hlist_head **head, unsigned long *flags)
{
unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
@@ -404,13 +421,15 @@ void kretprobe_hash_lock(struct task_struct *tsk,
spin_lock_irqsave(hlist_lock, *flags);
}
static void kretprobe_table_lock(unsigned long hash, unsigned long *flags)
static void __kprobes kretprobe_table_lock(unsigned long hash,
unsigned long *flags)
{
spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
spin_lock_irqsave(hlist_lock, *flags);
}
void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags)
void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
unsigned long *flags)
{
unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
spinlock_t *hlist_lock;
@@ -419,7 +438,7 @@ void kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags)
spin_unlock_irqrestore(hlist_lock, *flags);
}
void kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags)
{
spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
spin_unlock_irqrestore(hlist_lock, *flags);
@@ -526,9 +545,10 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
ap->addr = p->addr;
ap->pre_handler = aggr_pre_handler;
ap->fault_handler = aggr_fault_handler;
if (p->post_handler)
/* We don't care the kprobe which has gone. */
if (p->post_handler && !kprobe_gone(p))
ap->post_handler = aggr_post_handler;
if (p->break_handler)
if (p->break_handler && !kprobe_gone(p))
ap->break_handler = aggr_break_handler;
INIT_LIST_HEAD(&ap->list);
@@ -547,17 +567,41 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
int ret = 0;
struct kprobe *ap;
if (kprobe_gone(old_p)) {
/*
* Attempting to insert new probe at the same location that
* had a probe in the module vaddr area which already
* freed. So, the instruction slot has already been
* released. We need a new slot for the new probe.
*/
ret = arch_prepare_kprobe(old_p);
if (ret)
return ret;
}
if (old_p->pre_handler == aggr_pre_handler) {
copy_kprobe(old_p, p);
ret = add_new_kprobe(old_p, p);
ap = old_p;
} else {
ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
if (!ap)
if (!ap) {
if (kprobe_gone(old_p))
arch_remove_kprobe(old_p);
return -ENOMEM;
}
add_aggr_kprobe(ap, old_p);
copy_kprobe(ap, p);
ret = add_new_kprobe(ap, p);
}
if (kprobe_gone(old_p)) {
/*
* If the old_p has gone, its breakpoint has been disarmed.
* We have to arm it again after preparing real kprobes.
*/
ap->flags &= ~KPROBE_FLAG_GONE;
if (kprobe_enabled)
arch_arm_kprobe(ap);
}
return ret;
}
@@ -600,8 +644,7 @@ static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
return (kprobe_opcode_t *)(((char *)addr) + p->offset);
}
static int __kprobes __register_kprobe(struct kprobe *p,
unsigned long called_from)
int __kprobes register_kprobe(struct kprobe *p)
{
int ret = 0;
struct kprobe *old_p;
@@ -620,28 +663,30 @@ static int __kprobes __register_kprobe(struct kprobe *p,
return -EINVAL;
}
p->mod_refcounted = 0;
p->flags = 0;
/*
* Check if are we probing a module.
*/
probed_mod = __module_text_address((unsigned long) p->addr);
if (probed_mod) {
struct module *calling_mod;
calling_mod = __module_text_address(called_from);
/*
* We must allow modules to probe themself and in this case
* avoid incrementing the module refcount, so as to allow
* unloading of self probing modules.
* We must hold a refcount of the probed module while updating
* its code to prohibit unexpected unloading.
*/
if (calling_mod && calling_mod != probed_mod) {
if (unlikely(!try_module_get(probed_mod))) {
preempt_enable();
return -EINVAL;
}
p->mod_refcounted = 1;
} else
probed_mod = NULL;
if (unlikely(!try_module_get(probed_mod))) {
preempt_enable();
return -EINVAL;
}
/*
* If the module freed .init.text, we couldn't insert
* kprobes in there.
*/
if (within_module_init((unsigned long)p->addr, probed_mod) &&
probed_mod->state != MODULE_STATE_COMING) {
module_put(probed_mod);
preempt_enable();
return -EINVAL;
}
}
preempt_enable();
@@ -668,8 +713,9 @@ static int __kprobes __register_kprobe(struct kprobe *p,
out:
mutex_unlock(&kprobe_mutex);
if (ret && probed_mod)
if (probed_mod)
module_put(probed_mod);
return ret;
}
@@ -697,16 +743,16 @@ valid_p:
list_is_singular(&old_p->list))) {
/*
* Only probe on the hash list. Disarm only if kprobes are
* enabled - otherwise, the breakpoint would already have
* been removed. We save on flushing icache.
* enabled and not gone - otherwise, the breakpoint would
* already have been removed. We save on flushing icache.
*/
if (kprobe_enabled)
if (kprobe_enabled && !kprobe_gone(old_p))
arch_disarm_kprobe(p);
hlist_del_rcu(&old_p->hlist);
} else {
if (p->break_handler)
if (p->break_handler && !kprobe_gone(p))
old_p->break_handler = NULL;
if (p->post_handler) {
if (p->post_handler && !kprobe_gone(p)) {
list_for_each_entry_rcu(list_p, &old_p->list, list) {
if ((list_p != p) && (list_p->post_handler))
goto noclean;
@@ -721,39 +767,27 @@ noclean:
static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
{
struct module *mod;
struct kprobe *old_p;
if (p->mod_refcounted) {
/*
* Since we've already incremented refcount,
* we don't need to disable preemption.
*/
mod = module_text_address((unsigned long)p->addr);
if (mod)
module_put(mod);
}
if (list_empty(&p->list) || list_is_singular(&p->list)) {
if (!list_empty(&p->list)) {
/* "p" is the last child of an aggr_kprobe */
old_p = list_entry(p->list.next, struct kprobe, list);
list_del(&p->list);
kfree(old_p);
}
if (list_empty(&p->list))
arch_remove_kprobe(p);
else if (list_is_singular(&p->list)) {
/* "p" is the last child of an aggr_kprobe */
old_p = list_entry(p->list.next, struct kprobe, list);
list_del(&p->list);
arch_remove_kprobe(old_p);
kfree(old_p);
}
}
static int __register_kprobes(struct kprobe **kps, int num,
unsigned long called_from)
int __kprobes register_kprobes(struct kprobe **kps, int num)
{
int i, ret = 0;
if (num <= 0)
return -EINVAL;
for (i = 0; i < num; i++) {
ret = __register_kprobe(kps[i], called_from);
ret = register_kprobe(kps[i]);
if (ret < 0) {
if (i > 0)
unregister_kprobes(kps, i);
@@ -763,26 +797,11 @@ static int __register_kprobes(struct kprobe **kps, int num,
return ret;
}
/*
* Registration and unregistration functions for kprobe.
*/
int __kprobes register_kprobe(struct kprobe *p)
{
return __register_kprobes(&p, 1,
(unsigned long)__builtin_return_address(0));
}
void __kprobes unregister_kprobe(struct kprobe *p)
{
unregister_kprobes(&p, 1);
}
int __kprobes register_kprobes(struct kprobe **kps, int num)
{
return __register_kprobes(kps, num,
(unsigned long)__builtin_return_address(0));
}
void __kprobes unregister_kprobes(struct kprobe **kps, int num)
{
int i;
@@ -811,8 +830,7 @@ unsigned long __weak arch_deref_entry_point(void *entry)
return (unsigned long)entry;
}
static int __register_jprobes(struct jprobe **jps, int num,
unsigned long called_from)
int __kprobes register_jprobes(struct jprobe **jps, int num)
{
struct jprobe *jp;
int ret = 0, i;
@@ -830,7 +848,7 @@ static int __register_jprobes(struct jprobe **jps, int num,
/* Todo: Verify probepoint is a function entry point */
jp->kp.pre_handler = setjmp_pre_handler;
jp->kp.break_handler = longjmp_break_handler;
ret = __register_kprobe(&jp->kp, called_from);
ret = register_kprobe(&jp->kp);
}
if (ret < 0) {
if (i > 0)
@@ -843,8 +861,7 @@ static int __register_jprobes(struct jprobe **jps, int num,
int __kprobes register_jprobe(struct jprobe *jp)
{
return __register_jprobes(&jp, 1,
(unsigned long)__builtin_return_address(0));
return register_jprobes(&jp, 1);
}
void __kprobes unregister_jprobe(struct jprobe *jp)
@@ -852,12 +869,6 @@ void __kprobes unregister_jprobe(struct jprobe *jp)
unregister_jprobes(&jp, 1);
}
int __kprobes register_jprobes(struct jprobe **jps, int num)
{
return __register_jprobes(jps, num,
(unsigned long)__builtin_return_address(0));
}
void __kprobes unregister_jprobes(struct jprobe **jps, int num)
{
int i;
@@ -920,8 +931,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
return 0;
}
static int __kprobes __register_kretprobe(struct kretprobe *rp,
unsigned long called_from)
int __kprobes register_kretprobe(struct kretprobe *rp)
{
int ret = 0;
struct kretprobe_instance *inst;
@@ -967,21 +977,20 @@ static int __kprobes __register_kretprobe(struct kretprobe *rp,
rp->nmissed = 0;
/* Establish function entry probe point */
ret = __register_kprobe(&rp->kp, called_from);
ret = register_kprobe(&rp->kp);
if (ret != 0)
free_rp_inst(rp);
return ret;
}
static int __register_kretprobes(struct kretprobe **rps, int num,
unsigned long called_from)
int __kprobes register_kretprobes(struct kretprobe **rps, int num)
{
int ret = 0, i;
if (num <= 0)
return -EINVAL;
for (i = 0; i < num; i++) {
ret = __register_kretprobe(rps[i], called_from);
ret = register_kretprobe(rps[i]);
if (ret < 0) {
if (i > 0)
unregister_kretprobes(rps, i);
@@ -991,23 +1000,11 @@ static int __register_kretprobes(struct kretprobe **rps, int num,
return ret;
}
int __kprobes register_kretprobe(struct kretprobe *rp)
{
return __register_kretprobes(&rp, 1,
(unsigned long)__builtin_return_address(0));
}
void __kprobes unregister_kretprobe(struct kretprobe *rp)
{
unregister_kretprobes(&rp, 1);
}
int __kprobes register_kretprobes(struct kretprobe **rps, int num)
{
return __register_kretprobes(rps, num,
(unsigned long)__builtin_return_address(0));
}
void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
{
int i;
@@ -1055,6 +1052,72 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
#endif /* CONFIG_KRETPROBES */
/* Set the kprobe gone and remove its instruction buffer. */
static void __kprobes kill_kprobe(struct kprobe *p)
{
struct kprobe *kp;
p->flags |= KPROBE_FLAG_GONE;
if (p->pre_handler == aggr_pre_handler) {
/*
* If this is an aggr_kprobe, we have to list all the
* chained probes and mark them GONE.
*/
list_for_each_entry_rcu(kp, &p->list, list)
kp->flags |= KPROBE_FLAG_GONE;
p->post_handler = NULL;
p->break_handler = NULL;
}
/*
* Here, we can remove insn_slot safely, because no thread calls
* the original probed function (which will be freed soon) any more.
*/
arch_remove_kprobe(p);
}
/* Module notifier call back, checking kprobes on the module */
static int __kprobes kprobes_module_callback(struct notifier_block *nb,
unsigned long val, void *data)
{
struct module *mod = data;
struct hlist_head *head;
struct hlist_node *node;
struct kprobe *p;
unsigned int i;
int checkcore = (val == MODULE_STATE_GOING);
if (val != MODULE_STATE_GOING && val != MODULE_STATE_LIVE)
return NOTIFY_DONE;
/*
* When MODULE_STATE_GOING was notified, both of module .text and
* .init.text sections would be freed. When MODULE_STATE_LIVE was
* notified, only .init.text section would be freed. We need to
* disable kprobes which have been inserted in the sections.
*/
mutex_lock(&kprobe_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist)
if (within_module_init((unsigned long)p->addr, mod) ||
(checkcore &&
within_module_core((unsigned long)p->addr, mod))) {
/*
* The vaddr this probe is installed will soon
* be vfreed buy not synced to disk. Hence,
* disarming the breakpoint isn't needed.
*/
kill_kprobe(p);
}
}
mutex_unlock(&kprobe_mutex);
return NOTIFY_DONE;
}
static struct notifier_block kprobe_module_nb = {
.notifier_call = kprobes_module_callback,
.priority = 0
};
static int __init init_kprobes(void)
{
int i, err = 0;
@@ -1111,6 +1174,9 @@ static int __init init_kprobes(void)
err = arch_init_kprobes();
if (!err)
err = register_die_notifier(&kprobe_exceptions_nb);
if (!err)
err = register_module_notifier(&kprobe_module_nb);
kprobes_initialized = (err == 0);
if (!err)
@@ -1131,10 +1197,12 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
else
kprobe_type = "k";
if (sym)
seq_printf(pi, "%p %s %s+0x%x %s\n", p->addr, kprobe_type,
sym, offset, (modname ? modname : " "));
seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type,
sym, offset, (modname ? modname : " "),
(kprobe_gone(p) ? "[GONE]" : ""));
else
seq_printf(pi, "%p %s %p\n", p->addr, kprobe_type, p->addr);
seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr,
(kprobe_gone(p) ? "[GONE]" : ""));
}
static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -1215,7 +1283,8 @@ static void __kprobes enable_all_kprobes(void)
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist)
arch_arm_kprobe(p);
if (!kprobe_gone(p))
arch_arm_kprobe(p);
}
kprobe_enabled = true;
@@ -1244,7 +1313,7 @@ static void __kprobes disable_all_kprobes(void)
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, node, head, hlist) {
if (!arch_trampoline_kprobe(p))
if (!arch_trampoline_kprobe(p) && !kprobe_gone(p))
arch_disarm_kprobe(p);
}
}

View File

@@ -24,7 +24,7 @@ static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
static struct kobj_attribute _name##_attr = \
__ATTR(_name, 0644, _name##_show, _name##_store)
#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
#if defined(CONFIG_HOTPLUG)
/* current uevent sequence number */
static ssize_t uevent_seqnum_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
@@ -137,7 +137,7 @@ struct kobject *kernel_kobj;
EXPORT_SYMBOL_GPL(kernel_kobj);
static struct attribute * kernel_attrs[] = {
#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
#if defined(CONFIG_HOTPLUG)
&uevent_seqnum_attr.attr,
&uevent_helper_attr.attr,
#endif

View File

@@ -43,7 +43,6 @@
#include <linux/device.h>
#include <linux/string.h>
#include <linux/mutex.h>
#include <linux/unwind.h>
#include <linux/rculist.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -51,6 +50,7 @@
#include <asm/sections.h>
#include <linux/tracepoint.h>
#include <linux/ftrace.h>
#include <linux/async.h>
#if 0
#define DEBUGP printk
@@ -757,8 +757,16 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
return -EFAULT;
name[MODULE_NAME_LEN-1] = '\0';
if (mutex_lock_interruptible(&module_mutex) != 0)
return -EINTR;
/* Create stop_machine threads since free_module relies on
* a non-failing stop_machine call. */
ret = stop_machine_create();
if (ret)
return ret;
if (mutex_lock_interruptible(&module_mutex) != 0) {
ret = -EINTR;
goto out_stop;
}
mod = find_module(name);
if (!mod) {
@@ -809,6 +817,7 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
mod->exit();
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_GOING, mod);
async_synchronize_full();
mutex_lock(&module_mutex);
/* Store the name of the last unloaded module for diagnostic purposes */
strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
@@ -817,10 +826,12 @@ sys_delete_module(const char __user *name_user, unsigned int flags)
out:
mutex_unlock(&module_mutex);
out_stop:
stop_machine_destroy();
return ret;
}
static void print_unload_info(struct seq_file *m, struct module *mod)
static inline void print_unload_info(struct seq_file *m, struct module *mod)
{
struct module_use *use;
int printed_something = 0;
@@ -893,7 +904,7 @@ void module_put(struct module *module)
EXPORT_SYMBOL(module_put);
#else /* !CONFIG_MODULE_UNLOAD */
static void print_unload_info(struct seq_file *m, struct module *mod)
static inline void print_unload_info(struct seq_file *m, struct module *mod)
{
/* We don't know the usage count, or what modules are using. */
seq_printf(m, " - -");
@@ -1439,8 +1450,6 @@ static void free_module(struct module *mod)
remove_sect_attrs(mod);
mod_kobject_remove(mod);
unwind_remove_table(mod->unwind_info, 0);
/* Arch-specific cleanup. */
module_arch_cleanup(mod);
@@ -1578,11 +1587,21 @@ static int simplify_symbols(Elf_Shdr *sechdrs,
return ret;
}
/* Additional bytes needed by arch in front of individual sections */
unsigned int __weak arch_mod_section_prepend(struct module *mod,
unsigned int section)
{
/* default implementation just returns zero */
return 0;
}
/* Update size with this section: return offset. */
static long get_offset(unsigned int *size, Elf_Shdr *sechdr)
static long get_offset(struct module *mod, unsigned int *size,
Elf_Shdr *sechdr, unsigned int section)
{
long ret;
*size += arch_mod_section_prepend(mod, section);
ret = ALIGN(*size, sechdr->sh_addralign ?: 1);
*size = ret + sechdr->sh_size;
return ret;
@@ -1622,7 +1641,7 @@ static void layout_sections(struct module *mod,
|| strncmp(secstrings + s->sh_name,
".init", 5) == 0)
continue;
s->sh_entsize = get_offset(&mod->core_size, s);
s->sh_entsize = get_offset(mod, &mod->core_size, s, i);
DEBUGP("\t%s\n", secstrings + s->sh_name);
}
if (m == 0)
@@ -1640,7 +1659,7 @@ static void layout_sections(struct module *mod,
|| strncmp(secstrings + s->sh_name,
".init", 5) != 0)
continue;
s->sh_entsize = (get_offset(&mod->init_size, s)
s->sh_entsize = (get_offset(mod, &mod->init_size, s, i)
| INIT_OFFSET_MASK);
DEBUGP("\t%s\n", secstrings + s->sh_name);
}
@@ -1725,15 +1744,15 @@ static const struct kernel_symbol *lookup_symbol(const char *name,
return NULL;
}
static int is_exported(const char *name, const struct module *mod)
static int is_exported(const char *name, unsigned long value,
const struct module *mod)
{
if (!mod && lookup_symbol(name, __start___ksymtab, __stop___ksymtab))
return 1;
const struct kernel_symbol *ks;
if (!mod)
ks = lookup_symbol(name, __start___ksymtab, __stop___ksymtab);
else
if (mod && lookup_symbol(name, mod->syms, mod->syms + mod->num_syms))
return 1;
else
return 0;
ks = lookup_symbol(name, mod->syms, mod->syms + mod->num_syms);
return ks != NULL && ks->value == value;
}
/* As per nm */
@@ -1847,7 +1866,6 @@ static noinline struct module *load_module(void __user *umod,
unsigned int symindex = 0;
unsigned int strindex = 0;
unsigned int modindex, versindex, infoindex, pcpuindex;
unsigned int unwindex = 0;
unsigned int num_kp, num_mcount;
struct kernel_param *kp;
struct module *mod;
@@ -1865,6 +1883,13 @@ static noinline struct module *load_module(void __user *umod,
/* vmalloc barfs on "unusual" numbers. Check here */
if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
return ERR_PTR(-ENOMEM);
/* Create stop_machine threads since the error path relies on
* a non-failing stop_machine call. */
err = stop_machine_create();
if (err)
goto free_hdr;
if (copy_from_user(hdr, umod, len) != 0) {
err = -EFAULT;
goto free_hdr;
@@ -1930,9 +1955,6 @@ static noinline struct module *load_module(void __user *umod,
versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);
#ifdef ARCH_UNWIND_SECTION_NAME
unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
#endif
/* Don't keep modinfo and version sections. */
sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1942,8 +1964,6 @@ static noinline struct module *load_module(void __user *umod,
sechdrs[symindex].sh_flags |= SHF_ALLOC;
sechdrs[strindex].sh_flags |= SHF_ALLOC;
#endif
if (unwindex)
sechdrs[unwindex].sh_flags |= SHF_ALLOC;
/* Check module struct version now, before we try to use module. */
if (!check_modstruct_version(sechdrs, versindex, mod)) {
@@ -2240,14 +2260,10 @@ static noinline struct module *load_module(void __user *umod,
add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
/* Size of section 0 is 0, so this works well if no unwind info. */
mod->unwind_info = unwind_add_table(mod,
(void *)sechdrs[unwindex].sh_addr,
sechdrs[unwindex].sh_size);
/* Get rid of temporary copy */
vfree(hdr);
stop_machine_destroy();
/* Done! */
return mod;
@@ -2270,6 +2286,7 @@ static noinline struct module *load_module(void __user *umod,
kfree(args);
free_hdr:
vfree(hdr);
stop_machine_destroy();
return ERR_PTR(err);
truncated:
@@ -2337,11 +2354,12 @@ sys_init_module(void __user *umod,
/* Now it's a first class citizen! Wake up anyone waiting for it. */
mod->state = MODULE_STATE_LIVE;
wake_up(&module_wq);
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_LIVE, mod);
mutex_lock(&module_mutex);
/* Drop initial reference. */
module_put(mod);
unwind_remove_table(mod->unwind_info, 1);
module_free(mod, mod->module_init);
mod->module_init = NULL;
mod->init_size = 0;
@@ -2376,7 +2394,7 @@ static const char *get_ksymbol(struct module *mod,
unsigned long nextval;
/* At worse, next value is at end of module */
if (within(addr, mod->module_init, mod->init_size))
if (within_module_init(addr, mod))
nextval = (unsigned long)mod->module_init+mod->init_text_size;
else
nextval = (unsigned long)mod->module_core+mod->core_text_size;
@@ -2424,8 +2442,8 @@ const char *module_address_lookup(unsigned long addr,
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
if (within(addr, mod->module_init, mod->init_size)
|| within(addr, mod->module_core, mod->core_size)) {
if (within_module_init(addr, mod) ||
within_module_core(addr, mod)) {
if (modname)
*modname = mod->name;
ret = get_ksymbol(mod, addr, size, offset);
@@ -2447,8 +2465,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
if (within(addr, mod->module_init, mod->init_size) ||
within(addr, mod->module_core, mod->core_size)) {
if (within_module_init(addr, mod) ||
within_module_core(addr, mod)) {
const char *sym;
sym = get_ksymbol(mod, addr, NULL, NULL);
@@ -2471,8 +2489,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
if (within(addr, mod->module_init, mod->init_size) ||
within(addr, mod->module_core, mod->core_size)) {
if (within_module_init(addr, mod) ||
within_module_core(addr, mod)) {
const char *sym;
sym = get_ksymbol(mod, addr, size, offset);
@@ -2504,7 +2522,7 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
KSYM_NAME_LEN);
strlcpy(module_name, mod->name, MODULE_NAME_LEN);
*exported = is_exported(name, mod);
*exported = is_exported(name, *value, mod);
preempt_enable();
return 0;
}
@@ -2691,7 +2709,7 @@ int is_module_address(unsigned long addr)
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
if (within(addr, mod->module_core, mod->core_size)) {
if (within_module_core(addr, mod)) {
preempt_enable();
return 1;
}

View File

@@ -13,7 +13,6 @@
struct ns_cgroup {
struct cgroup_subsys_state css;
spinlock_t lock;
};
struct cgroup_subsys ns_subsys;
@@ -84,7 +83,6 @@ static struct cgroup_subsys_state *ns_create(struct cgroup_subsys *ss,
ns_cgroup = kzalloc(sizeof(*ns_cgroup), GFP_KERNEL);
if (!ns_cgroup)
return ERR_PTR(-ENOMEM);
spin_lock_init(&ns_cgroup->lock);
return &ns_cgroup->css;
}

View File

@@ -302,6 +302,8 @@ static int init_oops_id(void)
{
if (!oops_id)
get_random_bytes(&oops_id, sizeof(oops_id));
else
oops_id++;
return 0;
}

View File

@@ -474,8 +474,14 @@ pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns)
}
EXPORT_SYMBOL(task_session_nr_ns);
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
return ns_of_pid(task_pid(tsk));
}
EXPORT_SYMBOL_GPL(task_active_pid_ns);
/*
* Used by proc to find the first pid that is greater then or equal to nr.
* Used by proc to find the first pid that is greater than or equal to nr.
*
* If there is a pid at nr this function is exactly the same as find_pid_ns.
*/

View File

@@ -258,12 +258,12 @@ int hibernation_snapshot(int platform_mode)
{
int error;
/* Free memory before shutting down devices. */
error = swsusp_shrink_memory();
error = platform_begin(platform_mode);
if (error)
return error;
error = platform_begin(platform_mode);
/* Free memory before shutting down devices. */
error = swsusp_shrink_memory();
if (error)
goto Close;

View File

@@ -615,7 +615,7 @@ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
/* this may fail if the RTC hasn't been initialized */
status = rtc_read_time(rtc, &alm.time);
if (status < 0) {
printk(err_readtime, rtc->dev.bus_id, status);
printk(err_readtime, dev_name(&rtc->dev), status);
return;
}
rtc_tm_to_time(&alm.time, &now);
@@ -626,7 +626,7 @@ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state)
status = rtc_set_alarm(rtc, &alm);
if (status < 0) {
printk(err_wakealarm, rtc->dev.bus_id, status);
printk(err_wakealarm, dev_name(&rtc->dev), status);
return;
}
@@ -660,7 +660,7 @@ static int __init has_wakealarm(struct device *dev, void *name_ptr)
if (!device_may_wakeup(candidate->dev.parent))
return 0;
*(char **)name_ptr = dev->bus_id;
*(const char **)name_ptr = dev_name(dev);
return 1;
}

View File

@@ -27,7 +27,7 @@ static DECLARE_WORK(poweroff_work, do_poweroff);
static void handle_poweroff(int key, struct tty_struct *tty)
{
/* run sysrq poweroff on boot cpu */
schedule_work_on(first_cpu(cpu_online_map), &poweroff_work);
schedule_work_on(cpumask_first(cpu_online_mask), &poweroff_work);
}
static struct sysrq_key_op sysrq_poweroff_op = {

View File

@@ -25,6 +25,7 @@
#include <linux/syscalls.h>
#include <linux/console.h>
#include <linux/highmem.h>
#include <linux/list.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -192,12 +193,6 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
return ret;
}
static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
{
free_list_of_pages(ca->chain, clear_page_nosave);
memset(ca, 0, sizeof(struct chain_allocator));
}
/**
* Data types related to memory bitmaps.
*
@@ -233,7 +228,7 @@ static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
#define BM_BITS_PER_BLOCK (PAGE_SIZE << 3)
struct bm_block {
struct bm_block *next; /* next element of the list */
struct list_head hook; /* hook into a list of bitmap blocks */
unsigned long start_pfn; /* pfn represented by the first bit */
unsigned long end_pfn; /* pfn represented by the last bit plus 1 */
unsigned long *data; /* bitmap representing pages */
@@ -244,24 +239,15 @@ static inline unsigned long bm_block_bits(struct bm_block *bb)
return bb->end_pfn - bb->start_pfn;
}
struct zone_bitmap {
struct zone_bitmap *next; /* next element of the list */
unsigned long start_pfn; /* minimal pfn in this zone */
unsigned long end_pfn; /* maximal pfn in this zone plus 1 */
struct bm_block *bm_blocks; /* list of bitmap blocks */
struct bm_block *cur_block; /* recently used bitmap block */
};
/* strcut bm_position is used for browsing memory bitmaps */
struct bm_position {
struct zone_bitmap *zone_bm;
struct bm_block *block;
int bit;
};
struct memory_bitmap {
struct zone_bitmap *zone_bm_list; /* list of zone bitmaps */
struct list_head blocks; /* list of bitmap blocks */
struct linked_page *p_list; /* list of pages used to store zone
* bitmap objects and bitmap block
* objects
@@ -273,11 +259,7 @@ struct memory_bitmap {
static void memory_bm_position_reset(struct memory_bitmap *bm)
{
struct zone_bitmap *zone_bm;
zone_bm = bm->zone_bm_list;
bm->cur.zone_bm = zone_bm;
bm->cur.block = zone_bm->bm_blocks;
bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
bm->cur.bit = 0;
}
@@ -285,151 +267,184 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
/**
* create_bm_block_list - create a list of block bitmap objects
* @nr_blocks - number of blocks to allocate
* @list - list to put the allocated blocks into
* @ca - chain allocator to be used for allocating memory
*/
static inline struct bm_block *
create_bm_block_list(unsigned int nr_blocks, struct chain_allocator *ca)
static int create_bm_block_list(unsigned long pages,
struct list_head *list,
struct chain_allocator *ca)
{
struct bm_block *bblist = NULL;
unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
while (nr_blocks-- > 0) {
struct bm_block *bb;
bb = chain_alloc(ca, sizeof(struct bm_block));
if (!bb)
return NULL;
bb->next = bblist;
bblist = bb;
return -ENOMEM;
list_add(&bb->hook, list);
}
return 0;
}
struct mem_extent {
struct list_head hook;
unsigned long start;
unsigned long end;
};
/**
* free_mem_extents - free a list of memory extents
* @list - list of extents to empty
*/
static void free_mem_extents(struct list_head *list)
{
struct mem_extent *ext, *aux;
list_for_each_entry_safe(ext, aux, list, hook) {
list_del(&ext->hook);
kfree(ext);
}
return bblist;
}
/**
* create_zone_bm_list - create a list of zone bitmap objects
* create_mem_extents - create a list of memory extents representing
* contiguous ranges of PFNs
* @list - list to put the extents into
* @gfp_mask - mask to use for memory allocations
*/
static inline struct zone_bitmap *
create_zone_bm_list(unsigned int nr_zones, struct chain_allocator *ca)
static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
{
struct zone_bitmap *zbmlist = NULL;
struct zone *zone;
while (nr_zones-- > 0) {
struct zone_bitmap *zbm;
INIT_LIST_HEAD(list);
zbm = chain_alloc(ca, sizeof(struct zone_bitmap));
if (!zbm)
return NULL;
for_each_zone(zone) {
unsigned long zone_start, zone_end;
struct mem_extent *ext, *cur, *aux;
zbm->next = zbmlist;
zbmlist = zbm;
if (!populated_zone(zone))
continue;
zone_start = zone->zone_start_pfn;
zone_end = zone->zone_start_pfn + zone->spanned_pages;
list_for_each_entry(ext, list, hook)
if (zone_start <= ext->end)
break;
if (&ext->hook == list || zone_end < ext->start) {
/* New extent is necessary */
struct mem_extent *new_ext;
new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
if (!new_ext) {
free_mem_extents(list);
return -ENOMEM;
}
new_ext->start = zone_start;
new_ext->end = zone_end;
list_add_tail(&new_ext->hook, &ext->hook);
continue;
}
/* Merge this zone's range of PFNs with the existing one */
if (zone_start < ext->start)
ext->start = zone_start;
if (zone_end > ext->end)
ext->end = zone_end;
/* More merging may be possible */
cur = ext;
list_for_each_entry_safe_continue(cur, aux, list, hook) {
if (zone_end < cur->start)
break;
if (zone_end < cur->end)
ext->end = cur->end;
list_del(&cur->hook);
kfree(cur);
}
}
return zbmlist;
return 0;
}
/**
* memory_bm_create - allocate memory for a memory bitmap
*/
static int
memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
{
struct chain_allocator ca;
struct zone *zone;
struct zone_bitmap *zone_bm;
struct bm_block *bb;
unsigned int nr;
struct list_head mem_extents;
struct mem_extent *ext;
int error;
chain_init(&ca, gfp_mask, safe_needed);
INIT_LIST_HEAD(&bm->blocks);
/* Compute the number of zones */
nr = 0;
for_each_zone(zone)
if (populated_zone(zone))
nr++;
error = create_mem_extents(&mem_extents, gfp_mask);
if (error)
return error;
/* Allocate the list of zones bitmap objects */
zone_bm = create_zone_bm_list(nr, &ca);
bm->zone_bm_list = zone_bm;
if (!zone_bm) {
chain_free(&ca, PG_UNSAFE_CLEAR);
return -ENOMEM;
}
list_for_each_entry(ext, &mem_extents, hook) {
struct bm_block *bb;
unsigned long pfn = ext->start;
unsigned long pages = ext->end - ext->start;
/* Initialize the zone bitmap objects */
for_each_zone(zone) {
unsigned long pfn;
bb = list_entry(bm->blocks.prev, struct bm_block, hook);
if (!populated_zone(zone))
continue;
error = create_bm_block_list(pages, bm->blocks.prev, &ca);
if (error)
goto Error;
zone_bm->start_pfn = zone->zone_start_pfn;
zone_bm->end_pfn = zone->zone_start_pfn + zone->spanned_pages;
/* Allocate the list of bitmap block objects */
nr = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
bb = create_bm_block_list(nr, &ca);
zone_bm->bm_blocks = bb;
zone_bm->cur_block = bb;
if (!bb)
goto Free;
nr = zone->spanned_pages;
pfn = zone->zone_start_pfn;
/* Initialize the bitmap block objects */
while (bb) {
unsigned long *ptr;
ptr = get_image_page(gfp_mask, safe_needed);
bb->data = ptr;
if (!ptr)
goto Free;
list_for_each_entry_continue(bb, &bm->blocks, hook) {
bb->data = get_image_page(gfp_mask, safe_needed);
if (!bb->data) {
error = -ENOMEM;
goto Error;
}
bb->start_pfn = pfn;
if (nr >= BM_BITS_PER_BLOCK) {
if (pages >= BM_BITS_PER_BLOCK) {
pfn += BM_BITS_PER_BLOCK;
nr -= BM_BITS_PER_BLOCK;
pages -= BM_BITS_PER_BLOCK;
} else {
/* This is executed only once in the loop */
pfn += nr;
pfn += pages;
}
bb->end_pfn = pfn;
bb = bb->next;
}
zone_bm = zone_bm->next;
}
bm->p_list = ca.chain;
memory_bm_position_reset(bm);
return 0;
Exit:
free_mem_extents(&mem_extents);
return error;
Free:
Error:
bm->p_list = ca.chain;
memory_bm_free(bm, PG_UNSAFE_CLEAR);
return -ENOMEM;
goto Exit;
}
/**
* memory_bm_free - free memory occupied by the memory bitmap @bm
*/
static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
{
struct zone_bitmap *zone_bm;
struct bm_block *bb;
/* Free the list of bit blocks for each zone_bitmap object */
zone_bm = bm->zone_bm_list;
while (zone_bm) {
struct bm_block *bb;
list_for_each_entry(bb, &bm->blocks, hook)
if (bb->data)
free_image_page(bb->data, clear_nosave_free);
bb = zone_bm->bm_blocks;
while (bb) {
if (bb->data)
free_image_page(bb->data, clear_nosave_free);
bb = bb->next;
}
zone_bm = zone_bm->next;
}
free_list_of_pages(bm->p_list, clear_nosave_free);
bm->zone_bm_list = NULL;
INIT_LIST_HEAD(&bm->blocks);
}
/**
@@ -437,38 +452,33 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
* to given pfn. The cur_zone_bm member of @bm and the cur_block member
* of @bm->cur_zone_bm are updated.
*/
static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
void **addr, unsigned int *bit_nr)
{
struct zone_bitmap *zone_bm;
struct bm_block *bb;
/* Check if the pfn is from the current zone */
zone_bm = bm->cur.zone_bm;
if (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
zone_bm = bm->zone_bm_list;
/* We don't assume that the zones are sorted by pfns */
while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
zone_bm = zone_bm->next;
if (!zone_bm)
return -EFAULT;
}
bm->cur.zone_bm = zone_bm;
}
/* Check if the pfn corresponds to the current bitmap block */
bb = zone_bm->cur_block;
/*
* Check if the pfn corresponds to the current bitmap block and find
* the block where it fits if this is not the case.
*/
bb = bm->cur.block;
if (pfn < bb->start_pfn)
bb = zone_bm->bm_blocks;
list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
if (pfn >= bb->start_pfn)
break;
while (pfn >= bb->end_pfn) {
bb = bb->next;
if (pfn >= bb->end_pfn)
list_for_each_entry_continue(bb, &bm->blocks, hook)
if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
break;
BUG_ON(!bb);
}
zone_bm->cur_block = bb;
if (&bb->hook == &bm->blocks)
return -EFAULT;
/* The block has been found */
bm->cur.block = bb;
pfn -= bb->start_pfn;
bm->cur.bit = pfn + 1;
*bit_nr = pfn;
*addr = bb->data;
return 0;
@@ -519,6 +529,14 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
return test_bit(bit, addr);
}
static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
{
void *addr;
unsigned int bit;
return !memory_bm_find_bit(bm, pfn, &addr, &bit);
}
/**
* memory_bm_next_pfn - find the pfn that corresponds to the next set bit
* in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is
@@ -530,29 +548,21 @@ static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
{
struct zone_bitmap *zone_bm;
struct bm_block *bb;
int bit;
bb = bm->cur.block;
do {
bb = bm->cur.block;
do {
bit = bm->cur.bit;
bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
if (bit < bm_block_bits(bb))
goto Return_pfn;
bit = bm->cur.bit;
bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
if (bit < bm_block_bits(bb))
goto Return_pfn;
bb = list_entry(bb->hook.next, struct bm_block, hook);
bm->cur.block = bb;
bm->cur.bit = 0;
} while (&bb->hook != &bm->blocks);
bb = bb->next;
bm->cur.block = bb;
bm->cur.bit = 0;
} while (bb);
zone_bm = bm->cur.zone_bm->next;
if (zone_bm) {
bm->cur.zone_bm = zone_bm;
bm->cur.block = zone_bm->bm_blocks;
bm->cur.bit = 0;
}
} while (zone_bm);
memory_bm_position_reset(bm);
return BM_END_OF_MAP;
@@ -808,8 +818,7 @@ static unsigned int count_free_highmem_pages(void)
* We should save the page if it isn't Nosave or NosaveFree, or Reserved,
* and it isn't a part of a free chunk of pages.
*/
static struct page *saveable_highmem_page(unsigned long pfn)
static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
{
struct page *page;
@@ -817,6 +826,8 @@ static struct page *saveable_highmem_page(unsigned long pfn)
return NULL;
page = pfn_to_page(pfn);
if (page_zone(page) != zone)
return NULL;
BUG_ON(!PageHighMem(page));
@@ -846,13 +857,16 @@ unsigned int count_highmem_pages(void)
mark_free_pages(zone);
max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if (saveable_highmem_page(pfn))
if (saveable_highmem_page(zone, pfn))
n++;
}
return n;
}
#else
static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
{
return NULL;
}
#endif /* CONFIG_HIGHMEM */
/**
@@ -863,8 +877,7 @@ static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
* of pages statically defined as 'unsaveable', and it isn't a part of
* a free chunk of pages.
*/
static struct page *saveable_page(unsigned long pfn)
static struct page *saveable_page(struct zone *zone, unsigned long pfn)
{
struct page *page;
@@ -872,6 +885,8 @@ static struct page *saveable_page(unsigned long pfn)
return NULL;
page = pfn_to_page(pfn);
if (page_zone(page) != zone)
return NULL;
BUG_ON(PageHighMem(page));
@@ -903,7 +918,7 @@ unsigned int count_data_pages(void)
mark_free_pages(zone);
max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
if(saveable_page(pfn))
if (saveable_page(zone, pfn))
n++;
}
return n;
@@ -944,7 +959,7 @@ static inline struct page *
page_is_saveable(struct zone *zone, unsigned long pfn)
{
return is_highmem(zone) ?
saveable_highmem_page(pfn) : saveable_page(pfn);
saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
}
static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
@@ -966,7 +981,7 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
* data modified by kmap_atomic()
*/
safe_copy_page(buffer, s_page);
dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0);
dst = kmap_atomic(d_page, KM_USER0);
memcpy(dst, buffer, PAGE_SIZE);
kunmap_atomic(dst, KM_USER0);
} else {
@@ -975,7 +990,7 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
}
}
#else
#define page_is_saveable(zone, pfn) saveable_page(pfn)
#define page_is_saveable(zone, pfn) saveable_page(zone, pfn)
static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
@@ -1459,9 +1474,7 @@ load_header(struct swsusp_info *info)
* unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
* the corresponding bit in the memory bitmap @bm
*/
static inline void
unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
{
int j;
@@ -1469,8 +1482,13 @@ unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
if (unlikely(buf[j] == BM_END_OF_MAP))
break;
memory_bm_set_bit(bm, buf[j]);
if (memory_bm_pfn_present(bm, buf[j]))
memory_bm_set_bit(bm, buf[j]);
else
return -EFAULT;
}
return 0;
}
/* List of "safe" pages that may be used to store data loaded from the suspend
@@ -1608,7 +1626,7 @@ get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
if (!pbe) {
swsusp_free();
return NULL;
return ERR_PTR(-ENOMEM);
}
pbe->orig_page = page;
if (safe_highmem_pages > 0) {
@@ -1677,7 +1695,7 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
static inline void *
get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
{
return NULL;
return ERR_PTR(-EINVAL);
}
static inline void copy_last_highmem_page(void) {}
@@ -1788,8 +1806,13 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
{
struct pbe *pbe;
struct page *page = pfn_to_page(memory_bm_next_pfn(bm));
struct page *page;
unsigned long pfn = memory_bm_next_pfn(bm);
if (pfn == BM_END_OF_MAP)
return ERR_PTR(-EFAULT);
page = pfn_to_page(pfn);
if (PageHighMem(page))
return get_highmem_page_buffer(page, ca);
@@ -1805,7 +1828,7 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
pbe = chain_alloc(ca, sizeof(struct pbe));
if (!pbe) {
swsusp_free();
return NULL;
return ERR_PTR(-ENOMEM);
}
pbe->orig_address = page_address(page);
pbe->address = safe_pages_list;
@@ -1868,7 +1891,10 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
return error;
} else if (handle->prev <= nr_meta_pages) {
unpack_orig_pfns(buffer, &copy_bm);
error = unpack_orig_pfns(buffer, &copy_bm);
if (error)
return error;
if (handle->prev == nr_meta_pages) {
error = prepare_image(&orig_bm, &copy_bm);
if (error)
@@ -1879,12 +1905,14 @@ int snapshot_write_next(struct snapshot_handle *handle, size_t count)
restore_pblist = NULL;
handle->buffer = get_buffer(&orig_bm, &ca);
handle->sync_read = 0;
if (!handle->buffer)
return -ENOMEM;
if (IS_ERR(handle->buffer))
return PTR_ERR(handle->buffer);
}
} else {
copy_last_highmem_page();
handle->buffer = get_buffer(&orig_bm, &ca);
if (IS_ERR(handle->buffer))
return PTR_ERR(handle->buffer);
if (handle->buffer != buffer)
handle->sync_read = 0;
}

View File

@@ -262,3 +262,125 @@ int swsusp_shrink_memory(void)
return 0;
}
/*
* Platforms, like ACPI, may want us to save some memory used by them during
* hibernation and to restore the contents of this memory during the subsequent
* resume. The code below implements a mechanism allowing us to do that.
*/
struct nvs_page {
unsigned long phys_start;
unsigned int size;
void *kaddr;
void *data;
struct list_head node;
};
static LIST_HEAD(nvs_list);
/**
* hibernate_nvs_register - register platform NVS memory region to save
* @start - physical address of the region
* @size - size of the region
*
* The NVS region need not be page-aligned (both ends) and we arrange
* things so that the data from page-aligned addresses in this region will
* be copied into separate RAM pages.
*/
int hibernate_nvs_register(unsigned long start, unsigned long size)
{
struct nvs_page *entry, *next;
while (size > 0) {
unsigned int nr_bytes;
entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
if (!entry)
goto Error;
list_add_tail(&entry->node, &nvs_list);
entry->phys_start = start;
nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
entry->size = (size < nr_bytes) ? size : nr_bytes;
start += entry->size;
size -= entry->size;
}
return 0;
Error:
list_for_each_entry_safe(entry, next, &nvs_list, node) {
list_del(&entry->node);
kfree(entry);
}
return -ENOMEM;
}
/**
* hibernate_nvs_free - free data pages allocated for saving NVS regions
*/
void hibernate_nvs_free(void)
{
struct nvs_page *entry;
list_for_each_entry(entry, &nvs_list, node)
if (entry->data) {
free_page((unsigned long)entry->data);
entry->data = NULL;
if (entry->kaddr) {
iounmap(entry->kaddr);
entry->kaddr = NULL;
}
}
}
/**
* hibernate_nvs_alloc - allocate memory necessary for saving NVS regions
*/
int hibernate_nvs_alloc(void)
{
struct nvs_page *entry;
list_for_each_entry(entry, &nvs_list, node) {
entry->data = (void *)__get_free_page(GFP_KERNEL);
if (!entry->data) {
hibernate_nvs_free();
return -ENOMEM;
}
}
return 0;
}
/**
* hibernate_nvs_save - save NVS memory regions
*/
void hibernate_nvs_save(void)
{
struct nvs_page *entry;
printk(KERN_INFO "PM: Saving platform NVS memory\n");
list_for_each_entry(entry, &nvs_list, node)
if (entry->data) {
entry->kaddr = ioremap(entry->phys_start, entry->size);
memcpy(entry->data, entry->kaddr, entry->size);
}
}
/**
* hibernate_nvs_restore - restore NVS memory regions
*
* This function is going to be called with interrupts disabled, so it
* cannot iounmap the virtual addresses used to access the NVS region.
*/
void hibernate_nvs_restore(void)
{
struct nvs_page *entry;
printk(KERN_INFO "PM: Restoring platform NVS memory\n");
list_for_each_entry(entry, &nvs_list, node)
if (entry->data)
memcpy(entry->kaddr, entry->data, entry->size);
}

View File

@@ -619,7 +619,7 @@ static int acquire_console_semaphore_for_printk(unsigned int cpu)
static const char recursion_bug_msg [] =
KERN_CRIT "BUG: recent printk recursion!\n";
static int recursion_bug;
static int new_text_line = 1;
static int new_text_line = 1;
static char printk_buf[1024];
asmlinkage int vprintk(const char *fmt, va_list args)

View File

@@ -45,7 +45,7 @@ static unsigned long prof_len, prof_shift;
int prof_on __read_mostly;
EXPORT_SYMBOL_GPL(prof_on);
static cpumask_t prof_cpu_mask = CPU_MASK_ALL;
static cpumask_var_t prof_cpu_mask;
#ifdef CONFIG_SMP
static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits);
static DEFINE_PER_CPU(int, cpu_profile_flip);
@@ -113,9 +113,13 @@ int __ref profile_init(void)
buffer_bytes = prof_len*sizeof(atomic_t);
if (!slab_is_available()) {
prof_buffer = alloc_bootmem(buffer_bytes);
alloc_bootmem_cpumask_var(&prof_cpu_mask);
return 0;
}
if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
return -ENOMEM;
prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
if (prof_buffer)
return 0;
@@ -128,6 +132,7 @@ int __ref profile_init(void)
if (prof_buffer)
return 0;
free_cpumask_var(prof_cpu_mask);
return -ENOMEM;
}
@@ -386,13 +391,15 @@ out_free:
return NOTIFY_BAD;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
cpu_set(cpu, prof_cpu_mask);
if (prof_cpu_mask != NULL)
cpumask_set_cpu(cpu, prof_cpu_mask);
break;
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
case CPU_DEAD_FROZEN:
cpu_clear(cpu, prof_cpu_mask);
if (prof_cpu_mask != NULL)
cpumask_clear_cpu(cpu, prof_cpu_mask);
if (per_cpu(cpu_profile_hits, cpu)[0]) {
page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]);
per_cpu(cpu_profile_hits, cpu)[0] = NULL;
@@ -430,19 +437,19 @@ void profile_tick(int type)
if (type == CPU_PROFILING && timer_hook)
timer_hook(regs);
if (!user_mode(regs) && cpu_isset(smp_processor_id(), prof_cpu_mask))
if (!user_mode(regs) && prof_cpu_mask != NULL &&
cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
profile_hit(type, (void *)profile_pc(regs));
}
#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#include <asm/uaccess.h>
#include <asm/ptrace.h>
static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
int len = cpumask_scnprintf(page, count, *(cpumask_t *)data);
int len = cpumask_scnprintf(page, count, data);
if (count - len < 2)
return -EINVAL;
len += sprintf(page + len, "\n");
@@ -452,16 +459,20 @@ static int prof_cpu_mask_read_proc(char *page, char **start, off_t off,
static int prof_cpu_mask_write_proc(struct file *file,
const char __user *buffer, unsigned long count, void *data)
{
cpumask_t *mask = (cpumask_t *)data;
struct cpumask *mask = data;
unsigned long full_count = count, err;
cpumask_t new_value;
cpumask_var_t new_value;
if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;
err = cpumask_parse_user(buffer, count, new_value);
if (err)
return err;
*mask = new_value;
return full_count;
if (!err) {
cpumask_copy(mask, new_value);
err = full_count;
}
free_cpumask_var(new_value);
return err;
}
void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
@@ -472,7 +483,7 @@ void create_prof_cpu_mask(struct proc_dir_entry *root_irq_dir)
entry = create_proc_entry("prof_cpu_mask", 0600, root_irq_dir);
if (!entry)
return;
entry->data = (void *)&prof_cpu_mask;
entry->data = prof_cpu_mask;
entry->read_proc = prof_cpu_mask_read_proc;
entry->write_proc = prof_cpu_mask_write_proc;
}

View File

@@ -63,14 +63,14 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
.completed = -300,
.pending = -300,
.lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock),
.cpumask = CPU_MASK_NONE,
.cpumask = CPU_BITS_NONE,
};
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
.cur = -300,
.completed = -300,
.pending = -300,
.lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock),
.cpumask = CPU_MASK_NONE,
.cpumask = CPU_BITS_NONE,
};
DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
@@ -85,7 +85,6 @@ static void force_quiescent_state(struct rcu_data *rdp,
struct rcu_ctrlblk *rcp)
{
int cpu;
cpumask_t cpumask;
unsigned long flags;
set_need_resched();
@@ -96,10 +95,10 @@ static void force_quiescent_state(struct rcu_data *rdp,
* Don't send IPI to itself. With irqs disabled,
* rdp->cpu is the current cpu.
*
* cpu_online_map is updated by the _cpu_down()
* cpu_online_mask is updated by the _cpu_down()
* using __stop_machine(). Since we're in irqs disabled
* section, __stop_machine() is not exectuting, hence
* the cpu_online_map is stable.
* the cpu_online_mask is stable.
*
* However, a cpu might have been offlined _just_ before
* we disabled irqs while entering here.
@@ -107,13 +106,14 @@ static void force_quiescent_state(struct rcu_data *rdp,
* notification, leading to the offlined cpu's bit
* being set in the rcp->cpumask.
*
* Hence cpumask = (rcp->cpumask & cpu_online_map) to prevent
* Hence cpumask = (rcp->cpumask & cpu_online_mask) to prevent
* sending smp_reschedule() to an offlined CPU.
*/
cpus_and(cpumask, rcp->cpumask, cpu_online_map);
cpu_clear(rdp->cpu, cpumask);
for_each_cpu_mask_nr(cpu, cpumask)
smp_send_reschedule(cpu);
for_each_cpu_and(cpu,
to_cpumask(rcp->cpumask), cpu_online_mask) {
if (cpu != rdp->cpu)
smp_send_reschedule(cpu);
}
}
spin_unlock_irqrestore(&rcp->lock, flags);
}
@@ -193,7 +193,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
printk(KERN_ERR "INFO: RCU detected CPU stalls:");
for_each_possible_cpu(cpu) {
if (cpu_isset(cpu, rcp->cpumask))
if (cpumask_test_cpu(cpu, to_cpumask(rcp->cpumask)))
printk(" %d", cpu);
}
printk(" (detected by %d, t=%ld jiffies)\n",
@@ -221,7 +221,8 @@ static void check_cpu_stall(struct rcu_ctrlblk *rcp)
long delta;
delta = jiffies - rcp->jiffies_stall;
if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
if (cpumask_test_cpu(smp_processor_id(), to_cpumask(rcp->cpumask)) &&
delta >= 0) {
/* We haven't checked in, so go dump stack. */
print_cpu_stall(rcp);
@@ -393,7 +394,8 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
* unnecessarily.
*/
smp_mb();
cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
cpumask_andnot(to_cpumask(rcp->cpumask),
cpu_online_mask, nohz_cpu_mask);
rcp->signaled = 0;
}
@@ -406,8 +408,8 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
*/
static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
{
cpu_clear(cpu, rcp->cpumask);
if (cpus_empty(rcp->cpumask)) {
cpumask_clear_cpu(cpu, to_cpumask(rcp->cpumask));
if (cpumask_empty(to_cpumask(rcp->cpumask))) {
/* batch completed ! */
rcp->completed = rcp->cur;
rcu_start_batch(rcp);

View File

@@ -77,8 +77,15 @@ void wakeme_after_rcu(struct rcu_head *head)
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
* and may be nested.
*/
void synchronize_rcu(void); /* Makes kernel-doc tools happy */
synchronize_rcu_xxx(synchronize_rcu, call_rcu)
void synchronize_rcu(void)
{
struct rcu_synchronize rcu;
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
static void rcu_barrier_callback(struct rcu_head *notused)

View File

@@ -164,7 +164,8 @@ static char *rcu_try_flip_state_names[] =
{ "idle", "waitack", "waitzero", "waitmb" };
#endif /* #ifdef CONFIG_RCU_TRACE */
static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE;
static DECLARE_BITMAP(rcu_cpu_online_map, NR_CPUS) __read_mostly
= CPU_BITS_NONE;
/*
* Enum and per-CPU flag to determine when each CPU has seen
@@ -758,7 +759,7 @@ rcu_try_flip_idle(void)
/* Now ask each CPU for acknowledgement of the flip. */
for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) {
per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
dyntick_save_progress_counter(cpu);
}
@@ -776,7 +777,7 @@ rcu_try_flip_waitack(void)
int cpu;
RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map))
if (rcu_try_flip_waitack_needed(cpu) &&
per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
@@ -808,7 +809,7 @@ rcu_try_flip_waitzero(void)
/* Check to see if the sum of the "last" counters is zero. */
RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map))
sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
if (sum != 0) {
RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
@@ -823,7 +824,7 @@ rcu_try_flip_waitzero(void)
smp_mb(); /* ^^^^^^^^^^^^ */
/* Call for a memory barrier from each CPU. */
for_each_cpu_mask_nr(cpu, rcu_cpu_online_map) {
for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) {
per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
dyntick_save_progress_counter(cpu);
}
@@ -843,7 +844,7 @@ rcu_try_flip_waitmb(void)
int cpu;
RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
for_each_cpu_mask_nr(cpu, rcu_cpu_online_map)
for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map))
if (rcu_try_flip_waitmb_needed(cpu) &&
per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
@@ -1032,7 +1033,7 @@ void rcu_offline_cpu(int cpu)
RCU_DATA_CPU(cpu)->rcu_flipctr[0] = 0;
RCU_DATA_CPU(cpu)->rcu_flipctr[1] = 0;
cpu_clear(cpu, rcu_cpu_online_map);
cpumask_clear_cpu(cpu, to_cpumask(rcu_cpu_online_map));
spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
@@ -1072,7 +1073,7 @@ void __cpuinit rcu_online_cpu(int cpu)
struct rcu_data *rdp;
spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
cpu_set(cpu, rcu_cpu_online_map);
cpumask_set_cpu(cpu, to_cpumask(rcu_cpu_online_map));
spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
/*
@@ -1176,7 +1177,16 @@ EXPORT_SYMBOL_GPL(call_rcu_sched);
* in -rt this does -not- necessarily result in all currently executing
* interrupt -handlers- having completed.
*/
synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
void __synchronize_sched(void)
{
struct rcu_synchronize rcu;
init_completion(&rcu.completion);
/* Will wake me after RCU finished. */
call_rcu_sched(&rcu.head, wakeme_after_rcu);
/* Wait for it. */
wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(__synchronize_sched);
/*
@@ -1430,7 +1440,7 @@ void __init __rcu_init(void)
* We don't need protection against CPU-Hotplug here
* since
* a) If a CPU comes online while we are iterating over the
* cpu_online_map below, we would only end up making a
* cpu_online_mask below, we would only end up making a
* duplicate call to rcu_online_cpu() which sets the corresponding
* CPU's mask in the rcu_cpu_online_map.
*

View File

@@ -136,30 +136,46 @@ static int stutter_pause_test = 0;
#endif
int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
#define FULLSTOP_SIGNALED 1 /* Bail due to signal. */
#define FULLSTOP_CLEANUP 2 /* Orderly shutdown. */
static int fullstop; /* stop generating callbacks at test end. */
DEFINE_MUTEX(fullstop_mutex); /* protect fullstop transitions and */
/* spawning of kthreads. */
/* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */
#define FULLSTOP_DONTSTOP 0 /* Normal operation. */
#define FULLSTOP_SHUTDOWN 1 /* System shutdown with rcutorture running. */
#define FULLSTOP_RMMOD 2 /* Normal rmmod of rcutorture. */
static int fullstop = FULLSTOP_RMMOD;
DEFINE_MUTEX(fullstop_mutex); /* Protect fullstop transitions and spawning */
/* of kthreads. */
/*
* Detect and respond to a signal-based shutdown.
* Detect and respond to a system shutdown.
*/
static int
rcutorture_shutdown_notify(struct notifier_block *unused1,
unsigned long unused2, void *unused3)
{
if (fullstop)
return NOTIFY_DONE;
if (signal_pending(current)) {
mutex_lock(&fullstop_mutex);
if (!ACCESS_ONCE(fullstop))
fullstop = FULLSTOP_SIGNALED;
mutex_unlock(&fullstop_mutex);
}
mutex_lock(&fullstop_mutex);
if (fullstop == FULLSTOP_DONTSTOP)
fullstop = FULLSTOP_SHUTDOWN;
else
printk(KERN_WARNING /* but going down anyway, so... */
"Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
mutex_unlock(&fullstop_mutex);
return NOTIFY_DONE;
}
/*
* Absorb kthreads into a kernel function that won't return, so that
* they won't ever access module text or data again.
*/
static void rcutorture_shutdown_absorb(char *title)
{
if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
printk(KERN_NOTICE
"rcutorture thread %s parking due to system shutdown\n",
title);
schedule_timeout_uninterruptible(MAX_SCHEDULE_TIMEOUT);
}
}
/*
* Allocate an element from the rcu_tortures pool.
*/
@@ -221,13 +237,14 @@ rcu_random(struct rcu_random_state *rrsp)
}
static void
rcu_stutter_wait(void)
rcu_stutter_wait(char *title)
{
while ((stutter_pause_test || !rcutorture_runnable) && !fullstop) {
while (stutter_pause_test || !rcutorture_runnable) {
if (rcutorture_runnable)
schedule_timeout_interruptible(1);
else
schedule_timeout_interruptible(round_jiffies_relative(HZ));
rcutorture_shutdown_absorb(title);
}
}
@@ -289,7 +306,7 @@ rcu_torture_cb(struct rcu_head *p)
int i;
struct rcu_torture *rp = container_of(p, struct rcu_torture, rtort_rcu);
if (fullstop) {
if (fullstop != FULLSTOP_DONTSTOP) {
/* Test is ending, just drop callbacks on the floor. */
/* The next initialization will pick up the pieces. */
return;
@@ -621,10 +638,11 @@ rcu_torture_writer(void *arg)
}
rcu_torture_current_version++;
oldbatch = cur_ops->completed();
rcu_stutter_wait();
} while (!kthread_should_stop() && !fullstop);
rcu_stutter_wait("rcu_torture_writer");
} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping");
while (!kthread_should_stop() && fullstop != FULLSTOP_SIGNALED)
rcutorture_shutdown_absorb("rcu_torture_writer");
while (!kthread_should_stop())
schedule_timeout_uninterruptible(1);
return 0;
}
@@ -645,11 +663,12 @@ rcu_torture_fakewriter(void *arg)
schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
udelay(rcu_random(&rand) & 0x3ff);
cur_ops->sync();
rcu_stutter_wait();
} while (!kthread_should_stop() && !fullstop);
rcu_stutter_wait("rcu_torture_fakewriter");
} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping");
while (!kthread_should_stop() && fullstop != FULLSTOP_SIGNALED)
rcutorture_shutdown_absorb("rcu_torture_fakewriter");
while (!kthread_should_stop())
schedule_timeout_uninterruptible(1);
return 0;
}
@@ -754,12 +773,13 @@ rcu_torture_reader(void *arg)
preempt_enable();
cur_ops->readunlock(idx);
schedule();
rcu_stutter_wait();
} while (!kthread_should_stop() && !fullstop);
rcu_stutter_wait("rcu_torture_reader");
} while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP);
VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
rcutorture_shutdown_absorb("rcu_torture_reader");
if (irqreader && cur_ops->irqcapable)
del_timer_sync(&t);
while (!kthread_should_stop() && fullstop != FULLSTOP_SIGNALED)
while (!kthread_should_stop())
schedule_timeout_uninterruptible(1);
return 0;
}
@@ -856,7 +876,8 @@ rcu_torture_stats(void *arg)
do {
schedule_timeout_interruptible(stat_interval * HZ);
rcu_torture_stats_print();
} while (!kthread_should_stop() && !fullstop);
rcutorture_shutdown_absorb("rcu_torture_stats");
} while (!kthread_should_stop());
VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping");
return 0;
}
@@ -924,7 +945,8 @@ rcu_torture_shuffle(void *arg)
do {
schedule_timeout_interruptible(shuffle_interval * HZ);
rcu_torture_shuffle_tasks();
} while (!kthread_should_stop() && !fullstop);
rcutorture_shutdown_absorb("rcu_torture_shuffle");
} while (!kthread_should_stop());
VERBOSE_PRINTK_STRING("rcu_torture_shuffle task stopping");
return 0;
}
@@ -939,10 +961,11 @@ rcu_torture_stutter(void *arg)
do {
schedule_timeout_interruptible(stutter * HZ);
stutter_pause_test = 1;
if (!kthread_should_stop() && !fullstop)
if (!kthread_should_stop())
schedule_timeout_interruptible(stutter * HZ);
stutter_pause_test = 0;
} while (!kthread_should_stop() && !fullstop);
rcutorture_shutdown_absorb("rcu_torture_stutter");
} while (!kthread_should_stop());
VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping");
return 0;
}
@@ -969,15 +992,16 @@ rcu_torture_cleanup(void)
int i;
mutex_lock(&fullstop_mutex);
if (!fullstop) {
/* If being signaled, let it happen, then exit. */
if (fullstop == FULLSTOP_SHUTDOWN) {
printk(KERN_WARNING /* but going down anyway, so... */
"Concurrent 'rmmod rcutorture' and shutdown illegal!\n");
mutex_unlock(&fullstop_mutex);
schedule_timeout_interruptible(10 * HZ);
schedule_timeout_uninterruptible(10);
if (cur_ops->cb_barrier != NULL)
cur_ops->cb_barrier();
return;
}
fullstop = FULLSTOP_CLEANUP;
fullstop = FULLSTOP_RMMOD;
mutex_unlock(&fullstop_mutex);
unregister_reboot_notifier(&rcutorture_nb);
if (stutter_task) {
@@ -1077,7 +1101,7 @@ rcu_torture_init(void)
else
nrealreaders = 2 * num_online_cpus();
rcu_torture_print_module_parms("Start of test");
fullstop = 0;
fullstop = FULLSTOP_DONTSTOP;
/* Set up the freelist. */

View File

@@ -79,7 +79,10 @@ struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
#ifdef CONFIG_NO_HZ
DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks);
DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
.dynticks_nesting = 1,
.dynticks = 1,
};
#endif /* #ifdef CONFIG_NO_HZ */
static int blimit = 10; /* Maximum callbacks per softirq. */
@@ -572,6 +575,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
/* Special-case the common single-level case. */
if (NUM_RCU_NODES == 1) {
rnp->qsmask = rnp->qsmaskinit;
rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
spin_unlock_irqrestore(&rnp->lock, flags);
return;
}
@@ -1379,13 +1383,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
static void __cpuinit rcu_online_cpu(int cpu)
{
#ifdef CONFIG_NO_HZ
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
rdtp->dynticks_nesting = 1;
rdtp->dynticks |= 1; /* need consecutive #s even for hotplug. */
rdtp->dynticks_nmi = (rdtp->dynticks_nmi + 1) & ~0x1;
#endif /* #ifdef CONFIG_NO_HZ */
rcu_init_percpu_data(cpu, &rcu_state);
rcu_init_percpu_data(cpu, &rcu_bh_state);
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);

View File

@@ -15,10 +15,11 @@
#include <linux/uaccess.h>
#include <linux/mm.h>
void res_counter_init(struct res_counter *counter)
void res_counter_init(struct res_counter *counter, struct res_counter *parent)
{
spin_lock_init(&counter->lock);
counter->limit = (unsigned long long)LLONG_MAX;
counter->parent = parent;
}
int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
@@ -34,14 +35,34 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val)
return 0;
}
int res_counter_charge(struct res_counter *counter, unsigned long val)
int res_counter_charge(struct res_counter *counter, unsigned long val,
struct res_counter **limit_fail_at)
{
int ret;
unsigned long flags;
struct res_counter *c, *u;
spin_lock_irqsave(&counter->lock, flags);
ret = res_counter_charge_locked(counter, val);
spin_unlock_irqrestore(&counter->lock, flags);
*limit_fail_at = NULL;
local_irq_save(flags);
for (c = counter; c != NULL; c = c->parent) {
spin_lock(&c->lock);
ret = res_counter_charge_locked(c, val);
spin_unlock(&c->lock);
if (ret < 0) {
*limit_fail_at = c;
goto undo;
}
}
ret = 0;
goto done;
undo:
for (u = counter; u != c; u = u->parent) {
spin_lock(&u->lock);
res_counter_uncharge_locked(u, val);
spin_unlock(&u->lock);
}
done:
local_irq_restore(flags);
return ret;
}
@@ -56,10 +77,15 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
void res_counter_uncharge(struct res_counter *counter, unsigned long val)
{
unsigned long flags;
struct res_counter *c;
spin_lock_irqsave(&counter->lock, flags);
res_counter_uncharge_locked(counter, val);
spin_unlock_irqrestore(&counter->lock, flags);
local_irq_save(flags);
for (c = counter; c != NULL; c = c->parent) {
spin_lock(&c->lock);
res_counter_uncharge_locked(c, val);
spin_unlock(&c->lock);
}
local_irq_restore(flags);
}

View File

@@ -623,7 +623,7 @@ resource_size_t resource_alignment(struct resource *res)
*/
struct resource * __request_region(struct resource *parent,
resource_size_t start, resource_size_t n,
const char *name)
const char *name, int flags)
{
struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
@@ -634,6 +634,7 @@ struct resource * __request_region(struct resource *parent,
res->start = start;
res->end = start + n - 1;
res->flags = IORESOURCE_BUSY;
res->flags |= flags;
write_lock(&resource_lock);
@@ -679,7 +680,7 @@ int __check_region(struct resource *parent, resource_size_t start,
{
struct resource * res;
res = __request_region(parent, start, n, "check-region");
res = __request_region(parent, start, n, "check-region", 0);
if (!res)
return -EBUSY;
@@ -776,7 +777,7 @@ struct resource * __devm_request_region(struct device *dev,
dr->start = start;
dr->n = n;
res = __request_region(parent, start, n, name);
res = __request_region(parent, start, n, name, 0);
if (res)
devres_add(dev, dr);
else
@@ -876,3 +877,57 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
return err;
}
#ifdef CONFIG_STRICT_DEVMEM
static int strict_iomem_checks = 1;
#else
static int strict_iomem_checks;
#endif
/*
* check if an address is reserved in the iomem resource tree
* returns 1 if reserved, 0 if not reserved.
*/
int iomem_is_exclusive(u64 addr)
{
struct resource *p = &iomem_resource;
int err = 0;
loff_t l;
int size = PAGE_SIZE;
if (!strict_iomem_checks)
return 0;
addr = addr & PAGE_MASK;
read_lock(&resource_lock);
for (p = p->child; p ; p = r_next(NULL, p, &l)) {
/*
* We can probably skip the resources without
* IORESOURCE_IO attribute?
*/
if (p->start >= addr + size)
break;
if (p->end < addr)
continue;
if (p->flags & IORESOURCE_BUSY &&
p->flags & IORESOURCE_EXCLUSIVE) {
err = 1;
break;
}
}
read_unlock(&resource_lock);
return err;
}
static int __init strict_iomem(char *str)
{
if (strstr(str, "relaxed"))
strict_iomem_checks = 0;
if (strstr(str, "strict"))
strict_iomem_checks = 1;
return 1;
}
__setup("iomem=", strict_iomem);

File diff suppressed because it is too large Load Diff

View File

@@ -124,7 +124,7 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
clock = scd->tick_gtod + delta;
min_clock = wrap_max(scd->tick_gtod, scd->clock);
max_clock = scd->tick_gtod + TICK_NSEC;
max_clock = wrap_max(scd->clock, scd->tick_gtod + TICK_NSEC);
clock = wrap_max(clock, min_clock);
clock = wrap_min(clock, max_clock);
@@ -227,6 +227,9 @@ EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
*/
void sched_clock_idle_wakeup_event(u64 delta_ns)
{
if (timekeeping_suspended)
return;
sched_clock_tick();
touch_softlockup_watchdog();
}

View File

@@ -67,24 +67,21 @@ static int convert_prio(int prio)
* Returns: (int)bool - CPUs were found
*/
int cpupri_find(struct cpupri *cp, struct task_struct *p,
cpumask_t *lowest_mask)
struct cpumask *lowest_mask)
{
int idx = 0;
int task_pri = convert_prio(p->prio);
for_each_cpupri_active(cp->pri_active, idx) {
struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
cpumask_t mask;
if (idx >= task_pri)
break;
cpus_and(mask, p->cpus_allowed, vec->mask);
if (cpus_empty(mask))
if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
continue;
*lowest_mask = mask;
cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
return 1;
}
@@ -126,7 +123,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
vec->count--;
if (!vec->count)
clear_bit(oldpri, cp->pri_active);
cpu_clear(cpu, vec->mask);
cpumask_clear_cpu(cpu, vec->mask);
spin_unlock_irqrestore(&vec->lock, flags);
}
@@ -136,7 +133,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
spin_lock_irqsave(&vec->lock, flags);
cpu_set(cpu, vec->mask);
cpumask_set_cpu(cpu, vec->mask);
vec->count++;
if (vec->count == 1)
set_bit(newpri, cp->pri_active);
@@ -150,10 +147,11 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
/**
* cpupri_init - initialize the cpupri structure
* @cp: The cpupri context
* @bootmem: true if allocations need to use bootmem
*
* Returns: (void)
* Returns: -ENOMEM if memory fails.
*/
void cpupri_init(struct cpupri *cp)
int __init_refok cpupri_init(struct cpupri *cp, bool bootmem)
{
int i;
@@ -164,11 +162,30 @@ void cpupri_init(struct cpupri *cp)
spin_lock_init(&vec->lock);
vec->count = 0;
cpus_clear(vec->mask);
if (bootmem)
alloc_bootmem_cpumask_var(&vec->mask);
else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL))
goto cleanup;
}
for_each_possible_cpu(i)
cp->cpu_to_pri[i] = CPUPRI_INVALID;
return 0;
cleanup:
for (i--; i >= 0; i--)
free_cpumask_var(cp->pri_to_cpu[i].mask);
return -ENOMEM;
}
/**
* cpupri_cleanup - clean up the cpupri structure
* @cp: The cpupri context
*/
void cpupri_cleanup(struct cpupri *cp)
{
int i;
for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
free_cpumask_var(cp->pri_to_cpu[i].mask);
}

View File

@@ -14,7 +14,7 @@
struct cpupri_vec {
spinlock_t lock;
int count;
cpumask_t mask;
cpumask_var_t mask;
};
struct cpupri {
@@ -27,7 +27,8 @@ struct cpupri {
int cpupri_find(struct cpupri *cp,
struct task_struct *p, cpumask_t *lowest_mask);
void cpupri_set(struct cpupri *cp, int cpu, int pri);
void cpupri_init(struct cpupri *cp);
int cpupri_init(struct cpupri *cp, bool bootmem);
void cpupri_cleanup(struct cpupri *cp);
#else
#define cpupri_set(cp, cpu, pri) do { } while (0)
#define cpupri_init() do { } while (0)

View File

@@ -145,6 +145,19 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
read_unlock_irqrestore(&tasklist_lock, flags);
}
#if defined(CONFIG_CGROUP_SCHED) && \
(defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED))
static void task_group_path(struct task_group *tg, char *buf, int buflen)
{
/* may be NULL if the underlying cgroup isn't fully-created yet */
if (!tg->css.cgroup) {
buf[0] = '\0';
return;
}
cgroup_path(tg->css.cgroup, buf, buflen);
}
#endif
void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
@@ -154,10 +167,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
unsigned long flags;
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
char path[128] = "";
char path[128];
struct task_group *tg = cfs_rq->tg;
cgroup_path(tg->css.cgroup, path, sizeof(path));
task_group_path(tg, path, sizeof(path));
SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
@@ -208,10 +221,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
char path[128] = "";
char path[128];
struct task_group *tg = rt_rq->tg;
cgroup_path(tg->css.cgroup, path, sizeof(path));
task_group_path(tg, path, sizeof(path));
SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
#else

View File

@@ -385,20 +385,6 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
}
#endif
/*
* delta *= P[w / rw]
*/
static inline unsigned long
calc_delta_weight(unsigned long delta, struct sched_entity *se)
{
for_each_sched_entity(se) {
delta = calc_delta_mine(delta,
se->load.weight, &cfs_rq_of(se)->load);
}
return delta;
}
/*
* delta /= w
*/
@@ -440,12 +426,20 @@ static u64 __sched_period(unsigned long nr_running)
*/
static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
unsigned long nr_running = cfs_rq->nr_running;
u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq);
if (unlikely(!se->on_rq))
nr_running++;
for_each_sched_entity(se) {
struct load_weight *load = &cfs_rq->load;
return calc_delta_weight(__sched_period(nr_running), se);
if (unlikely(!se->on_rq)) {
struct load_weight lw = cfs_rq->load;
update_load_add(&lw, se->load.weight);
load = &lw;
}
slice = calc_delta_mine(slice, se->load.weight, load);
}
return slice;
}
/*
@@ -1019,16 +1013,33 @@ static void yield_task_fair(struct rq *rq)
* search starts with cpus closest then further out as needed,
* so we always favor a closer, idle cpu.
* Domains may include CPUs that are not usable for migration,
* hence we need to mask them out (cpu_active_map)
* hence we need to mask them out (cpu_active_mask)
*
* Returns the CPU we should wake onto.
*/
#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
static int wake_idle(int cpu, struct task_struct *p)
{
cpumask_t tmp;
struct sched_domain *sd;
int i;
unsigned int chosen_wakeup_cpu;
int this_cpu;
/*
* At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu
* are idle and this is not a kernel thread and this task's affinity
* allows it to be moved to preferred cpu, then just move!
*/
this_cpu = smp_processor_id();
chosen_wakeup_cpu =
cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu;
if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP &&
idle_cpu(cpu) && idle_cpu(this_cpu) &&
p->mm && !(p->flags & PF_KTHREAD) &&
cpu_isset(chosen_wakeup_cpu, p->cpus_allowed))
return chosen_wakeup_cpu;
/*
* If it is idle, then it is the best cpu to run this task.
@@ -1046,10 +1057,9 @@ static int wake_idle(int cpu, struct task_struct *p)
if ((sd->flags & SD_WAKE_IDLE)
|| ((sd->flags & SD_WAKE_IDLE_FAR)
&& !task_hot(p, task_rq(p)->clock, sd))) {
cpus_and(tmp, sd->span, p->cpus_allowed);
cpus_and(tmp, tmp, cpu_active_map);
for_each_cpu_mask_nr(i, tmp) {
if (idle_cpu(i)) {
for_each_cpu_and(i, sched_domain_span(sd),
&p->cpus_allowed) {
if (cpu_active(i) && idle_cpu(i)) {
if (i != task_cpu(p)) {
schedstat_inc(p,
se.nr_wakeups_idle);
@@ -1242,13 +1252,13 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
* this_cpu and prev_cpu are present in:
*/
for_each_domain(this_cpu, sd) {
if (cpu_isset(prev_cpu, sd->span)) {
if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) {
this_sd = sd;
break;
}
}
if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed)))
goto out;
/*
@@ -1607,8 +1617,6 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
}
}
#define swap(a, b) do { typeof(a) tmp = (a); (a) = (b); (b) = tmp; } while (0)
/*
* Share the fairness runtime between parent and child, thus the
* total amount of pressure for CPU stays equal - new tasks

View File

@@ -15,7 +15,7 @@ static inline void rt_set_overload(struct rq *rq)
if (!rq->online)
return;
cpu_set(rq->cpu, rq->rd->rto_mask);
cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
/*
* Make sure the mask is visible before we set
* the overload count. That is checked to determine
@@ -34,7 +34,7 @@ static inline void rt_clear_overload(struct rq *rq)
/* the order here really doesn't matter */
atomic_dec(&rq->rd->rto_count);
cpu_clear(rq->cpu, rq->rd->rto_mask);
cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
}
static void update_rt_migration(struct rq *rq)
@@ -139,14 +139,14 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
}
#ifdef CONFIG_SMP
static inline cpumask_t sched_rt_period_mask(void)
static inline const struct cpumask *sched_rt_period_mask(void)
{
return cpu_rq(smp_processor_id())->rd->span;
}
#else
static inline cpumask_t sched_rt_period_mask(void)
static inline const struct cpumask *sched_rt_period_mask(void)
{
return cpu_online_map;
return cpu_online_mask;
}
#endif
@@ -212,9 +212,9 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq)
return rt_rq->rt_throttled;
}
static inline cpumask_t sched_rt_period_mask(void)
static inline const struct cpumask *sched_rt_period_mask(void)
{
return cpu_online_map;
return cpu_online_mask;
}
static inline
@@ -241,11 +241,11 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
int i, weight, more = 0;
u64 rt_period;
weight = cpus_weight(rd->span);
weight = cpumask_weight(rd->span);
spin_lock(&rt_b->rt_runtime_lock);
rt_period = ktime_to_ns(rt_b->rt_period);
for_each_cpu_mask_nr(i, rd->span) {
for_each_cpu(i, rd->span) {
struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
s64 diff;
@@ -324,7 +324,7 @@ static void __disable_runtime(struct rq *rq)
/*
* Greedy reclaim, take back as much as we can.
*/
for_each_cpu_mask(i, rd->span) {
for_each_cpu(i, rd->span) {
struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
s64 diff;
@@ -429,13 +429,13 @@ static inline int balance_runtime(struct rt_rq *rt_rq)
static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
{
int i, idle = 1;
cpumask_t span;
const struct cpumask *span;
if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
return 1;
span = sched_rt_period_mask();
for_each_cpu_mask(i, span) {
for_each_cpu(i, span) {
int enqueue = 0;
struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
struct rq *rq = rq_of_rt_rq(rt_rq);
@@ -805,17 +805,20 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
{
cpumask_t mask;
cpumask_var_t mask;
if (rq->curr->rt.nr_cpus_allowed == 1)
return;
if (p->rt.nr_cpus_allowed != 1
&& cpupri_find(&rq->rd->cpupri, p, &mask))
if (!alloc_cpumask_var(&mask, GFP_ATOMIC))
return;
if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
return;
if (p->rt.nr_cpus_allowed != 1
&& cpupri_find(&rq->rd->cpupri, p, mask))
goto free;
if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask))
goto free;
/*
* There appears to be other cpus that can accept
@@ -824,6 +827,8 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
*/
requeue_task_rt(rq, p, 1);
resched_task(rq->curr);
free:
free_cpumask_var(mask);
}
#endif /* CONFIG_SMP */
@@ -914,7 +919,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
if (!task_running(rq, p) &&
(cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) &&
(cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
(p->rt.nr_cpus_allowed > 1))
return 1;
return 0;
@@ -953,18 +958,19 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
return next;
}
static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
static inline int pick_optimal_cpu(int this_cpu,
const struct cpumask *mask)
{
int first;
/* "this_cpu" is cheaper to preempt than a remote processor */
if ((this_cpu != -1) && cpu_isset(this_cpu, *mask))
if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask))
return this_cpu;
first = first_cpu(*mask);
if (first != NR_CPUS)
first = cpumask_first(mask);
if (first < nr_cpu_ids)
return first;
return -1;
@@ -973,9 +979,10 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
static int find_lowest_rq(struct task_struct *task)
{
struct sched_domain *sd;
cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
int this_cpu = smp_processor_id();
int cpu = task_cpu(task);
cpumask_var_t domain_mask;
if (task->rt.nr_cpus_allowed == 1)
return -1; /* No other targets possible */
@@ -988,7 +995,7 @@ static int find_lowest_rq(struct task_struct *task)
* I guess we might want to change cpupri_find() to ignore those
* in the first place.
*/
cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
/*
* At this point we have built a mask of cpus representing the
@@ -998,7 +1005,7 @@ static int find_lowest_rq(struct task_struct *task)
* We prioritize the last cpu that the task executed on since
* it is most likely cache-hot in that location.
*/
if (cpu_isset(cpu, *lowest_mask))
if (cpumask_test_cpu(cpu, lowest_mask))
return cpu;
/*
@@ -1008,18 +1015,25 @@ static int find_lowest_rq(struct task_struct *task)
if (this_cpu == cpu)
this_cpu = -1; /* Skip this_cpu opt if the same */
for_each_domain(cpu, sd) {
if (sd->flags & SD_WAKE_AFFINE) {
cpumask_t domain_mask;
int best_cpu;
if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) {
for_each_domain(cpu, sd) {
if (sd->flags & SD_WAKE_AFFINE) {
int best_cpu;
cpus_and(domain_mask, sd->span, *lowest_mask);
cpumask_and(domain_mask,
sched_domain_span(sd),
lowest_mask);
best_cpu = pick_optimal_cpu(this_cpu,
&domain_mask);
if (best_cpu != -1)
return best_cpu;
best_cpu = pick_optimal_cpu(this_cpu,
domain_mask);
if (best_cpu != -1) {
free_cpumask_var(domain_mask);
return best_cpu;
}
}
}
free_cpumask_var(domain_mask);
}
/*
@@ -1054,8 +1068,8 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
* Also make sure that it wasn't scheduled on its rq.
*/
if (unlikely(task_rq(task) != rq ||
!cpu_isset(lowest_rq->cpu,
task->cpus_allowed) ||
!cpumask_test_cpu(lowest_rq->cpu,
&task->cpus_allowed) ||
task_running(rq, task) ||
!task->se.on_rq)) {
@@ -1176,7 +1190,7 @@ static int pull_rt_task(struct rq *this_rq)
next = pick_next_task_rt(this_rq);
for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) {
for_each_cpu(cpu, this_rq->rd->rto_mask) {
if (this_cpu == cpu)
continue;
@@ -1305,9 +1319,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
}
static void set_cpus_allowed_rt(struct task_struct *p,
const cpumask_t *new_mask)
const struct cpumask *new_mask)
{
int weight = cpus_weight(*new_mask);
int weight = cpumask_weight(new_mask);
BUG_ON(!rt_task(p));
@@ -1328,7 +1342,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
update_rt_migration(rq);
}
p->cpus_allowed = *new_mask;
cpumask_copy(&p->cpus_allowed, new_mask);
p->rt.nr_cpus_allowed = weight;
}
@@ -1371,6 +1385,15 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p,
if (!rq->rt.rt_nr_running)
pull_rt_task(rq);
}
static inline void init_sched_rt_class(void)
{
unsigned int i;
for_each_possible_cpu(i)
alloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
GFP_KERNEL, cpu_to_node(i));
}
#endif /* CONFIG_SMP */
/*
@@ -1541,3 +1564,4 @@ static void print_rt_stats(struct seq_file *m, int cpu)
rcu_read_unlock();
}
#endif /* CONFIG_SCHED_DEBUG */

View File

@@ -42,7 +42,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
for_each_domain(cpu, sd) {
enum cpu_idle_type itype;
cpumask_scnprintf(mask_str, mask_len, sd->span);
cpumask_scnprintf(mask_str, mask_len,
sched_domain_span(sd));
seq_printf(seq, "domain%d %s", dcount++, mask_str);
for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
itype++) {

View File

@@ -858,7 +858,8 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
q->info.si_signo = sig;
q->info.si_errno = 0;
q->info.si_code = SI_USER;
q->info.si_pid = task_pid_vnr(current);
q->info.si_pid = task_tgid_nr_ns(current,
task_active_pid_ns(t));
q->info.si_uid = current_uid();
break;
case (unsigned long) SEND_SIG_PRIV:

View File

@@ -24,8 +24,8 @@ struct call_function_data {
struct call_single_data csd;
spinlock_t lock;
unsigned int refs;
cpumask_t cpumask;
struct rcu_head rcu_head;
unsigned long cpumask_bits[];
};
struct call_single_queue {
@@ -110,13 +110,13 @@ void generic_smp_call_function_interrupt(void)
list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
int refs;
if (!cpu_isset(cpu, data->cpumask))
if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits)))
continue;
data->csd.func(data->csd.info);
spin_lock(&data->lock);
cpu_clear(cpu, data->cpumask);
cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits));
WARN_ON(data->refs == 0);
data->refs--;
refs = data->refs;
@@ -223,7 +223,7 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
local_irq_save(flags);
func(info);
local_irq_restore(flags);
} else if ((unsigned)cpu < NR_CPUS && cpu_online(cpu)) {
} else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
struct call_single_data *data = NULL;
if (!wait) {
@@ -266,51 +266,19 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
generic_exec_single(cpu, data);
}
/* Dummy function */
static void quiesce_dummy(void *unused)
{
}
/*
* Ensure stack based data used in call function mask is safe to free.
*
* This is needed by smp_call_function_mask when using on-stack data, because
* a single call function queue is shared by all CPUs, and any CPU may pick up
* the data item on the queue at any time before it is deleted. So we need to
* ensure that all CPUs have transitioned through a quiescent state after
* this call.
*
* This is a very slow function, implemented by sending synchronous IPIs to
* all possible CPUs. For this reason, we have to alloc data rather than use
* stack based data even in the case of synchronous calls. The stack based
* data is then just used for deadlock/oom fallback which will be very rare.
*
* If a faster scheme can be made, we could go back to preferring stack based
* data -- the data allocation/free is non-zero cost.
*/
static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
{
struct call_single_data data;
int cpu;
data.func = quiesce_dummy;
data.info = NULL;
for_each_cpu_mask(cpu, mask) {
data.flags = CSD_FLAG_WAIT;
generic_exec_single(cpu, &data);
}
}
/* FIXME: Shim for archs using old arch_send_call_function_ipi API. */
#ifndef arch_send_call_function_ipi_mask
#define arch_send_call_function_ipi_mask(maskp) \
arch_send_call_function_ipi(*(maskp))
#endif
/**
* smp_call_function_mask(): Run a function on a set of other CPUs.
* @mask: The set of cpus to run on.
* smp_call_function_many(): Run a function on a set of other CPUs.
* @mask: The set of cpus to run on (only runs on online subset).
* @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function.
* @wait: If true, wait (atomically) until function has completed on other CPUs.
*
* Returns 0 on success, else a negative status code.
*
* If @wait is true, then returns once @func has returned. Note that @wait
* will be implicitly turned on in case of allocation failures, since
* we fall back to on-stack allocation.
@@ -319,53 +287,57 @@ static void smp_call_function_mask_quiesce_stack(cpumask_t mask)
* hardware interrupt handler or from a bottom half handler. Preemption
* must be disabled when calling this function.
*/
int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
int wait)
void smp_call_function_many(const struct cpumask *mask,
void (*func)(void *), void *info,
bool wait)
{
struct call_function_data d;
struct call_function_data *data = NULL;
cpumask_t allbutself;
struct call_function_data *data;
unsigned long flags;
int cpu, num_cpus;
int slowpath = 0;
int cpu, next_cpu;
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
cpu = smp_processor_id();
allbutself = cpu_online_map;
cpu_clear(cpu, allbutself);
cpus_and(mask, mask, allbutself);
num_cpus = cpus_weight(mask);
/* So, what's a CPU they want? Ignoring this one. */
cpu = cpumask_first_and(mask, cpu_online_mask);
if (cpu == smp_processor_id())
cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
/* No online cpus? We're done. */
if (cpu >= nr_cpu_ids)
return;
/*
* If zero CPUs, return. If just a single CPU, turn this request
* into a targetted single call instead since it's faster.
*/
if (!num_cpus)
return 0;
else if (num_cpus == 1) {
cpu = first_cpu(mask);
return smp_call_function_single(cpu, func, info, wait);
/* Do we have another CPU which isn't us? */
next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
if (next_cpu == smp_processor_id())
next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
/* Fastpath: do that cpu by itself. */
if (next_cpu >= nr_cpu_ids) {
smp_call_function_single(cpu, func, info, wait);
return;
}
data = kmalloc(sizeof(*data), GFP_ATOMIC);
if (data) {
data->csd.flags = CSD_FLAG_ALLOC;
if (wait)
data->csd.flags |= CSD_FLAG_WAIT;
} else {
data = &d;
data->csd.flags = CSD_FLAG_WAIT;
wait = 1;
slowpath = 1;
data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC);
if (unlikely(!data)) {
/* Slow path. */
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
if (cpumask_test_cpu(cpu, mask))
smp_call_function_single(cpu, func, info, wait);
}
return;
}
spin_lock_init(&data->lock);
data->csd.flags = CSD_FLAG_ALLOC;
if (wait)
data->csd.flags |= CSD_FLAG_WAIT;
data->csd.func = func;
data->csd.info = info;
data->refs = num_cpus;
data->cpumask = mask;
cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits));
data->refs = cpumask_weight(to_cpumask(data->cpumask_bits));
spin_lock_irqsave(&call_function_lock, flags);
list_add_tail_rcu(&data->csd.list, &call_function_queue);
@@ -377,18 +349,13 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
smp_mb();
/* Send a message to all CPUs in the map */
arch_send_call_function_ipi(mask);
arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits));
/* optionally wait for the CPUs to complete */
if (wait) {
if (wait)
csd_flag_wait(&data->csd);
if (unlikely(slowpath))
smp_call_function_mask_quiesce_stack(mask);
}
return 0;
}
EXPORT_SYMBOL(smp_call_function_mask);
EXPORT_SYMBOL(smp_call_function_many);
/**
* smp_call_function(): Run a function on all other CPUs.
@@ -396,7 +363,7 @@ EXPORT_SYMBOL(smp_call_function_mask);
* @info: An arbitrary pointer to pass to the function.
* @wait: If true, wait (atomically) until function has completed on other CPUs.
*
* Returns 0 on success, else a negative status code.
* Returns 0.
*
* If @wait is true, then returns once @func has returned; otherwise
* it returns just before the target cpu calls @func. In case of allocation
@@ -407,12 +374,10 @@ EXPORT_SYMBOL(smp_call_function_mask);
*/
int smp_call_function(void (*func)(void *), void *info, int wait)
{
int ret;
preempt_disable();
ret = smp_call_function_mask(cpu_online_map, func, info, wait);
smp_call_function_many(cpu_online_mask, func, info, wait);
preempt_enable();
return ret;
return 0;
}
EXPORT_SYMBOL(smp_call_function);

View File

@@ -733,7 +733,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
break;
/* Unbind so it can run. Fall thru. */
kthread_bind(per_cpu(ksoftirqd, hotcpu),
any_online_cpu(cpu_online_map));
cpumask_any(cpu_online_mask));
case CPU_DEAD:
case CPU_DEAD_FROZEN: {
struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -784,3 +784,28 @@ int on_each_cpu(void (*func) (void *info), void *info, int wait)
}
EXPORT_SYMBOL(on_each_cpu);
#endif
/*
* [ These __weak aliases are kept in a separate compilation unit, so that
* GCC does not inline them incorrectly. ]
*/
int __init __weak early_irq_init(void)
{
return 0;
}
int __init __weak arch_probe_nr_irqs(void)
{
return 0;
}
int __init __weak arch_early_irq_init(void)
{
return 0;
}
int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
{
return 0;
}

View File

@@ -303,17 +303,15 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
check_cpu = any_online_cpu(cpu_online_map);
check_cpu = cpumask_any(cpu_online_mask);
wake_up_process(per_cpu(watchdog_task, hotcpu));
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
if (hotcpu == check_cpu) {
cpumask_t temp_cpu_online_map = cpu_online_map;
cpu_clear(hotcpu, temp_cpu_online_map);
check_cpu = any_online_cpu(temp_cpu_online_map);
/* Pick any other online cpu. */
check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
}
break;
@@ -323,7 +321,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
break;
/* Unbind so it can run. Fall thru. */
kthread_bind(per_cpu(watchdog_task, hotcpu),
any_online_cpu(cpu_online_map));
cpumask_any(cpu_online_mask));
case CPU_DEAD:
case CPU_DEAD_FROZEN:
p = per_cpu(watchdog_task, hotcpu);

View File

@@ -38,7 +38,10 @@ struct stop_machine_data {
static unsigned int num_threads;
static atomic_t thread_ack;
static DEFINE_MUTEX(lock);
/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
static DEFINE_MUTEX(setup_lock);
/* Users of stop_machine. */
static int refcount;
static struct workqueue_struct *stop_machine_wq;
static struct stop_machine_data active, idle;
static const cpumask_t *active_cpus;
@@ -69,10 +72,10 @@ static void stop_cpu(struct work_struct *unused)
int err;
if (!active_cpus) {
if (cpu == first_cpu(cpu_online_map))
if (cpu == cpumask_first(cpu_online_mask))
smdata = &active;
} else {
if (cpu_isset(cpu, *active_cpus))
if (cpumask_test_cpu(cpu, active_cpus))
smdata = &active;
}
/* Simple state machine */
@@ -109,7 +112,44 @@ static int chill(void *unused)
return 0;
}
int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
int stop_machine_create(void)
{
mutex_lock(&setup_lock);
if (refcount)
goto done;
stop_machine_wq = create_rt_workqueue("kstop");
if (!stop_machine_wq)
goto err_out;
stop_machine_work = alloc_percpu(struct work_struct);
if (!stop_machine_work)
goto err_out;
done:
refcount++;
mutex_unlock(&setup_lock);
return 0;
err_out:
if (stop_machine_wq)
destroy_workqueue(stop_machine_wq);
mutex_unlock(&setup_lock);
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(stop_machine_create);
void stop_machine_destroy(void)
{
mutex_lock(&setup_lock);
refcount--;
if (refcount)
goto done;
destroy_workqueue(stop_machine_wq);
free_percpu(stop_machine_work);
done:
mutex_unlock(&setup_lock);
}
EXPORT_SYMBOL_GPL(stop_machine_destroy);
int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
struct work_struct *sm_work;
int i, ret;
@@ -142,23 +182,18 @@ int __stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
return ret;
}
int stop_machine(int (*fn)(void *), void *data, const cpumask_t *cpus)
int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
int ret;
ret = stop_machine_create();
if (ret)
return ret;
/* No CPUs can come up or down during this. */
get_online_cpus();
ret = __stop_machine(fn, data, cpus);
put_online_cpus();
stop_machine_destroy();
return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
static int __init stop_machine_init(void)
{
stop_machine_wq = create_rt_workqueue("kstop");
stop_machine_work = alloc_percpu(struct work_struct);
return 0;
}
core_initcall(stop_machine_init);

View File

@@ -33,6 +33,7 @@
#include <linux/task_io_accounting_ops.h>
#include <linux/seccomp.h>
#include <linux/cpu.h>
#include <linux/ptrace.h>
#include <linux/compat.h>
#include <linux/syscalls.h>
@@ -927,6 +928,7 @@ asmlinkage long sys_times(struct tms __user * tbuf)
if (copy_to_user(tbuf, &tmp, sizeof(struct tms)))
return -EFAULT;
}
force_successful_syscall_return();
return (long) jiffies_64_to_clock_t(get_jiffies_64());
}
@@ -1627,6 +1629,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
utime = stime = cputime_zero;
if (who == RUSAGE_THREAD) {
utime = task_utime(current);
stime = task_stime(current);
accumulate_thread_rusage(p, r);
goto out;
}

View File

@@ -82,15 +82,14 @@ extern int percpu_pagelist_fraction;
extern int compat_log;
extern int latencytop_enabled;
extern int sysctl_nr_open_min, sysctl_nr_open_max;
#ifndef CONFIG_MMU
extern int sysctl_nr_trim_pages;
#endif
#ifdef CONFIG_RCU_TORTURE_TEST
extern int rcutorture_runnable;
#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
/* Constants used for minimum and maximum */
#if defined(CONFIG_HIGHMEM) || defined(CONFIG_DETECT_SOFTLOCKUP)
static int one = 1;
#endif
#ifdef CONFIG_DETECT_SOFTLOCKUP
static int sixty = 60;
static int neg_one = -1;
@@ -101,6 +100,7 @@ static int two = 2;
#endif
static int zero;
static int one = 1;
static int one_hundred = 100;
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -952,11 +952,21 @@ static struct ctl_table vm_table[] = {
.data = &dirty_background_ratio,
.maxlen = sizeof(dirty_background_ratio),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.proc_handler = &dirty_background_ratio_handler,
.strategy = &sysctl_intvec,
.extra1 = &zero,
.extra2 = &one_hundred,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "dirty_background_bytes",
.data = &dirty_background_bytes,
.maxlen = sizeof(dirty_background_bytes),
.mode = 0644,
.proc_handler = &dirty_background_bytes_handler,
.strategy = &sysctl_intvec,
.extra1 = &one,
},
{
.ctl_name = VM_DIRTY_RATIO,
.procname = "dirty_ratio",
@@ -968,6 +978,16 @@ static struct ctl_table vm_table[] = {
.extra1 = &zero,
.extra2 = &one_hundred,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "dirty_bytes",
.data = &vm_dirty_bytes,
.maxlen = sizeof(vm_dirty_bytes),
.mode = 0644,
.proc_handler = &dirty_bytes_handler,
.strategy = &sysctl_intvec,
.extra1 = &one,
},
{
.procname = "dirty_writeback_centisecs",
.data = &dirty_writeback_interval,
@@ -1085,6 +1105,17 @@ static struct ctl_table vm_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec
},
#else
{
.ctl_name = CTL_UNNUMBERED,
.procname = "nr_trim_pages",
.data = &sysctl_nr_trim_pages,
.maxlen = sizeof(sysctl_nr_trim_pages),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &zero,
},
#endif
{
.ctl_name = VM_LAPTOP_MODE,

View File

@@ -290,18 +290,17 @@ ret:
return;
}
static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd)
{
struct listener_list *listeners;
struct listener *s, *tmp;
unsigned int cpu;
cpumask_t mask = *maskp;
if (!cpus_subset(mask, cpu_possible_map))
if (!cpumask_subset(mask, cpu_possible_mask))
return -EINVAL;
if (isadd == REGISTER) {
for_each_cpu_mask_nr(cpu, mask) {
for_each_cpu(cpu, mask) {
s = kmalloc_node(sizeof(struct listener), GFP_KERNEL,
cpu_to_node(cpu));
if (!s)
@@ -320,7 +319,7 @@ static int add_del_listener(pid_t pid, cpumask_t *maskp, int isadd)
/* Deregister or cleanup */
cleanup:
for_each_cpu_mask_nr(cpu, mask) {
for_each_cpu(cpu, mask) {
listeners = &per_cpu(listener_array, cpu);
down_write(&listeners->sem);
list_for_each_entry_safe(s, tmp, &listeners->list, list) {
@@ -335,7 +334,7 @@ cleanup:
return 0;
}
static int parse(struct nlattr *na, cpumask_t *mask)
static int parse(struct nlattr *na, struct cpumask *mask)
{
char *data;
int len;
@@ -352,7 +351,7 @@ static int parse(struct nlattr *na, cpumask_t *mask)
if (!data)
return -ENOMEM;
nla_strlcpy(data, na, len);
ret = cpulist_parse(data, *mask);
ret = cpulist_parse(data, mask);
kfree(data);
return ret;
}
@@ -428,23 +427,33 @@ err:
static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
{
int rc = 0;
int rc;
struct sk_buff *rep_skb;
struct taskstats *stats;
size_t size;
cpumask_t mask;
cpumask_var_t mask;
rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], &mask);
if (rc < 0)
return rc;
if (rc == 0)
return add_del_listener(info->snd_pid, &mask, REGISTER);
if (!alloc_cpumask_var(&mask, GFP_KERNEL))
return -ENOMEM;
rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], &mask);
rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask);
if (rc < 0)
goto free_return_rc;
if (rc == 0) {
rc = add_del_listener(info->snd_pid, mask, REGISTER);
goto free_return_rc;
}
rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask);
if (rc < 0)
goto free_return_rc;
if (rc == 0) {
rc = add_del_listener(info->snd_pid, mask, DEREGISTER);
free_return_rc:
free_cpumask_var(mask);
return rc;
if (rc == 0)
return add_del_listener(info->snd_pid, &mask, DEREGISTER);
}
free_cpumask_var(mask);
/*
* Size includes space for nested attributes

View File

@@ -22,21 +22,11 @@
static u32 rand1, preh_val, posth_val, jph_val;
static int errors, handler_errors, num_tests;
static u32 (*target)(u32 value);
static u32 (*target2)(u32 value);
static noinline u32 kprobe_target(u32 value)
{
/*
* gcc ignores noinline on some architectures unless we stuff
* sufficient lard into the function. The get_kprobe() here is
* just for that.
*
* NOTE: We aren't concerned about the correctness of get_kprobe()
* here; hence, this call is neither under !preempt nor with the
* kprobe_mutex held. This is fine(tm)
*/
if (get_kprobe((void *)0xdeadbeef))
printk(KERN_INFO "Kprobe smoke test: probe on 0xdeadbeef!\n");
return (value / div_factor);
}
@@ -74,7 +64,7 @@ static int test_kprobe(void)
return ret;
}
ret = kprobe_target(rand1);
ret = target(rand1);
unregister_kprobe(&kp);
if (preh_val == 0) {
@@ -92,6 +82,84 @@ static int test_kprobe(void)
return 0;
}
static noinline u32 kprobe_target2(u32 value)
{
return (value / div_factor) + 1;
}
static int kp_pre_handler2(struct kprobe *p, struct pt_regs *regs)
{
preh_val = (rand1 / div_factor) + 1;
return 0;
}
static void kp_post_handler2(struct kprobe *p, struct pt_regs *regs,
unsigned long flags)
{
if (preh_val != (rand1 / div_factor) + 1) {
handler_errors++;
printk(KERN_ERR "Kprobe smoke test failed: "
"incorrect value in post_handler2\n");
}
posth_val = preh_val + div_factor;
}
static struct kprobe kp2 = {
.symbol_name = "kprobe_target2",
.pre_handler = kp_pre_handler2,
.post_handler = kp_post_handler2
};
static int test_kprobes(void)
{
int ret;
struct kprobe *kps[2] = {&kp, &kp2};
kp.addr = 0; /* addr should be cleard for reusing kprobe. */
ret = register_kprobes(kps, 2);
if (ret < 0) {
printk(KERN_ERR "Kprobe smoke test failed: "
"register_kprobes returned %d\n", ret);
return ret;
}
preh_val = 0;
posth_val = 0;
ret = target(rand1);
if (preh_val == 0) {
printk(KERN_ERR "Kprobe smoke test failed: "
"kprobe pre_handler not called\n");
handler_errors++;
}
if (posth_val == 0) {
printk(KERN_ERR "Kprobe smoke test failed: "
"kprobe post_handler not called\n");
handler_errors++;
}
preh_val = 0;
posth_val = 0;
ret = target2(rand1);
if (preh_val == 0) {
printk(KERN_ERR "Kprobe smoke test failed: "
"kprobe pre_handler2 not called\n");
handler_errors++;
}
if (posth_val == 0) {
printk(KERN_ERR "Kprobe smoke test failed: "
"kprobe post_handler2 not called\n");
handler_errors++;
}
unregister_kprobes(kps, 2);
return 0;
}
static u32 j_kprobe_target(u32 value)
{
if (value != rand1) {
@@ -121,7 +189,7 @@ static int test_jprobe(void)
return ret;
}
ret = kprobe_target(rand1);
ret = target(rand1);
unregister_jprobe(&jp);
if (jph_val == 0) {
printk(KERN_ERR "Kprobe smoke test failed: "
@@ -132,6 +200,43 @@ static int test_jprobe(void)
return 0;
}
static struct jprobe jp2 = {
.entry = j_kprobe_target,
.kp.symbol_name = "kprobe_target2"
};
static int test_jprobes(void)
{
int ret;
struct jprobe *jps[2] = {&jp, &jp2};
jp.kp.addr = 0; /* addr should be cleard for reusing kprobe. */
ret = register_jprobes(jps, 2);
if (ret < 0) {
printk(KERN_ERR "Kprobe smoke test failed: "
"register_jprobes returned %d\n", ret);
return ret;
}
jph_val = 0;
ret = target(rand1);
if (jph_val == 0) {
printk(KERN_ERR "Kprobe smoke test failed: "
"jprobe handler not called\n");
handler_errors++;
}
jph_val = 0;
ret = target2(rand1);
if (jph_val == 0) {
printk(KERN_ERR "Kprobe smoke test failed: "
"jprobe handler2 not called\n");
handler_errors++;
}
unregister_jprobes(jps, 2);
return 0;
}
#ifdef CONFIG_KRETPROBES
static u32 krph_val;
@@ -177,7 +282,7 @@ static int test_kretprobe(void)
return ret;
}
ret = kprobe_target(rand1);
ret = target(rand1);
unregister_kretprobe(&rp);
if (krph_val != rand1) {
printk(KERN_ERR "Kprobe smoke test failed: "
@@ -187,12 +292,72 @@ static int test_kretprobe(void)
return 0;
}
static int return_handler2(struct kretprobe_instance *ri, struct pt_regs *regs)
{
unsigned long ret = regs_return_value(regs);
if (ret != (rand1 / div_factor) + 1) {
handler_errors++;
printk(KERN_ERR "Kprobe smoke test failed: "
"incorrect value in kretprobe handler2\n");
}
if (krph_val == 0) {
handler_errors++;
printk(KERN_ERR "Kprobe smoke test failed: "
"call to kretprobe entry handler failed\n");
}
krph_val = rand1;
return 0;
}
static struct kretprobe rp2 = {
.handler = return_handler2,
.entry_handler = entry_handler,
.kp.symbol_name = "kprobe_target2"
};
static int test_kretprobes(void)
{
int ret;
struct kretprobe *rps[2] = {&rp, &rp2};
rp.kp.addr = 0; /* addr should be cleard for reusing kprobe. */
ret = register_kretprobes(rps, 2);
if (ret < 0) {
printk(KERN_ERR "Kprobe smoke test failed: "
"register_kretprobe returned %d\n", ret);
return ret;
}
krph_val = 0;
ret = target(rand1);
if (krph_val != rand1) {
printk(KERN_ERR "Kprobe smoke test failed: "
"kretprobe handler not called\n");
handler_errors++;
}
krph_val = 0;
ret = target2(rand1);
if (krph_val != rand1) {
printk(KERN_ERR "Kprobe smoke test failed: "
"kretprobe handler2 not called\n");
handler_errors++;
}
unregister_kretprobes(rps, 2);
return 0;
}
#endif /* CONFIG_KRETPROBES */
int init_test_probes(void)
{
int ret;
target = kprobe_target;
target2 = kprobe_target2;
do {
rand1 = random32();
} while (rand1 <= div_factor);
@@ -203,16 +368,31 @@ int init_test_probes(void)
if (ret < 0)
errors++;
num_tests++;
ret = test_kprobes();
if (ret < 0)
errors++;
num_tests++;
ret = test_jprobe();
if (ret < 0)
errors++;
num_tests++;
ret = test_jprobes();
if (ret < 0)
errors++;
#ifdef CONFIG_KRETPROBES
num_tests++;
ret = test_kretprobe();
if (ret < 0)
errors++;
num_tests++;
ret = test_kretprobes();
if (ret < 0)
errors++;
#endif /* CONFIG_KRETPROBES */
if (errors)

View File

@@ -37,6 +37,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/ptrace.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -65,8 +66,9 @@ asmlinkage long sys_time(time_t __user * tloc)
if (tloc) {
if (put_user(i,tloc))
i = -EFAULT;
return -EFAULT;
}
force_successful_syscall_return();
return i;
}

View File

@@ -166,6 +166,8 @@ static void clockevents_notify_released(void)
void clockevents_register_device(struct clock_event_device *dev)
{
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
BUG_ON(!dev->cpumask);
/*
* A nsec2cyc multiplicator of 0 is invalid and we'd crash
* on it, so fix it up and emit a warning:

View File

@@ -145,10 +145,11 @@ static void clocksource_watchdog(unsigned long data)
* Cycle through CPUs to check if the CPUs stay
* synchronized to each other.
*/
int next_cpu = next_cpu_nr(raw_smp_processor_id(), cpu_online_map);
int next_cpu = cpumask_next(raw_smp_processor_id(),
cpu_online_mask);
if (next_cpu >= nr_cpu_ids)
next_cpu = first_cpu(cpu_online_map);
next_cpu = cpumask_first(cpu_online_mask);
watchdog_timer.expires += WATCHDOG_INTERVAL;
add_timer_on(&watchdog_timer, next_cpu);
}
@@ -173,7 +174,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
watchdog_last = watchdog->read();
watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
add_timer_on(&watchdog_timer,
first_cpu(cpu_online_map));
cpumask_first(cpu_online_mask));
}
} else {
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
@@ -195,7 +196,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
watchdog_timer.expires =
jiffies + WATCHDOG_INTERVAL;
add_timer_on(&watchdog_timer,
first_cpu(cpu_online_map));
cpumask_first(cpu_online_mask));
}
}
}

View File

@@ -45,7 +45,7 @@
*
* The value 8 is somewhat carefully chosen, as anything
* larger can result in overflows. NSEC_PER_JIFFY grows as
* HZ shrinks, so values greater then 8 overflow 32bits when
* HZ shrinks, so values greater than 8 overflow 32bits when
* HZ=100.
*/
#define JIFFIES_SHIFT 8

View File

@@ -28,7 +28,9 @@
*/
struct tick_device tick_broadcast_device;
static cpumask_t tick_broadcast_mask;
/* FIXME: Use cpumask_var_t. */
static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
static DECLARE_BITMAP(tmpmask, NR_CPUS);
static DEFINE_SPINLOCK(tick_broadcast_lock);
static int tick_broadcast_force;
@@ -46,9 +48,9 @@ struct tick_device *tick_get_broadcast_device(void)
return &tick_broadcast_device;
}
cpumask_t *tick_get_broadcast_mask(void)
struct cpumask *tick_get_broadcast_mask(void)
{
return &tick_broadcast_mask;
return to_cpumask(tick_broadcast_mask);
}
/*
@@ -72,7 +74,7 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
clockevents_exchange_device(NULL, dev);
tick_broadcast_device.evtdev = dev;
if (!cpus_empty(tick_broadcast_mask))
if (!cpumask_empty(tick_get_broadcast_mask()))
tick_broadcast_start_periodic(dev);
return 1;
}
@@ -104,7 +106,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
*/
if (!tick_device_is_functional(dev)) {
dev->event_handler = tick_handle_periodic;
cpu_set(cpu, tick_broadcast_mask);
cpumask_set_cpu(cpu, tick_get_broadcast_mask());
tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
ret = 1;
} else {
@@ -116,7 +118,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
int cpu = smp_processor_id();
cpu_clear(cpu, tick_broadcast_mask);
cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
tick_broadcast_clear_oneshot(cpu);
}
}
@@ -125,9 +127,9 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
}
/*
* Broadcast the event to the cpus, which are set in the mask
* Broadcast the event to the cpus, which are set in the mask (mangled).
*/
static void tick_do_broadcast(cpumask_t mask)
static void tick_do_broadcast(struct cpumask *mask)
{
int cpu = smp_processor_id();
struct tick_device *td;
@@ -135,21 +137,20 @@ static void tick_do_broadcast(cpumask_t mask)
/*
* Check, if the current cpu is in the mask
*/
if (cpu_isset(cpu, mask)) {
cpu_clear(cpu, mask);
if (cpumask_test_cpu(cpu, mask)) {
cpumask_clear_cpu(cpu, mask);
td = &per_cpu(tick_cpu_device, cpu);
td->evtdev->event_handler(td->evtdev);
}
if (!cpus_empty(mask)) {
if (!cpumask_empty(mask)) {
/*
* It might be necessary to actually check whether the devices
* have different broadcast functions. For now, just use the
* one of the first device. This works as long as we have this
* misfeature only on x86 (lapic)
*/
cpu = first_cpu(mask);
td = &per_cpu(tick_cpu_device, cpu);
td = &per_cpu(tick_cpu_device, cpumask_first(mask));
td->evtdev->broadcast(mask);
}
}
@@ -160,12 +161,11 @@ static void tick_do_broadcast(cpumask_t mask)
*/
static void tick_do_periodic_broadcast(void)
{
cpumask_t mask;
spin_lock(&tick_broadcast_lock);
cpus_and(mask, cpu_online_map, tick_broadcast_mask);
tick_do_broadcast(mask);
cpumask_and(to_cpumask(tmpmask),
cpu_online_mask, tick_get_broadcast_mask());
tick_do_broadcast(to_cpumask(tmpmask));
spin_unlock(&tick_broadcast_lock);
}
@@ -228,13 +228,13 @@ static void tick_do_broadcast_on_off(void *why)
if (!tick_device_is_functional(dev))
goto out;
bc_stopped = cpus_empty(tick_broadcast_mask);
bc_stopped = cpumask_empty(tick_get_broadcast_mask());
switch (*reason) {
case CLOCK_EVT_NOTIFY_BROADCAST_ON:
case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
if (!cpu_isset(cpu, tick_broadcast_mask)) {
cpu_set(cpu, tick_broadcast_mask);
if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
cpumask_set_cpu(cpu, tick_get_broadcast_mask());
if (tick_broadcast_device.mode ==
TICKDEV_MODE_PERIODIC)
clockevents_shutdown(dev);
@@ -244,8 +244,8 @@ static void tick_do_broadcast_on_off(void *why)
break;
case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
if (!tick_broadcast_force &&
cpu_isset(cpu, tick_broadcast_mask)) {
cpu_clear(cpu, tick_broadcast_mask);
cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
if (tick_broadcast_device.mode ==
TICKDEV_MODE_PERIODIC)
tick_setup_periodic(dev, 0);
@@ -253,7 +253,7 @@ static void tick_do_broadcast_on_off(void *why)
break;
}
if (cpus_empty(tick_broadcast_mask)) {
if (cpumask_empty(tick_get_broadcast_mask())) {
if (!bc_stopped)
clockevents_shutdown(bc);
} else if (bc_stopped) {
@@ -272,7 +272,7 @@ out:
*/
void tick_broadcast_on_off(unsigned long reason, int *oncpu)
{
if (!cpu_isset(*oncpu, cpu_online_map))
if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
"offline CPU #%d\n", *oncpu);
else
@@ -303,10 +303,10 @@ void tick_shutdown_broadcast(unsigned int *cpup)
spin_lock_irqsave(&tick_broadcast_lock, flags);
bc = tick_broadcast_device.evtdev;
cpu_clear(cpu, tick_broadcast_mask);
cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
if (bc && cpus_empty(tick_broadcast_mask))
if (bc && cpumask_empty(tick_get_broadcast_mask()))
clockevents_shutdown(bc);
}
@@ -342,10 +342,10 @@ int tick_resume_broadcast(void)
switch (tick_broadcast_device.mode) {
case TICKDEV_MODE_PERIODIC:
if(!cpus_empty(tick_broadcast_mask))
if (!cpumask_empty(tick_get_broadcast_mask()))
tick_broadcast_start_periodic(bc);
broadcast = cpu_isset(smp_processor_id(),
tick_broadcast_mask);
broadcast = cpumask_test_cpu(smp_processor_id(),
tick_get_broadcast_mask());
break;
case TICKDEV_MODE_ONESHOT:
broadcast = tick_resume_broadcast_oneshot(bc);
@@ -360,14 +360,15 @@ int tick_resume_broadcast(void)
#ifdef CONFIG_TICK_ONESHOT
static cpumask_t tick_broadcast_oneshot_mask;
/* FIXME: use cpumask_var_t. */
static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS);
/*
* Debugging: see timer_list.c
* Exposed for debugging: see timer_list.c
*/
cpumask_t *tick_get_broadcast_oneshot_mask(void)
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
return &tick_broadcast_oneshot_mask;
return to_cpumask(tick_broadcast_oneshot_mask);
}
static int tick_broadcast_set_event(ktime_t expires, int force)
@@ -389,7 +390,7 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
*/
void tick_check_oneshot_broadcast(int cpu)
{
if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) {
struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
@@ -402,7 +403,6 @@ void tick_check_oneshot_broadcast(int cpu)
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
struct tick_device *td;
cpumask_t mask;
ktime_t now, next_event;
int cpu;
@@ -410,13 +410,13 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
again:
dev->next_event.tv64 = KTIME_MAX;
next_event.tv64 = KTIME_MAX;
mask = CPU_MASK_NONE;
cpumask_clear(to_cpumask(tmpmask));
now = ktime_get();
/* Find all expired events */
for_each_cpu_mask_nr(cpu, tick_broadcast_oneshot_mask) {
for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) {
td = &per_cpu(tick_cpu_device, cpu);
if (td->evtdev->next_event.tv64 <= now.tv64)
cpu_set(cpu, mask);
cpumask_set_cpu(cpu, to_cpumask(tmpmask));
else if (td->evtdev->next_event.tv64 < next_event.tv64)
next_event.tv64 = td->evtdev->next_event.tv64;
}
@@ -424,7 +424,7 @@ again:
/*
* Wakeup the cpus which have an expired event.
*/
tick_do_broadcast(mask);
tick_do_broadcast(to_cpumask(tmpmask));
/*
* Two reasons for reprogram:
@@ -476,15 +476,16 @@ void tick_broadcast_oneshot_control(unsigned long reason)
goto out;
if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
if (!cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
cpu_set(cpu, tick_broadcast_oneshot_mask);
if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask());
clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
if (dev->next_event.tv64 < bc->next_event.tv64)
tick_broadcast_set_event(dev->next_event, 1);
}
} else {
if (cpu_isset(cpu, tick_broadcast_oneshot_mask)) {
cpu_clear(cpu, tick_broadcast_oneshot_mask);
if (cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
cpumask_clear_cpu(cpu,
tick_get_broadcast_oneshot_mask());
clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
if (dev->next_event.tv64 != KTIME_MAX)
tick_program_event(dev->next_event, 1);
@@ -502,15 +503,16 @@ out:
*/
static void tick_broadcast_clear_oneshot(int cpu)
{
cpu_clear(cpu, tick_broadcast_oneshot_mask);
cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
}
static void tick_broadcast_init_next_event(cpumask_t *mask, ktime_t expires)
static void tick_broadcast_init_next_event(struct cpumask *mask,
ktime_t expires)
{
struct tick_device *td;
int cpu;
for_each_cpu_mask_nr(cpu, *mask) {
for_each_cpu(cpu, mask) {
td = &per_cpu(tick_cpu_device, cpu);
if (td->evtdev)
td->evtdev->next_event = expires;
@@ -526,7 +528,6 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
if (bc->event_handler != tick_handle_oneshot_broadcast) {
int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
int cpu = smp_processor_id();
cpumask_t mask;
bc->event_handler = tick_handle_oneshot_broadcast;
clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
@@ -540,13 +541,15 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
* oneshot_mask bits for those and program the
* broadcast device to fire.
*/
mask = tick_broadcast_mask;
cpu_clear(cpu, mask);
cpus_or(tick_broadcast_oneshot_mask,
tick_broadcast_oneshot_mask, mask);
cpumask_copy(to_cpumask(tmpmask), tick_get_broadcast_mask());
cpumask_clear_cpu(cpu, to_cpumask(tmpmask));
cpumask_or(tick_get_broadcast_oneshot_mask(),
tick_get_broadcast_oneshot_mask(),
to_cpumask(tmpmask));
if (was_periodic && !cpus_empty(mask)) {
tick_broadcast_init_next_event(&mask, tick_next_period);
if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) {
tick_broadcast_init_next_event(to_cpumask(tmpmask),
tick_next_period);
tick_broadcast_set_event(tick_next_period, 1);
} else
bc->next_event.tv64 = KTIME_MAX;
@@ -585,7 +588,7 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
* Clear the broadcast mask flag for the dead cpu, but do not
* stop the broadcast device!
*/
cpu_clear(cpu, tick_broadcast_oneshot_mask);
cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

View File

@@ -136,7 +136,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
*/
static void tick_setup_device(struct tick_device *td,
struct clock_event_device *newdev, int cpu,
const cpumask_t *cpumask)
const struct cpumask *cpumask)
{
ktime_t next_event;
void (*handler)(struct clock_event_device *) = NULL;
@@ -171,8 +171,8 @@ static void tick_setup_device(struct tick_device *td,
* When the device is not per cpu, pin the interrupt to the
* current cpu:
*/
if (!cpus_equal(newdev->cpumask, *cpumask))
irq_set_affinity(newdev->irq, *cpumask);
if (!cpumask_equal(newdev->cpumask, cpumask))
irq_set_affinity(newdev->irq, cpumask);
/*
* When global broadcasting is active, check if the current
@@ -202,14 +202,14 @@ static int tick_check_new_device(struct clock_event_device *newdev)
spin_lock_irqsave(&tick_device_lock, flags);
cpu = smp_processor_id();
if (!cpu_isset(cpu, newdev->cpumask))
if (!cpumask_test_cpu(cpu, newdev->cpumask))
goto out_bc;
td = &per_cpu(tick_cpu_device, cpu);
curdev = td->evtdev;
/* cpu local device ? */
if (!cpus_equal(newdev->cpumask, cpumask_of_cpu(cpu))) {
if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) {
/*
* If the cpu affinity of the device interrupt can not
@@ -222,7 +222,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
* If we have a cpu local device already, do not replace it
* by a non cpu local device
*/
if (curdev && cpus_equal(curdev->cpumask, cpumask_of_cpu(cpu)))
if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
goto out_bc;
}
@@ -254,7 +254,7 @@ static int tick_check_new_device(struct clock_event_device *newdev)
curdev = NULL;
}
clockevents_exchange_device(curdev, newdev);
tick_setup_device(td, newdev, cpu, &cpumask_of_cpu(cpu));
tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
tick_oneshot_notify();
@@ -299,9 +299,9 @@ static void tick_shutdown(unsigned int *cpup)
}
/* Transfer the do_timer job away from this cpu */
if (*cpup == tick_do_timer_cpu) {
int cpu = first_cpu(cpu_online_map);
int cpu = cpumask_first(cpu_online_mask);
tick_do_timer_cpu = (cpu != NR_CPUS) ? cpu :
tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
TICK_DO_TIMER_NONE;
}
spin_unlock_irqrestore(&tick_device_lock, flags);

View File

@@ -144,7 +144,7 @@ void tick_nohz_update_jiffies(void)
if (!ts->tick_stopped)
return;
cpu_clear(cpu, nohz_cpu_mask);
cpumask_clear_cpu(cpu, nohz_cpu_mask);
now = ktime_get();
ts->idle_waketime = now;
@@ -301,7 +301,7 @@ void tick_nohz_stop_sched_tick(int inidle)
tick_do_timer_cpu = TICK_DO_TIMER_NONE;
if (delta_jiffies > 1)
cpu_set(cpu, nohz_cpu_mask);
cpumask_set_cpu(cpu, nohz_cpu_mask);
/* Skip reprogram of event if its not changed */
if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
@@ -319,7 +319,7 @@ void tick_nohz_stop_sched_tick(int inidle)
/*
* sched tick not stopped!
*/
cpu_clear(cpu, nohz_cpu_mask);
cpumask_clear_cpu(cpu, nohz_cpu_mask);
goto out;
}
@@ -361,7 +361,7 @@ void tick_nohz_stop_sched_tick(int inidle)
* softirq.
*/
tick_do_update_jiffies64(ktime_get());
cpu_clear(cpu, nohz_cpu_mask);
cpumask_clear_cpu(cpu, nohz_cpu_mask);
}
raise_softirq_irqoff(TIMER_SOFTIRQ);
out:
@@ -419,7 +419,9 @@ void tick_nohz_restart_sched_tick(void)
{
int cpu = smp_processor_id();
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
unsigned long ticks;
#endif
ktime_t now;
local_irq_disable();
@@ -439,8 +441,9 @@ void tick_nohz_restart_sched_tick(void)
select_nohz_load_balancer(0);
now = ktime_get();
tick_do_update_jiffies64(now);
cpu_clear(cpu, nohz_cpu_mask);
cpumask_clear_cpu(cpu, nohz_cpu_mask);
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
/*
* We stopped the tick in idle. Update process times would miss the
* time we slept as update_process_times does only a 1 tick
@@ -450,12 +453,9 @@ void tick_nohz_restart_sched_tick(void)
/*
* We might be one off. Do not randomly account a huge number of ticks!
*/
if (ticks && ticks < LONG_MAX) {
add_preempt_count(HARDIRQ_OFFSET);
account_system_time(current, HARDIRQ_OFFSET,
jiffies_to_cputime(ticks));
sub_preempt_count(HARDIRQ_OFFSET);
}
if (ticks && ticks < LONG_MAX)
account_idle_ticks(ticks);
#endif
touch_softlockup_watchdog();
/*

View File

@@ -46,6 +46,9 @@ struct timespec xtime __attribute__ ((aligned (16)));
struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
static unsigned long total_sleep_time; /* seconds */
/* flag for if timekeeping is suspended */
int __read_mostly timekeeping_suspended;
static struct timespec xtime_cache __attribute__ ((aligned (16)));
void update_xtime_cache(u64 nsec)
{
@@ -92,6 +95,8 @@ void getnstimeofday(struct timespec *ts)
unsigned long seq;
s64 nsecs;
WARN_ON(timekeeping_suspended);
do {
seq = read_seqbegin(&xtime_lock);
@@ -299,8 +304,6 @@ void __init timekeeping_init(void)
write_sequnlock_irqrestore(&xtime_lock, flags);
}
/* flag for if timekeeping is suspended */
static int timekeeping_suspended;
/* time in seconds when suspend began */
static unsigned long timekeeping_suspend_time;

View File

@@ -1018,21 +1018,6 @@ unsigned long get_next_timer_interrupt(unsigned long now)
}
#endif
#ifndef CONFIG_VIRT_CPU_ACCOUNTING
void account_process_tick(struct task_struct *p, int user_tick)
{
cputime_t one_jiffy = jiffies_to_cputime(1);
if (user_tick) {
account_user_time(p, one_jiffy);
account_user_time_scaled(p, cputime_to_scaled(one_jiffy));
} else {
account_system_time(p, HARDIRQ_OFFSET, one_jiffy);
account_system_time_scaled(p, cputime_to_scaled(one_jiffy));
}
}
#endif
/*
* Called from the timer interrupt handler to charge one tick to the current
* process. user_tick is 1 if the tick is user time, 0 for system.

View File

@@ -168,7 +168,13 @@ rb_event_length(struct ring_buffer_event *event)
*/
unsigned ring_buffer_event_length(struct ring_buffer_event *event)
{
return rb_event_length(event);
unsigned length = rb_event_length(event);
if (event->type != RINGBUF_TYPE_DATA)
return length;
length -= RB_EVNT_HDR_SIZE;
if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
length -= sizeof(event->array[0]);
return length;
}
EXPORT_SYMBOL_GPL(ring_buffer_event_length);
@@ -195,7 +201,7 @@ void *ring_buffer_event_data(struct ring_buffer_event *event)
EXPORT_SYMBOL_GPL(ring_buffer_event_data);
#define for_each_buffer_cpu(buffer, cpu) \
for_each_cpu_mask(cpu, buffer->cpumask)
for_each_cpu(cpu, buffer->cpumask)
#define TS_SHIFT 27
#define TS_MASK ((1ULL << TS_SHIFT) - 1)
@@ -267,7 +273,7 @@ struct ring_buffer {
unsigned pages;
unsigned flags;
int cpus;
cpumask_t cpumask;
cpumask_var_t cpumask;
atomic_t record_disabled;
struct mutex mutex;
@@ -458,6 +464,9 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
if (!buffer)
return NULL;
if (!alloc_cpumask_var(&buffer->cpumask, GFP_KERNEL))
goto fail_free_buffer;
buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
buffer->flags = flags;
@@ -465,14 +474,14 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
if (buffer->pages == 1)
buffer->pages++;
buffer->cpumask = cpu_possible_map;
cpumask_copy(buffer->cpumask, cpu_possible_mask);
buffer->cpus = nr_cpu_ids;
bsize = sizeof(void *) * nr_cpu_ids;
buffer->buffers = kzalloc(ALIGN(bsize, cache_line_size()),
GFP_KERNEL);
if (!buffer->buffers)
goto fail_free_buffer;
goto fail_free_cpumask;
for_each_buffer_cpu(buffer, cpu) {
buffer->buffers[cpu] =
@@ -492,6 +501,9 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
}
kfree(buffer->buffers);
fail_free_cpumask:
free_cpumask_var(buffer->cpumask);
fail_free_buffer:
kfree(buffer);
return NULL;
@@ -510,6 +522,8 @@ ring_buffer_free(struct ring_buffer *buffer)
for_each_buffer_cpu(buffer, cpu)
rb_free_cpu_buffer(buffer->buffers[cpu]);
free_cpumask_var(buffer->cpumask);
kfree(buffer);
}
EXPORT_SYMBOL_GPL(ring_buffer_free);
@@ -1283,7 +1297,7 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer,
cpu = raw_smp_processor_id();
if (!cpu_isset(cpu, buffer->cpumask))
if (!cpumask_test_cpu(cpu, buffer->cpumask))
goto out;
cpu_buffer = buffer->buffers[cpu];
@@ -1396,7 +1410,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
cpu = raw_smp_processor_id();
if (!cpu_isset(cpu, buffer->cpumask))
if (!cpumask_test_cpu(cpu, buffer->cpumask))
goto out;
cpu_buffer = buffer->buffers[cpu];
@@ -1478,7 +1492,7 @@ void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
if (!cpu_isset(cpu, buffer->cpumask))
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return;
cpu_buffer = buffer->buffers[cpu];
@@ -1498,7 +1512,7 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
if (!cpu_isset(cpu, buffer->cpumask))
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return;
cpu_buffer = buffer->buffers[cpu];
@@ -1515,7 +1529,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
if (!cpu_isset(cpu, buffer->cpumask))
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return 0;
cpu_buffer = buffer->buffers[cpu];
@@ -1532,7 +1546,7 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
if (!cpu_isset(cpu, buffer->cpumask))
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return 0;
cpu_buffer = buffer->buffers[cpu];
@@ -1850,7 +1864,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
struct buffer_page *reader;
int nr_loops = 0;
if (!cpu_isset(cpu, buffer->cpumask))
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return NULL;
cpu_buffer = buffer->buffers[cpu];
@@ -2025,7 +2039,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
struct ring_buffer_event *event;
unsigned long flags;
if (!cpu_isset(cpu, buffer->cpumask))
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return NULL;
spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
@@ -2062,7 +2076,7 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
struct ring_buffer_iter *iter;
unsigned long flags;
if (!cpu_isset(cpu, buffer->cpumask))
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return NULL;
iter = kmalloc(sizeof(*iter), GFP_KERNEL);
@@ -2172,7 +2186,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
unsigned long flags;
if (!cpu_isset(cpu, buffer->cpumask))
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return;
spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
@@ -2228,7 +2242,7 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
{
struct ring_buffer_per_cpu *cpu_buffer;
if (!cpu_isset(cpu, buffer->cpumask))
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return 1;
cpu_buffer = buffer->buffers[cpu];
@@ -2252,8 +2266,8 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a,
struct ring_buffer_per_cpu *cpu_buffer_a;
struct ring_buffer_per_cpu *cpu_buffer_b;
if (!cpu_isset(cpu, buffer_a->cpumask) ||
!cpu_isset(cpu, buffer_b->cpumask))
if (!cpumask_test_cpu(cpu, buffer_a->cpumask) ||
!cpumask_test_cpu(cpu, buffer_b->cpumask))
return -EINVAL;
/* At least make sure the two buffers are somewhat the same */

View File

@@ -89,10 +89,10 @@ static inline void ftrace_enable_cpu(void)
preempt_enable();
}
static cpumask_t __read_mostly tracing_buffer_mask;
static cpumask_var_t __read_mostly tracing_buffer_mask;
#define for_each_tracing_cpu(cpu) \
for_each_cpu_mask(cpu, tracing_buffer_mask)
for_each_cpu(cpu, tracing_buffer_mask)
/*
* ftrace_dump_on_oops - variable to dump ftrace buffer on oops
@@ -1811,10 +1811,10 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
return;
if (cpu_isset(iter->cpu, iter->started))
if (cpumask_test_cpu(iter->cpu, iter->started))
return;
cpu_set(iter->cpu, iter->started);
cpumask_set_cpu(iter->cpu, iter->started);
trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu);
}
@@ -2646,13 +2646,7 @@ static struct file_operations show_traces_fops = {
/*
* Only trace on a CPU if the bitmask is set:
*/
static cpumask_t tracing_cpumask = CPU_MASK_ALL;
/*
* When tracing/tracing_cpu_mask is modified then this holds
* the new bitmask we are about to install:
*/
static cpumask_t tracing_cpumask_new;
static cpumask_var_t tracing_cpumask;
/*
* The tracer itself will not take this lock, but still we want
@@ -2693,6 +2687,10 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
size_t count, loff_t *ppos)
{
int err, cpu;
cpumask_var_t tracing_cpumask_new;
if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
return -ENOMEM;
mutex_lock(&tracing_cpumask_update_lock);
err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
@@ -2706,26 +2704,28 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
* Increase/decrease the disabled counter if we are
* about to flip a bit in the cpumask:
*/
if (cpu_isset(cpu, tracing_cpumask) &&
!cpu_isset(cpu, tracing_cpumask_new)) {
if (cpumask_test_cpu(cpu, tracing_cpumask) &&
!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_inc(&global_trace.data[cpu]->disabled);
}
if (!cpu_isset(cpu, tracing_cpumask) &&
cpu_isset(cpu, tracing_cpumask_new)) {
if (!cpumask_test_cpu(cpu, tracing_cpumask) &&
cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_dec(&global_trace.data[cpu]->disabled);
}
}
__raw_spin_unlock(&ftrace_max_lock);
local_irq_enable();
tracing_cpumask = tracing_cpumask_new;
cpumask_copy(tracing_cpumask, tracing_cpumask_new);
mutex_unlock(&tracing_cpumask_update_lock);
free_cpumask_var(tracing_cpumask_new);
return count;
err_unlock:
mutex_unlock(&tracing_cpumask_update_lock);
free_cpumask_var(tracing_cpumask);
return err;
}
@@ -3114,10 +3114,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
if (!iter)
return -ENOMEM;
if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
kfree(iter);
return -ENOMEM;
}
mutex_lock(&trace_types_lock);
/* trace pipe does not show start of buffer */
cpus_setall(iter->started);
cpumask_setall(iter->started);
iter->tr = &global_trace;
iter->trace = current_trace;
@@ -3134,6 +3139,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
{
struct trace_iterator *iter = file->private_data;
free_cpumask_var(iter->started);
kfree(iter);
atomic_dec(&tracing_reader);
@@ -3752,7 +3758,6 @@ void ftrace_dump(void)
static DEFINE_SPINLOCK(ftrace_dump_lock);
/* use static because iter can be a bit big for the stack */
static struct trace_iterator iter;
static cpumask_t mask;
static int dump_ran;
unsigned long flags;
int cnt = 0, cpu;
@@ -3786,8 +3791,6 @@ void ftrace_dump(void)
* and then release the locks again.
*/
cpus_clear(mask);
while (!trace_empty(&iter)) {
if (!cnt)
@@ -3823,19 +3826,28 @@ __init static int tracer_alloc_buffers(void)
{
struct trace_array_cpu *data;
int i;
int ret = -ENOMEM;
if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
goto out;
if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL))
goto out_free_buffer_mask;
cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
cpumask_copy(tracing_cpumask, cpu_all_mask);
/* TODO: make the number of buffers hot pluggable with CPUS */
tracing_buffer_mask = cpu_possible_map;
global_trace.buffer = ring_buffer_alloc(trace_buf_size,
TRACE_BUFFER_FLAGS);
if (!global_trace.buffer) {
printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
WARN_ON(1);
return 0;
goto out_free_cpumask;
}
global_trace.entries = ring_buffer_size(global_trace.buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
max_tr.buffer = ring_buffer_alloc(trace_buf_size,
TRACE_BUFFER_FLAGS);
@@ -3843,7 +3855,7 @@ __init static int tracer_alloc_buffers(void)
printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
WARN_ON(1);
ring_buffer_free(global_trace.buffer);
return 0;
goto out_free_cpumask;
}
max_tr.entries = ring_buffer_size(max_tr.buffer);
WARN_ON(max_tr.entries != global_trace.entries);
@@ -3873,8 +3885,14 @@ __init static int tracer_alloc_buffers(void)
&trace_panic_notifier);
register_die_notifier(&trace_die_notifier);
ret = 0;
return 0;
out_free_cpumask:
free_cpumask_var(tracing_cpumask);
out_free_buffer_mask:
free_cpumask_var(tracing_buffer_mask);
out:
return ret;
}
early_initcall(tracer_alloc_buffers);
fs_initcall(tracer_init_debugfs);

View File

@@ -368,7 +368,7 @@ struct trace_iterator {
loff_t pos;
long idx;
cpumask_t started;
cpumask_var_t started;
};
int tracing_is_enabled(void);

View File

@@ -42,7 +42,7 @@ static int boot_trace_init(struct trace_array *tr)
int cpu;
boot_trace = tr;
for_each_cpu_mask(cpu, cpu_possible_map)
for_each_cpu(cpu, cpu_possible_mask)
tracing_reset(tr, cpu);
tracing_sched_switch_assign_trace(tr);

View File

@@ -79,7 +79,7 @@ print_graph_cpu(struct trace_seq *s, int cpu)
int i;
int ret;
int log10_this = log10_cpu(cpu);
int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
int log10_all = log10_cpu(cpumask_weight(cpu_online_mask));
/*

View File

@@ -46,7 +46,7 @@ static void bts_trace_start(struct trace_array *tr)
tracing_reset_online_cpus(tr);
for_each_cpu_mask(cpu, cpu_possible_map)
for_each_cpu(cpu, cpu_possible_mask)
smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
}
@@ -62,7 +62,7 @@ static void bts_trace_stop(struct trace_array *tr)
{
int cpu;
for_each_cpu_mask(cpu, cpu_possible_map)
for_each_cpu(cpu, cpu_possible_mask)
smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
}
@@ -172,7 +172,7 @@ static void trace_bts_prepare(struct trace_iterator *iter)
{
int cpu;
for_each_cpu_mask(cpu, cpu_possible_map)
for_each_cpu(cpu, cpu_possible_mask)
smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1);
}

View File

@@ -39,7 +39,7 @@ static int power_trace_init(struct trace_array *tr)
trace_power_enabled = 1;
for_each_cpu_mask(cpu, cpu_possible_map)
for_each_cpu(cpu, cpu_possible_mask)
tracing_reset(tr, cpu);
return 0;
}

View File

@@ -196,9 +196,9 @@ static enum hrtimer_restart stack_trace_timer_fn(struct hrtimer *hrtimer)
return HRTIMER_RESTART;
}
static void start_stack_timer(int cpu)
static void start_stack_timer(void *unused)
{
struct hrtimer *hrtimer = &per_cpu(stack_trace_hrtimer, cpu);
struct hrtimer *hrtimer = &__get_cpu_var(stack_trace_hrtimer);
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = stack_trace_timer_fn;
@@ -208,14 +208,7 @@ static void start_stack_timer(int cpu)
static void start_stack_timers(void)
{
cpumask_t saved_mask = current->cpus_allowed;
int cpu;
for_each_online_cpu(cpu) {
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
start_stack_timer(cpu);
}
set_cpus_allowed_ptr(current, &saved_mask);
on_each_cpu(start_stack_timer, NULL, 1);
}
static void stop_stack_timer(int cpu)

View File

@@ -92,8 +92,8 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
mm = get_task_mm(p);
if (mm) {
/* adjust to KB unit */
stats->hiwater_rss = mm->hiwater_rss * PAGE_SIZE / KB;
stats->hiwater_vm = mm->hiwater_vm * PAGE_SIZE / KB;
stats->hiwater_rss = get_mm_hiwater_rss(mm) * PAGE_SIZE / KB;
stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB;
mmput(mm);
}
stats->read_char = p->ioac.rchar;

21
kernel/up.c Normal file
View File

@@ -0,0 +1,21 @@
/*
* Uniprocessor-only support functions. The counterpart to kernel/smp.c
*/
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/smp.h>
int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
int wait)
{
WARN_ON(cpu != 0);
local_irq_disable();
(func)(info);
local_irq_enable();
return 0;
}
EXPORT_SYMBOL(smp_call_function_single);

View File

@@ -73,7 +73,7 @@ static DEFINE_SPINLOCK(workqueue_lock);
static LIST_HEAD(workqueues);
static int singlethread_cpu __read_mostly;
static cpumask_t cpu_singlethread_map __read_mostly;
static const struct cpumask *cpu_singlethread_map __read_mostly;
/*
* _cpu_down() first removes CPU from cpu_online_map, then CPU_DEAD
* flushes cwq->worklist. This means that flush_workqueue/wait_on_work
@@ -81,7 +81,7 @@ static cpumask_t cpu_singlethread_map __read_mostly;
* use cpu_possible_map, the cpumask below is more a documentation
* than optimization.
*/
static cpumask_t cpu_populated_map __read_mostly;
static cpumask_var_t cpu_populated_map __read_mostly;
/* If it's single threaded, it isn't in the list of workqueues. */
static inline int is_wq_single_threaded(struct workqueue_struct *wq)
@@ -89,10 +89,10 @@ static inline int is_wq_single_threaded(struct workqueue_struct *wq)
return wq->singlethread;
}
static const cpumask_t *wq_cpu_map(struct workqueue_struct *wq)
static const struct cpumask *wq_cpu_map(struct workqueue_struct *wq)
{
return is_wq_single_threaded(wq)
? &cpu_singlethread_map : &cpu_populated_map;
? cpu_singlethread_map : cpu_populated_map;
}
static
@@ -410,7 +410,7 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
*/
void flush_workqueue(struct workqueue_struct *wq)
{
const cpumask_t *cpu_map = wq_cpu_map(wq);
const struct cpumask *cpu_map = wq_cpu_map(wq);
int cpu;
might_sleep();
@@ -532,7 +532,7 @@ static void wait_on_work(struct work_struct *work)
{
struct cpu_workqueue_struct *cwq;
struct workqueue_struct *wq;
const cpumask_t *cpu_map;
const struct cpumask *cpu_map;
int cpu;
might_sleep();
@@ -903,7 +903,7 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
*/
void destroy_workqueue(struct workqueue_struct *wq)
{
const cpumask_t *cpu_map = wq_cpu_map(wq);
const struct cpumask *cpu_map = wq_cpu_map(wq);
int cpu;
cpu_maps_update_begin();
@@ -933,7 +933,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
switch (action) {
case CPU_UP_PREPARE:
cpu_set(cpu, cpu_populated_map);
cpumask_set_cpu(cpu, cpu_populated_map);
}
undo:
list_for_each_entry(wq, &workqueues, list) {
@@ -964,7 +964,7 @@ undo:
switch (action) {
case CPU_UP_CANCELED:
case CPU_POST_DEAD:
cpu_clear(cpu, cpu_populated_map);
cpumask_clear_cpu(cpu, cpu_populated_map);
}
return ret;
@@ -1017,9 +1017,11 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
void __init init_workqueues(void)
{
cpu_populated_map = cpu_online_map;
singlethread_cpu = first_cpu(cpu_possible_map);
cpu_singlethread_map = cpumask_of_cpu(singlethread_cpu);
alloc_cpumask_var(&cpu_populated_map, GFP_KERNEL);
cpumask_copy(cpu_populated_map, cpu_online_mask);
singlethread_cpu = cpumask_first(cpu_possible_mask);
cpu_singlethread_map = cpumask_of(singlethread_cpu);
hotcpu_notifier(workqueue_cpu_callback, 0);
keventd_wq = create_workqueue("events");
BUG_ON(!keventd_wq);