Merge ../linux-2.6 by hand

This commit is contained in:
Paul Mackerras
2005-10-31 13:37:12 +11:00
2476 changed files with 119495 additions and 59935 deletions

View File

@@ -22,7 +22,6 @@ obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_IKCONFIG) += configs.o
obj-$(CONFIG_IKCONFIG_PROC) += configs.o
obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
@@ -32,6 +31,7 @@ obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is

View File

@@ -553,7 +553,7 @@ void acct_update_integrals(struct task_struct *tsk)
if (delta == 0)
return;
tsk->acct_stimexpd = tsk->stime;
tsk->acct_rss_mem1 += delta * get_mm_counter(tsk->mm, rss);
tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
}
}

View File

@@ -133,7 +133,7 @@ struct audit_buffer {
struct list_head list;
struct sk_buff *skb; /* formatted skb ready to send */
struct audit_context *ctx; /* NULL or associated context */
int gfp_mask;
gfp_t gfp_mask;
};
static void audit_set_pid(struct audit_buffer *ab, pid_t pid)
@@ -647,7 +647,7 @@ static inline void audit_get_stamp(struct audit_context *ctx,
* will be written at syscall exit. If there is no associated task, tsk
* should be NULL. */
struct audit_buffer *audit_log_start(struct audit_context *ctx, int gfp_mask,
struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
int type)
{
struct audit_buffer *ab = NULL;
@@ -879,7 +879,7 @@ void audit_log_end(struct audit_buffer *ab)
/* Log an audit record. This is a convenience function that calls
* audit_log_start, audit_log_vformat, and audit_log_end. It may be
* called in any context. */
void audit_log(struct audit_context *ctx, int gfp_mask, int type,
void audit_log(struct audit_context *ctx, gfp_t gfp_mask, int type,
const char *fmt, ...)
{
struct audit_buffer *ab;

View File

@@ -803,7 +803,7 @@ static void audit_log_task_info(struct audit_buffer *ab)
up_read(&mm->mmap_sem);
}
static void audit_log_exit(struct audit_context *context, unsigned int gfp_mask)
static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
{
int i;
struct audit_buffer *ab;

View File

@@ -17,6 +17,7 @@
/* This protects CPUs going up and down... */
DECLARE_MUTEX(cpucontrol);
EXPORT_SYMBOL_GPL(cpucontrol);
static struct notifier_block *cpu_chain;

View File

@@ -32,6 +32,7 @@
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/mempolicy.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mount.h>
@@ -60,6 +61,9 @@ struct cpuset {
cpumask_t cpus_allowed; /* CPUs allowed to tasks in cpuset */
nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */
/*
* Count is atomic so can incr (fork) or decr (exit) without a lock.
*/
atomic_t count; /* count tasks using this cpuset */
/*
@@ -142,80 +146,91 @@ static struct vfsmount *cpuset_mount;
static struct super_block *cpuset_sb = NULL;
/*
* cpuset_sem should be held by anyone who is depending on the children
* or sibling lists of any cpuset, or performing non-atomic operations
* on the flags or *_allowed values of a cpuset, such as raising the
* CS_REMOVED flag bit iff it is not already raised, or reading and
* conditionally modifying the *_allowed values. One kernel global
* cpuset semaphore should be sufficient - these things don't change
* that much.
* We have two global cpuset semaphores below. They can nest.
* It is ok to first take manage_sem, then nest callback_sem. We also
* require taking task_lock() when dereferencing a task's cpuset pointer.
* See "The task_lock() exception", at the end of this comment.
*
* The code that modifies cpusets holds cpuset_sem across the entire
* operation, from cpuset_common_file_write() down, single threading
* all cpuset modifications (except for counter manipulations from
* fork and exit) across the system. This presumes that cpuset
* modifications are rare - better kept simple and safe, even if slow.
* A task must hold both semaphores to modify cpusets. If a task
* holds manage_sem, then it blocks others wanting that semaphore,
* ensuring that it is the only task able to also acquire callback_sem
* and be able to modify cpusets. It can perform various checks on
* the cpuset structure first, knowing nothing will change. It can
* also allocate memory while just holding manage_sem. While it is
* performing these checks, various callback routines can briefly
* acquire callback_sem to query cpusets. Once it is ready to make
* the changes, it takes callback_sem, blocking everyone else.
*
* The code that reads cpusets, such as in cpuset_common_file_read()
* and below, only holds cpuset_sem across small pieces of code, such
* as when reading out possibly multi-word cpumasks and nodemasks, as
* the risks are less, and the desire for performance a little greater.
* The proc_cpuset_show() routine needs to hold cpuset_sem to insure
* that no cs->dentry is NULL, as it walks up the cpuset tree to root.
* Calls to the kernel memory allocator can not be made while holding
* callback_sem, as that would risk double tripping on callback_sem
* from one of the callbacks into the cpuset code from within
* __alloc_pages().
*
* The hooks from fork and exit, cpuset_fork() and cpuset_exit(), don't
* (usually) grab cpuset_sem. These are the two most performance
* critical pieces of code here. The exception occurs on exit(),
* when a task in a notify_on_release cpuset exits. Then cpuset_sem
* If a task is only holding callback_sem, then it has read-only
* access to cpusets.
*
* The task_struct fields mems_allowed and mems_generation may only
* be accessed in the context of that task, so require no locks.
*
* Any task can increment and decrement the count field without lock.
* So in general, code holding manage_sem or callback_sem can't rely
* on the count field not changing. However, if the count goes to
* zero, then only attach_task(), which holds both semaphores, can
* increment it again. Because a count of zero means that no tasks
* are currently attached, therefore there is no way a task attached
* to that cpuset can fork (the other way to increment the count).
* So code holding manage_sem or callback_sem can safely assume that
* if the count is zero, it will stay zero. Similarly, if a task
* holds manage_sem or callback_sem on a cpuset with zero count, it
* knows that the cpuset won't be removed, as cpuset_rmdir() needs
* both of those semaphores.
*
* A possible optimization to improve parallelism would be to make
* callback_sem a R/W semaphore (rwsem), allowing the callback routines
* to proceed in parallel, with read access, until the holder of
* manage_sem needed to take this rwsem for exclusive write access
* and modify some cpusets.
*
* The cpuset_common_file_write handler for operations that modify
* the cpuset hierarchy holds manage_sem across the entire operation,
* single threading all such cpuset modifications across the system.
*
* The cpuset_common_file_read() handlers only hold callback_sem across
* small pieces of code, such as when reading out possibly multi-word
* cpumasks and nodemasks.
*
* The fork and exit callbacks cpuset_fork() and cpuset_exit(), don't
* (usually) take either semaphore. These are the two most performance
* critical pieces of code here. The exception occurs on cpuset_exit(),
* when a task in a notify_on_release cpuset exits. Then manage_sem
* is taken, and if the cpuset count is zero, a usermode call made
* to /sbin/cpuset_release_agent with the name of the cpuset (path
* relative to the root of cpuset file system) as the argument.
*
* A cpuset can only be deleted if both its 'count' of using tasks is
* zero, and its list of 'children' cpusets is empty. Since all tasks
* in the system use _some_ cpuset, and since there is always at least
* one task in the system (init, pid == 1), therefore, top_cpuset
* always has either children cpusets and/or using tasks. So no need
* for any special hack to ensure that top_cpuset cannot be deleted.
* A cpuset can only be deleted if both its 'count' of using tasks
* is zero, and its list of 'children' cpusets is empty. Since all
* tasks in the system use _some_ cpuset, and since there is always at
* least one task in the system (init, pid == 1), therefore, top_cpuset
* always has either children cpusets and/or using tasks. So we don't
* need a special hack to ensure that top_cpuset cannot be deleted.
*
* The above "Tale of Two Semaphores" would be complete, but for:
*
* The task_lock() exception
*
* The need for this exception arises from the action of attach_task(),
* which overwrites one task's cpuset pointer with another. It does
* so using both semaphores, however there are several performance
* critical places that need to reference task->cpuset without the
* expense of grabbing a system global semaphore. Therefore except as
* noted below, when dereferencing or, as in attach_task(), modifying
* a task's cpuset pointer we use task_lock(), which acts on a spinlock
* (task->alloc_lock) already in the task_struct routinely used for
* such matters.
*/
static DECLARE_MUTEX(cpuset_sem);
static struct task_struct *cpuset_sem_owner;
static int cpuset_sem_depth;
/*
* The global cpuset semaphore cpuset_sem can be needed by the
* memory allocator to update a tasks mems_allowed (see the calls
* to cpuset_update_current_mems_allowed()) or to walk up the
* cpuset hierarchy to find a mem_exclusive cpuset see the calls
* to cpuset_excl_nodes_overlap()).
*
* But if the memory allocation is being done by cpuset.c code, it
* usually already holds cpuset_sem. Double tripping on a kernel
* semaphore deadlocks the current task, and any other task that
* subsequently tries to obtain the lock.
*
* Run all up's and down's on cpuset_sem through the following
* wrappers, which will detect this nested locking, and avoid
* deadlocking.
*/
static inline void cpuset_down(struct semaphore *psem)
{
if (cpuset_sem_owner != current) {
down(psem);
cpuset_sem_owner = current;
}
cpuset_sem_depth++;
}
static inline void cpuset_up(struct semaphore *psem)
{
if (--cpuset_sem_depth == 0) {
cpuset_sem_owner = NULL;
up(psem);
}
}
static DECLARE_MUTEX(manage_sem);
static DECLARE_MUTEX(callback_sem);
/*
* A couple of forward declarations required, due to cyclic reference loop:
@@ -390,7 +405,7 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
}
/*
* Call with cpuset_sem held. Writes path of cpuset into buf.
* Call with manage_sem held. Writes path of cpuset into buf.
* Returns 0 on success, -errno on error.
*/
@@ -442,10 +457,11 @@ static int cpuset_path(const struct cpuset *cs, char *buf, int buflen)
* status of the /sbin/cpuset_release_agent task, so no sense holding
* our caller up for that.
*
* The simple act of forking that task might require more memory,
* which might need cpuset_sem. So this routine must be called while
* cpuset_sem is not held, to avoid a possible deadlock. See also
* comments for check_for_release(), below.
* When we had only one cpuset semaphore, we had to call this
* without holding it, to avoid deadlock when call_usermodehelper()
* allocated memory. With two locks, we could now call this while
* holding manage_sem, but we still don't, so as to minimize
* the time manage_sem is held.
*/
static void cpuset_release_agent(const char *pathbuf)
@@ -477,15 +493,15 @@ static void cpuset_release_agent(const char *pathbuf)
* cs is notify_on_release() and now both the user count is zero and
* the list of children is empty, prepare cpuset path in a kmalloc'd
* buffer, to be returned via ppathbuf, so that the caller can invoke
* cpuset_release_agent() with it later on, once cpuset_sem is dropped.
* Call here with cpuset_sem held.
* cpuset_release_agent() with it later on, once manage_sem is dropped.
* Call here with manage_sem held.
*
* This check_for_release() routine is responsible for kmalloc'ing
* pathbuf. The above cpuset_release_agent() is responsible for
* kfree'ing pathbuf. The caller of these routines is responsible
* for providing a pathbuf pointer, initialized to NULL, then
* calling check_for_release() with cpuset_sem held and the address
* of the pathbuf pointer, then dropping cpuset_sem, then calling
* calling check_for_release() with manage_sem held and the address
* of the pathbuf pointer, then dropping manage_sem, then calling
* cpuset_release_agent() with pathbuf, as set by check_for_release().
*/
@@ -516,7 +532,7 @@ static void check_for_release(struct cpuset *cs, char **ppathbuf)
* One way or another, we guarantee to return some non-empty subset
* of cpu_online_map.
*
* Call with cpuset_sem held.
* Call with callback_sem held.
*/
static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
@@ -540,7 +556,7 @@ static void guarantee_online_cpus(const struct cpuset *cs, cpumask_t *pmask)
* One way or another, we guarantee to return some non-empty subset
* of node_online_map.
*
* Call with cpuset_sem held.
* Call with callback_sem held.
*/
static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
@@ -555,22 +571,47 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask)
}
/*
* Refresh current tasks mems_allowed and mems_generation from
* current tasks cpuset. Call with cpuset_sem held.
* Refresh current tasks mems_allowed and mems_generation from current
* tasks cpuset.
*
* This routine is needed to update the per-task mems_allowed
* data, within the tasks context, when it is trying to allocate
* memory (in various mm/mempolicy.c routines) and notices
* that some other task has been modifying its cpuset.
* Call without callback_sem or task_lock() held. May be called with
* or without manage_sem held. Will acquire task_lock() and might
* acquire callback_sem during call.
*
* The task_lock() is required to dereference current->cpuset safely.
* Without it, we could pick up the pointer value of current->cpuset
* in one instruction, and then attach_task could give us a different
* cpuset, and then the cpuset we had could be removed and freed,
* and then on our next instruction, we could dereference a no longer
* valid cpuset pointer to get its mems_generation field.
*
* This routine is needed to update the per-task mems_allowed data,
* within the tasks context, when it is trying to allocate memory
* (in various mm/mempolicy.c routines) and notices that some other
* task has been modifying its cpuset.
*/
static void refresh_mems(void)
{
struct cpuset *cs = current->cpuset;
int my_cpusets_mem_gen;
if (current->cpuset_mems_generation != cs->mems_generation) {
task_lock(current);
my_cpusets_mem_gen = current->cpuset->mems_generation;
task_unlock(current);
if (current->cpuset_mems_generation != my_cpusets_mem_gen) {
struct cpuset *cs;
nodemask_t oldmem = current->mems_allowed;
down(&callback_sem);
task_lock(current);
cs = current->cpuset;
guarantee_online_mems(cs, &current->mems_allowed);
current->cpuset_mems_generation = cs->mems_generation;
task_unlock(current);
up(&callback_sem);
if (!nodes_equal(oldmem, current->mems_allowed))
numa_policy_rebind(&oldmem, &current->mems_allowed);
}
}
@@ -579,7 +620,7 @@ static void refresh_mems(void)
*
* One cpuset is a subset of another if all its allowed CPUs and
* Memory Nodes are a subset of the other, and its exclusive flags
* are only set if the other's are set.
* are only set if the other's are set. Call holding manage_sem.
*/
static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
@@ -597,7 +638,7 @@ static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
* If we replaced the flag and mask values of the current cpuset
* (cur) with those values in the trial cpuset (trial), would
* our various subset and exclusive rules still be valid? Presumes
* cpuset_sem held.
* manage_sem held.
*
* 'cur' is the address of an actual, in-use cpuset. Operations
* such as list traversal that depend on the actual address of the
@@ -651,7 +692,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
* exclusive child cpusets
* Build these two partitions by calling partition_sched_domains
*
* Call with cpuset_sem held. May nest a call to the
* Call with manage_sem held. May nest a call to the
* lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
*/
@@ -696,6 +737,10 @@ static void update_cpu_domains(struct cpuset *cur)
unlock_cpu_hotplug();
}
/*
* Call with manage_sem held. May take callback_sem during call.
*/
static int update_cpumask(struct cpuset *cs, char *buf)
{
struct cpuset trialcs;
@@ -712,12 +757,18 @@ static int update_cpumask(struct cpuset *cs, char *buf)
if (retval < 0)
return retval;
cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed);
down(&callback_sem);
cs->cpus_allowed = trialcs.cpus_allowed;
up(&callback_sem);
if (is_cpu_exclusive(cs) && !cpus_unchanged)
update_cpu_domains(cs);
return 0;
}
/*
* Call with manage_sem held. May take callback_sem during call.
*/
static int update_nodemask(struct cpuset *cs, char *buf)
{
struct cpuset trialcs;
@@ -732,9 +783,11 @@ static int update_nodemask(struct cpuset *cs, char *buf)
return -ENOSPC;
retval = validate_change(cs, &trialcs);
if (retval == 0) {
down(&callback_sem);
cs->mems_allowed = trialcs.mems_allowed;
atomic_inc(&cpuset_mems_generation);
cs->mems_generation = atomic_read(&cpuset_mems_generation);
up(&callback_sem);
}
return retval;
}
@@ -745,6 +798,8 @@ static int update_nodemask(struct cpuset *cs, char *buf)
* CS_NOTIFY_ON_RELEASE)
* cs: the cpuset to update
* buf: the buffer where we read the 0 or 1
*
* Call with manage_sem held.
*/
static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
@@ -766,16 +821,27 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
return err;
cpu_exclusive_changed =
(is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
down(&callback_sem);
if (turning_on)
set_bit(bit, &cs->flags);
else
clear_bit(bit, &cs->flags);
up(&callback_sem);
if (cpu_exclusive_changed)
update_cpu_domains(cs);
return 0;
}
/*
* Attach task specified by pid in 'pidbuf' to cpuset 'cs', possibly
* writing the path of the old cpuset in 'ppathbuf' if it needs to be
* notified on release.
*
* Call holding manage_sem. May take callback_sem and task_lock of
* the task 'pid' during call.
*/
static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
{
pid_t pid;
@@ -792,7 +858,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
read_lock(&tasklist_lock);
tsk = find_task_by_pid(pid);
if (!tsk) {
if (!tsk || tsk->flags & PF_EXITING) {
read_unlock(&tasklist_lock);
return -ESRCH;
}
@@ -810,10 +876,13 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
get_task_struct(tsk);
}
down(&callback_sem);
task_lock(tsk);
oldcs = tsk->cpuset;
if (!oldcs) {
task_unlock(tsk);
up(&callback_sem);
put_task_struct(tsk);
return -ESRCH;
}
@@ -824,6 +893,7 @@ static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
guarantee_online_cpus(cs, &cpus);
set_cpus_allowed(tsk, cpus);
up(&callback_sem);
put_task_struct(tsk);
if (atomic_dec_and_test(&oldcs->count))
check_for_release(oldcs, ppathbuf);
@@ -867,7 +937,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
}
buffer[nbytes] = 0; /* nul-terminate */
cpuset_down(&cpuset_sem);
down(&manage_sem);
if (is_removed(cs)) {
retval = -ENODEV;
@@ -901,7 +971,7 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *us
if (retval == 0)
retval = nbytes;
out2:
cpuset_up(&cpuset_sem);
up(&manage_sem);
cpuset_release_agent(pathbuf);
out1:
kfree(buffer);
@@ -941,9 +1011,9 @@ static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
{
cpumask_t mask;
cpuset_down(&cpuset_sem);
down(&callback_sem);
mask = cs->cpus_allowed;
cpuset_up(&cpuset_sem);
up(&callback_sem);
return cpulist_scnprintf(page, PAGE_SIZE, mask);
}
@@ -952,9 +1022,9 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
{
nodemask_t mask;
cpuset_down(&cpuset_sem);
down(&callback_sem);
mask = cs->mems_allowed;
cpuset_up(&cpuset_sem);
up(&callback_sem);
return nodelist_scnprintf(page, PAGE_SIZE, mask);
}
@@ -995,7 +1065,6 @@ static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
goto out;
}
*s++ = '\n';
*s = '\0';
retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
out:
@@ -1048,6 +1117,21 @@ static int cpuset_file_release(struct inode *inode, struct file *file)
return 0;
}
/*
 * cpuset_rename - rename a cpuset directory without moving it.
 *
 * The checks are applied in this order, and the first one that fails
 * decides the return value:
 *   -ENOTDIR  the source dentry's inode is not a directory
 *   -EEXIST   the target dentry already has an inode
 *   -EIO      the source and target parent directories differ
 *
 * If all checks pass, the rename is handed to simple_rename() and its
 * result is returned unchanged.
 */
static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry)
{
	int err = 0;

	if (!S_ISDIR(old_dentry->d_inode->i_mode))
		err = -ENOTDIR;
	else if (new_dentry->d_inode)
		err = -EEXIST;
	else if (old_dir != new_dir)
		err = -EIO;

	if (err)
		return err;

	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
}
static struct file_operations cpuset_file_operations = {
.read = cpuset_file_read,
.write = cpuset_file_write,
@@ -1060,6 +1144,7 @@ static struct inode_operations cpuset_dir_inode_operations = {
.lookup = simple_lookup,
.mkdir = cpuset_mkdir,
.rmdir = cpuset_rmdir,
.rename = cpuset_rename,
};
static int cpuset_create_file(struct dentry *dentry, int mode)
@@ -1163,7 +1248,9 @@ struct ctr_struct {
/*
* Load into 'pidarray' up to 'npids' of the tasks using cpuset 'cs'.
* Return actual number of pids loaded.
* Return actual number of pids loaded. No need to task_lock(p)
* when reading out p->cpuset, as we don't really care if it changes
* on the next cycle, and we are not going to try to dereference it.
*/
static inline int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs)
{
@@ -1205,6 +1292,12 @@ static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
return cnt;
}
/*
* Handle an open on 'tasks' file. Prepare a buffer listing the
* process id's of tasks currently attached to the cpuset being opened.
*
* Does not require any specific cpuset semaphores, and does not take any.
*/
static int cpuset_tasks_open(struct inode *unused, struct file *file)
{
struct cpuset *cs = __d_cs(file->f_dentry->d_parent);
@@ -1352,7 +1445,8 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
if (!cs)
return -ENOMEM;
cpuset_down(&cpuset_sem);
down(&manage_sem);
refresh_mems();
cs->flags = 0;
if (notify_on_release(parent))
set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
@@ -1366,25 +1460,27 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode)
cs->parent = parent;
down(&callback_sem);
list_add(&cs->sibling, &cs->parent->children);
up(&callback_sem);
err = cpuset_create_dir(cs, name, mode);
if (err < 0)
goto err;
/*
* Release cpuset_sem before cpuset_populate_dir() because it
* Release manage_sem before cpuset_populate_dir() because it
* will down() this new directory's i_sem and if we race with
* another mkdir, we might deadlock.
*/
cpuset_up(&cpuset_sem);
up(&manage_sem);
err = cpuset_populate_dir(cs->dentry);
/* If err < 0, we have a half-filled directory - oh well ;) */
return 0;
err:
list_del(&cs->sibling);
cpuset_up(&cpuset_sem);
up(&manage_sem);
kfree(cs);
return err;
}
@@ -1406,29 +1502,32 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
/* the vfs holds both inode->i_sem already */
cpuset_down(&cpuset_sem);
down(&manage_sem);
refresh_mems();
if (atomic_read(&cs->count) > 0) {
cpuset_up(&cpuset_sem);
up(&manage_sem);
return -EBUSY;
}
if (!list_empty(&cs->children)) {
cpuset_up(&cpuset_sem);
up(&manage_sem);
return -EBUSY;
}
parent = cs->parent;
down(&callback_sem);
set_bit(CS_REMOVED, &cs->flags);
if (is_cpu_exclusive(cs))
update_cpu_domains(cs);
list_del(&cs->sibling); /* delete my sibling from parent->children */
if (list_empty(&parent->children))
check_for_release(parent, &pathbuf);
spin_lock(&cs->dentry->d_lock);
d = dget(cs->dentry);
cs->dentry = NULL;
spin_unlock(&d->d_lock);
cpuset_d_remove_dir(d);
dput(d);
cpuset_up(&cpuset_sem);
up(&callback_sem);
if (list_empty(&parent->children))
check_for_release(parent, &pathbuf);
up(&manage_sem);
cpuset_release_agent(pathbuf);
return 0;
}
@@ -1488,16 +1587,26 @@ void __init cpuset_init_smp(void)
* cpuset_fork - attach newly forked task to its parents cpuset.
* @tsk: pointer to task_struct of forking parent process.
*
* Description: By default, on fork, a task inherits its
* parent's cpuset. The pointer to the shared cpuset is
* automatically copied in fork.c by dup_task_struct().
* This cpuset_fork() routine need only increment the usage
* counter in that cpuset.
* Description: A task inherits its parent's cpuset at fork().
*
* A pointer to the shared cpuset was automatically copied in fork.c
* by dup_task_struct(). However, we ignore that copy, since it was
* not made under the protection of task_lock(), so might no longer be
* a valid cpuset pointer. attach_task() might have already changed
* current->cpuset, allowing the previously referenced cpuset to
* be removed and freed. Instead, we task_lock(current) and copy
* its present value of current->cpuset for our freshly forked child.
*
* At the point that cpuset_fork() is called, 'current' is the parent
* task, and the passed argument 'child' points to the child task.
**/
void cpuset_fork(struct task_struct *tsk)
void cpuset_fork(struct task_struct *child)
{
atomic_inc(&tsk->cpuset->count);
task_lock(current);
child->cpuset = current->cpuset;
atomic_inc(&child->cpuset->count);
task_unlock(current);
}
/**
@@ -1506,35 +1615,42 @@ void cpuset_fork(struct task_struct *tsk)
*
* Description: Detach cpuset from @tsk and release it.
*
* Note that cpusets marked notify_on_release force every task
* in them to take the global cpuset_sem semaphore when exiting.
* This could impact scaling on very large systems. Be reluctant
* to use notify_on_release cpusets where very high task exit
* scaling is required on large systems.
* Note that cpusets marked notify_on_release force every task in
* them to take the global manage_sem semaphore when exiting.
* This could impact scaling on very large systems. Be reluctant to
* use notify_on_release cpusets where very high task exit scaling
* is required on large systems.
*
* Don't even think about derefencing 'cs' after the cpuset use
* count goes to zero, except inside a critical section guarded
* by the cpuset_sem semaphore. If you don't hold cpuset_sem,
* then a zero cpuset use count is a license to any other task to
* nuke the cpuset immediately.
* Don't even think about dereferencing 'cs' after the cpuset use count
* goes to zero, except inside a critical section guarded by manage_sem
* or callback_sem. Otherwise a zero cpuset use count is a license to
* any other task to nuke the cpuset immediately, via cpuset_rmdir().
*
* This routine has to take manage_sem, not callback_sem, because
* it is holding that semaphore while calling check_for_release(),
* which calls kmalloc(), so can't be called holding callback_sem.
*
* We don't need to task_lock() this reference to tsk->cpuset,
* because tsk is already marked PF_EXITING, so attach_task() won't
* mess with it.
**/
void cpuset_exit(struct task_struct *tsk)
{
struct cpuset *cs;
task_lock(tsk);
BUG_ON(!(tsk->flags & PF_EXITING));
cs = tsk->cpuset;
tsk->cpuset = NULL;
task_unlock(tsk);
if (notify_on_release(cs)) {
char *pathbuf = NULL;
cpuset_down(&cpuset_sem);
down(&manage_sem);
if (atomic_dec_and_test(&cs->count))
check_for_release(cs, &pathbuf);
cpuset_up(&cpuset_sem);
up(&manage_sem);
cpuset_release_agent(pathbuf);
} else {
atomic_dec(&cs->count);
@@ -1555,11 +1671,11 @@ cpumask_t cpuset_cpus_allowed(const struct task_struct *tsk)
{
cpumask_t mask;
cpuset_down(&cpuset_sem);
down(&callback_sem);
task_lock((struct task_struct *)tsk);
guarantee_online_cpus(tsk->cpuset, &mask);
task_unlock((struct task_struct *)tsk);
cpuset_up(&cpuset_sem);
up(&callback_sem);
return mask;
}
@@ -1575,19 +1691,28 @@ void cpuset_init_current_mems_allowed(void)
* If the current tasks cpusets mems_allowed changed behind our backs,
* update current->mems_allowed and mems_generation to the new value.
* Do not call this routine if in_interrupt().
*
* Call without callback_sem or task_lock() held. May be called
* with or without manage_sem held. Unless exiting, it will acquire
* task_lock(). Also might acquire callback_sem during call to
* refresh_mems().
*/
void cpuset_update_current_mems_allowed(void)
{
struct cpuset *cs = current->cpuset;
struct cpuset *cs;
int need_to_refresh = 0;
task_lock(current);
cs = current->cpuset;
if (!cs)
return; /* task is exiting */
if (current->cpuset_mems_generation != cs->mems_generation) {
cpuset_down(&cpuset_sem);
goto done;
if (current->cpuset_mems_generation != cs->mems_generation)
need_to_refresh = 1;
done:
task_unlock(current);
if (need_to_refresh)
refresh_mems();
cpuset_up(&cpuset_sem);
}
}
/**
@@ -1621,7 +1746,7 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
/*
* nearest_exclusive_ancestor() - Returns the nearest mem_exclusive
* ancestor to the specified cpuset. Call while holding cpuset_sem.
* ancestor to the specified cpuset. Call holding callback_sem.
* If no ancestor is mem_exclusive (an unusual configuration), then
* returns the root cpuset.
*/
@@ -1648,12 +1773,12 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
* GFP_KERNEL allocations are not so marked, so can escape to the
* nearest mem_exclusive ancestor cpuset.
*
* Scanning up parent cpusets requires cpuset_sem. The __alloc_pages()
* Scanning up parent cpusets requires callback_sem. The __alloc_pages()
* routine only calls here with __GFP_HARDWALL bit _not_ set if
* it's a GFP_KERNEL allocation, and all nodes in the current tasks
* mems_allowed came up empty on the first pass over the zonelist.
* So only GFP_KERNEL allocations, if all nodes in the cpuset are
* short of memory, might require taking the cpuset_sem semaphore.
* short of memory, might require taking the callback_sem semaphore.
*
* The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
* calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
@@ -1685,14 +1810,16 @@ int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
return 0;
/* Not hardwall and node outside mems_allowed: scan up cpusets */
cpuset_down(&cpuset_sem);
cs = current->cpuset;
if (!cs)
goto done; /* current task exiting */
cs = nearest_exclusive_ancestor(cs);
down(&callback_sem);
if (current->flags & PF_EXITING) /* Let dying task have memory */
return 1;
task_lock(current);
cs = nearest_exclusive_ancestor(current->cpuset);
task_unlock(current);
allowed = node_isset(node, cs->mems_allowed);
done:
cpuset_up(&cpuset_sem);
up(&callback_sem);
return allowed;
}
@@ -1705,7 +1832,7 @@ done:
* determine if task @p's memory usage might impact the memory
* available to the current task.
*
* Acquires cpuset_sem - not suitable for calling from a fast path.
* Acquires callback_sem - not suitable for calling from a fast path.
**/
int cpuset_excl_nodes_overlap(const struct task_struct *p)
@@ -1713,18 +1840,27 @@ int cpuset_excl_nodes_overlap(const struct task_struct *p)
const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */
int overlap = 0; /* do cpusets overlap? */
cpuset_down(&cpuset_sem);
cs1 = current->cpuset;
if (!cs1)
goto done; /* current task exiting */
cs2 = p->cpuset;
if (!cs2)
goto done; /* task p is exiting */
cs1 = nearest_exclusive_ancestor(cs1);
cs2 = nearest_exclusive_ancestor(cs2);
down(&callback_sem);
task_lock(current);
if (current->flags & PF_EXITING) {
task_unlock(current);
goto done;
}
cs1 = nearest_exclusive_ancestor(current->cpuset);
task_unlock(current);
task_lock((struct task_struct *)p);
if (p->flags & PF_EXITING) {
task_unlock((struct task_struct *)p);
goto done;
}
cs2 = nearest_exclusive_ancestor(p->cpuset);
task_unlock((struct task_struct *)p);
overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
done:
cpuset_up(&cpuset_sem);
up(&callback_sem);
return overlap;
}
@@ -1733,6 +1869,10 @@ done:
* proc_cpuset_show()
* - Print tasks cpuset path into seq_file.
* - Used for /proc/<pid>/cpuset.
* - No need to task_lock(tsk) on this tsk->cpuset reference, as it
* doesn't really matter if tsk->cpuset changes after we read it,
* and we take manage_sem, keeping attach_task() from changing it
* anyway.
*/
static int proc_cpuset_show(struct seq_file *m, void *v)
@@ -1747,10 +1887,8 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
return -ENOMEM;
tsk = m->private;
cpuset_down(&cpuset_sem);
task_lock(tsk);
down(&manage_sem);
cs = tsk->cpuset;
task_unlock(tsk);
if (!cs) {
retval = -EINVAL;
goto out;
@@ -1762,7 +1900,7 @@ static int proc_cpuset_show(struct seq_file *m, void *v)
seq_puts(m, buf);
seq_putc(m, '\n');
out:
cpuset_up(&cpuset_sem);
up(&manage_sem);
kfree(buf);
return retval;
}

View File

@@ -547,7 +547,7 @@ static inline void reparent_thread(task_t *p, task_t *father, int traced)
if (p->pdeath_signal)
/* We already hold the tasklist_lock here. */
group_send_sig_info(p->pdeath_signal, (void *) 0, p);
group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
/* Move the child from its dying parent to the new one. */
if (unlikely(traced)) {
@@ -591,8 +591,8 @@ static inline void reparent_thread(task_t *p, task_t *father, int traced)
int pgrp = process_group(p);
if (will_become_orphaned_pgrp(pgrp, NULL) && has_stopped_jobs(pgrp)) {
__kill_pg_info(SIGHUP, (void *)1, pgrp);
__kill_pg_info(SIGCONT, (void *)1, pgrp);
__kill_pg_info(SIGHUP, SEND_SIG_PRIV, pgrp);
__kill_pg_info(SIGCONT, SEND_SIG_PRIV, pgrp);
}
}
}
@@ -727,8 +727,8 @@ static void exit_notify(struct task_struct *tsk)
(t->signal->session == tsk->signal->session) &&
will_become_orphaned_pgrp(process_group(tsk), tsk) &&
has_stopped_jobs(process_group(tsk))) {
__kill_pg_info(SIGHUP, (void *)1, process_group(tsk));
__kill_pg_info(SIGCONT, (void *)1, process_group(tsk));
__kill_pg_info(SIGHUP, SEND_SIG_PRIV, process_group(tsk));
__kill_pg_info(SIGCONT, SEND_SIG_PRIV, process_group(tsk));
}
/* Let father know we died
@@ -783,10 +783,6 @@ static void exit_notify(struct task_struct *tsk)
/* If the process is dead, release it - nobody will wait for it */
if (state == EXIT_DEAD)
release_task(tsk);
/* PF_DEAD causes final put_task_struct after we schedule. */
preempt_disable();
tsk->flags |= PF_DEAD;
}
fastcall NORET_TYPE void do_exit(long code)
@@ -839,7 +835,10 @@ fastcall NORET_TYPE void do_exit(long code)
preempt_count());
acct_update_integrals(tsk);
update_mem_hiwater(tsk);
if (tsk->mm) {
update_hiwater_rss(tsk->mm);
update_hiwater_vm(tsk->mm);
}
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
del_timer_sync(&tsk->signal->real_timer);
@@ -870,7 +869,11 @@ fastcall NORET_TYPE void do_exit(long code)
tsk->mempolicy = NULL;
#endif
BUG_ON(!(current->flags & PF_DEAD));
/* PF_DEAD causes final put_task_struct after we schedule. */
preempt_disable();
BUG_ON(tsk->flags & PF_DEAD);
tsk->flags |= PF_DEAD;
schedule();
BUG();
/* Avoid "noreturn function does return". */
@@ -1380,6 +1383,15 @@ repeat:
switch (p->state) {
case TASK_TRACED:
/*
* When we hit the race with PTRACE_ATTACH,
* we will not report this child. But the
* race means it has not yet been moved to
* our ptrace_children list, so we need to
* set the flag here to avoid a spurious ECHILD
* when the race happens with the only child.
*/
flag = 1;
if (!my_ptrace_child(p))
continue;
/*FALLTHROUGH*/

View File

@@ -182,37 +182,37 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
}
#ifdef CONFIG_MMU
static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
struct vm_area_struct * mpnt, *tmp, **pprev;
struct vm_area_struct *mpnt, *tmp, **pprev;
struct rb_node **rb_link, *rb_parent;
int retval;
unsigned long charge;
struct mempolicy *pol;
down_write(&oldmm->mmap_sem);
flush_cache_mm(current->mm);
flush_cache_mm(oldmm);
down_write(&mm->mmap_sem);
mm->locked_vm = 0;
mm->mmap = NULL;
mm->mmap_cache = NULL;
mm->free_area_cache = oldmm->mmap_base;
mm->cached_hole_size = ~0UL;
mm->map_count = 0;
set_mm_counter(mm, rss, 0);
set_mm_counter(mm, anon_rss, 0);
cpus_clear(mm->cpu_vm_mask);
mm->mm_rb = RB_ROOT;
rb_link = &mm->mm_rb.rb_node;
rb_parent = NULL;
pprev = &mm->mmap;
for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
struct file *file;
if (mpnt->vm_flags & VM_DONTCOPY) {
long pages = vma_pages(mpnt);
mm->total_vm -= pages;
__vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
-pages);
continue;
}
@@ -253,12 +253,8 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
}
/*
* Link in the new vma and copy the page table entries:
* link in first so that swapoff can see swap entries.
* Note that, exceptionally, here the vma is inserted
* without holding mm->mmap_sem.
* Link in the new vma and copy the page table entries.
*/
spin_lock(&mm->page_table_lock);
*pprev = tmp;
pprev = &tmp->vm_next;
@@ -267,8 +263,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
rb_parent = &tmp->vm_rb;
mm->map_count++;
retval = copy_page_range(mm, current->mm, tmp);
spin_unlock(&mm->page_table_lock);
retval = copy_page_range(mm, oldmm, tmp);
if (tmp->vm_ops && tmp->vm_ops->open)
tmp->vm_ops->open(tmp);
@@ -277,9 +272,9 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
goto out;
}
retval = 0;
out:
flush_tlb_mm(current->mm);
up_write(&mm->mmap_sem);
flush_tlb_mm(oldmm);
up_write(&oldmm->mmap_sem);
return retval;
fail_nomem_policy:
@@ -323,6 +318,8 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
INIT_LIST_HEAD(&mm->mmlist);
mm->core_waiters = 0;
mm->nr_ptes = 0;
set_mm_counter(mm, file_rss, 0);
set_mm_counter(mm, anon_rss, 0);
spin_lock_init(&mm->page_table_lock);
rwlock_init(&mm->ioctx_list_lock);
mm->ioctx_list = NULL;
@@ -499,7 +496,7 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
if (retval)
goto free_pt;
mm->hiwater_rss = get_mm_counter(mm,rss);
mm->hiwater_rss = get_mm_rss(mm);
mm->hiwater_vm = mm->total_vm;
good_mm:

View File

@@ -205,15 +205,13 @@ static int get_futex_key(unsigned long uaddr, union futex_key *key)
/*
* Do a quick atomic lookup first - this is the fastpath.
*/
spin_lock(&current->mm->page_table_lock);
page = follow_page(mm, uaddr, 0);
page = follow_page(mm, uaddr, FOLL_TOUCH|FOLL_GET);
if (likely(page != NULL)) {
key->shared.pgoff =
page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
spin_unlock(&current->mm->page_table_lock);
put_page(page);
return 0;
}
spin_unlock(&current->mm->page_table_lock);
/*
* Do it the general way.

View File

@@ -18,6 +18,7 @@
#include <linux/fs.h>
#include <linux/err.h>
#include <linux/proc_fs.h>
#include <linux/sched.h> /* for cond_resched */
#include <linux/mm.h>
#include <asm/sections.h>

View File

@@ -90,7 +90,7 @@ int kexec_should_crash(struct task_struct *p)
static int kimage_is_destination_range(struct kimage *image,
unsigned long start, unsigned long end);
static struct page *kimage_alloc_page(struct kimage *image,
unsigned int gfp_mask,
gfp_t gfp_mask,
unsigned long dest);
static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
@@ -326,8 +326,7 @@ static int kimage_is_destination_range(struct kimage *image,
return 0;
}
static struct page *kimage_alloc_pages(unsigned int gfp_mask,
unsigned int order)
static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
{
struct page *pages;
@@ -335,7 +334,7 @@ static struct page *kimage_alloc_pages(unsigned int gfp_mask,
if (pages) {
unsigned int count, i;
pages->mapping = NULL;
pages->private = order;
set_page_private(pages, order);
count = 1 << order;
for (i = 0; i < count; i++)
SetPageReserved(pages + i);
@@ -348,7 +347,7 @@ static void kimage_free_pages(struct page *page)
{
unsigned int order, count, i;
order = page->private;
order = page_private(page);
count = 1 << order;
for (i = 0; i < count; i++)
ClearPageReserved(page + i);
@@ -654,7 +653,7 @@ static kimage_entry_t *kimage_dst_used(struct kimage *image,
}
static struct page *kimage_alloc_page(struct kimage *image,
unsigned int gfp_mask,
gfp_t gfp_mask,
unsigned long destination)
{
/*

View File

@@ -131,14 +131,14 @@ struct subprocess_info {
static int ____call_usermodehelper(void *data)
{
struct subprocess_info *sub_info = data;
struct key *old_session;
struct key *new_session, *old_session;
int retval;
/* Unblock all signals and set the session keyring. */
key_get(sub_info->ring);
new_session = key_get(sub_info->ring);
flush_signals(current);
spin_lock_irq(&current->sighand->siglock);
old_session = __install_session_keyring(current, sub_info->ring);
old_session = __install_session_keyring(current, new_session);
flush_signal_handlers(current, 1);
sigemptyset(&current->blocked);
recalc_sigpending();

View File

@@ -35,6 +35,7 @@
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/moduleloader.h>
#include <asm-generic/sections.h>

View File

@@ -164,6 +164,12 @@ void kthread_bind(struct task_struct *k, unsigned int cpu)
EXPORT_SYMBOL(kthread_bind);
int kthread_stop(struct task_struct *k)
{
return kthread_stop_sem(k, NULL);
}
EXPORT_SYMBOL(kthread_stop);
int kthread_stop_sem(struct task_struct *k, struct semaphore *s)
{
int ret;
@@ -178,7 +184,10 @@ int kthread_stop(struct task_struct *k)
/* Now set kthread_should_stop() to true, and wake it up. */
kthread_stop_info.k = k;
wake_up_process(k);
if (s)
up(s);
else
wake_up_process(k);
put_task_struct(k);
/* Once it dies, reset stop ptr, gather result and we're done. */
@@ -189,7 +198,7 @@ int kthread_stop(struct task_struct *k)
return ret;
}
EXPORT_SYMBOL(kthread_stop);
EXPORT_SYMBOL(kthread_stop_sem);
static __init int helper_init(void)
{

View File

@@ -23,6 +23,7 @@
#include <linux/module.h>
#include <linux/device.h>
#include <linux/err.h>
#include <linux/slab.h>
#if 0
#define DEBUGP printk

View File

@@ -1225,7 +1225,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
/*
* The task was cleaned up already, no future firings.
*/
return;
goto out;
/*
* Fetch the current sample and update the timer's expiry time.
@@ -1235,7 +1235,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
bump_cpu_timer(timer, now);
if (unlikely(p->exit_state)) {
clear_dead_task(timer, now);
return;
goto out;
}
read_lock(&tasklist_lock); /* arm_timer needs it. */
} else {
@@ -1248,8 +1248,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
put_task_struct(p);
timer->it.cpu.task = p = NULL;
timer->it.cpu.expires.sched = 0;
read_unlock(&tasklist_lock);
return;
goto out_unlock;
} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
/*
* We've noticed that the thread is dead, but
@@ -1257,8 +1256,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
* drop our task ref.
*/
clear_dead_task(timer, now);
read_unlock(&tasklist_lock);
return;
goto out_unlock;
}
cpu_clock_sample_group(timer->it_clock, p, &now);
bump_cpu_timer(timer, now);
@@ -1270,7 +1268,13 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
*/
arm_timer(timer, now);
out_unlock:
read_unlock(&tasklist_lock);
out:
timer->it_overrun_last = timer->it_overrun;
timer->it_overrun = -1;
++timer->it_requeue_pending;
}
/*

View File

@@ -1295,13 +1295,6 @@ sys_clock_getres(clockid_t which_clock, struct timespec __user *tp)
return error;
}
static void nanosleep_wake_up(unsigned long __data)
{
struct task_struct *p = (struct task_struct *) __data;
wake_up_process(p);
}
/*
* The standard says that an absolute nanosleep call MUST wake up at
* the requested time in spite of clock settings. Here is what we do:
@@ -1442,7 +1435,6 @@ static int common_nsleep(clockid_t which_clock,
int flags, struct timespec *tsave)
{
struct timespec t, dum;
struct timer_list new_timer;
DECLARE_WAITQUEUE(abs_wqueue, current);
u64 rq_time = (u64)0;
s64 left;
@@ -1451,10 +1443,6 @@ static int common_nsleep(clockid_t which_clock,
&current_thread_info()->restart_block;
abs_wqueue.flags = 0;
init_timer(&new_timer);
new_timer.expires = 0;
new_timer.data = (unsigned long) current;
new_timer.function = nanosleep_wake_up;
abs = flags & TIMER_ABSTIME;
if (restart_block->fn == clock_nanosleep_restart) {
@@ -1490,13 +1478,8 @@ static int common_nsleep(clockid_t which_clock,
if (left < (s64)0)
break;
new_timer.expires = jiffies + left;
__set_current_state(TASK_INTERRUPTIBLE);
add_timer(&new_timer);
schedule_timeout_interruptible(left);
schedule();
del_timer_sync(&new_timer);
left = rq_time - get_jiffies_64();
} while (left > (s64)0 && !test_thread_flag(TIF_SIGPENDING));

View File

@@ -4,7 +4,7 @@ EXTRA_CFLAGS += -DDEBUG
endif
obj-y := main.o process.o console.o pm.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o snapshot.o
obj-$(CONFIG_SUSPEND_SMP) += smp.o

View File

@@ -30,7 +30,6 @@ extern int swsusp_check(void);
extern int swsusp_read(void);
extern void swsusp_close(void);
extern int swsusp_resume(void);
extern int swsusp_free(void);
static int noresume = 0;
@@ -93,10 +92,7 @@ static void free_some_memory(void)
printk("Freeing memory... ");
while ((tmp = shrink_all_memory(10000))) {
pages += tmp;
printk("\b%c", p[i]);
i++;
if (i > 3)
i = 0;
printk("\b%c", p[i++ % 4]);
}
printk("\bdone (%li pages freed)\n", pages);
}
@@ -178,13 +174,12 @@ int pm_suspend_disk(void)
goto Done;
if (in_suspend) {
device_resume();
pr_debug("PM: writing image.\n");
error = swsusp_write();
if (!error)
power_down(pm_disk_mode);
else {
/* swsusp_write can not fail in device_resume,
no need to do second device_resume */
swsusp_free();
unprepare_processes();
return error;
@@ -252,14 +247,17 @@ static int software_resume(void)
pr_debug("PM: Reading swsusp image.\n");
if ((error = swsusp_read()))
goto Cleanup;
if ((error = swsusp_read())) {
swsusp_free();
goto Thaw;
}
pr_debug("PM: Preparing devices for restore.\n");
if ((error = device_suspend(PMSG_FREEZE))) {
printk("Some devices failed to suspend\n");
goto Free;
swsusp_free();
goto Thaw;
}
mb();
@@ -268,9 +266,7 @@ static int software_resume(void)
swsusp_resume();
pr_debug("PM: Restore failed, recovering.n");
device_resume();
Free:
swsusp_free();
Cleanup:
Thaw:
unprepare_processes();
Done:
/* For success case, the suspend path will release the lock */

View File

@@ -167,6 +167,8 @@ static int enter_state(suspend_state_t state)
{
int error;
if (pm_ops->valid && !pm_ops->valid(state))
return -ENODEV;
if (down_trylock(&pm_sem))
return -EBUSY;
@@ -236,7 +238,8 @@ static ssize_t state_show(struct subsystem * subsys, char * buf)
char * s = buf;
for (i = 0; i < PM_SUSPEND_MAX; i++) {
if (pm_states[i])
if (pm_states[i] && pm_ops && (!pm_ops->valid
||(pm_ops->valid && pm_ops->valid(i))))
s += sprintf(s,"%s ",pm_states[i]);
}
s += sprintf(s,"\n");

View File

@@ -53,3 +53,20 @@ extern void thaw_processes(void);
extern int pm_prepare_console(void);
extern void pm_restore_console(void);
/* References to section boundaries */
extern const void __nosave_begin, __nosave_end;
extern unsigned int nr_copy_pages;
extern suspend_pagedir_t *pagedir_nosave;
extern suspend_pagedir_t *pagedir_save;
extern asmlinkage int swsusp_arch_suspend(void);
extern asmlinkage int swsusp_arch_resume(void);
extern int restore_highmem(void);
extern struct pbe * alloc_pagedir(unsigned nr_pages);
extern void create_pbe_list(struct pbe *pblist, unsigned nr_pages);
extern void swsusp_free(void);
extern int enough_swap(unsigned nr_pages);

435
kernel/power/snapshot.c Normal file
View File

@@ -0,0 +1,435 @@
/*
* linux/kernel/power/snapshot.c
*
* This file provides system snapshot/restore functionality.
*
* Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
*
* This file is released under the GPLv2, and is based on swsusp.c.
*
*/
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/smp_lock.h>
#include <linux/delay.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/pm.h>
#include <linux/device.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/console.h>
#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include "power.h"
#ifdef CONFIG_HIGHMEM
/*
 * One saved highmem page. Entries form a singly linked list that
 * save_highmem_zone() builds and restore_highmem() consumes.
 */
struct highmem_page {
/* Lowmem page holding a byte-for-byte copy of @page's contents. */
char *data;
/* The highmem page the copy was taken from (and is restored to). */
struct page *page;
/* Next entry in the list, or NULL at the end. */
struct highmem_page *next;
};
/* Head of the saved-page list; new entries are pushed at the front. */
static struct highmem_page *highmem_copy;
/*
 * save_highmem_zone - copy every in-use page of one zone into lowmem.
 * @zone: the zone to walk.
 *
 * Walks all page frames spanned by @zone.  Invalid frames, reserved pages
 * and pages flagged NosaveFree are skipped; every other page is copied
 * into a freshly allocated page and pushed onto the highmem_copy list.
 *
 * Returns 0 on success or -ENOMEM if an allocation fails.  On failure the
 * entries saved so far stay on highmem_copy.
 */
static int save_highmem_zone(struct zone *zone)
{
	unsigned long offset;

	mark_free_pages(zone);
	for (offset = 0; offset < zone->spanned_pages; offset++) {
		unsigned long pfn = zone->zone_start_pfn + offset;
		struct highmem_page *entry;
		struct page *page;
		void *src;

		/* Progress indicator. */
		if (pfn % 1000 == 0)
			printk(".");
		if (!pfn_valid(pfn))
			continue;

		page = pfn_to_page(pfn);
		/*
		 * This condition results from rvmalloc() sans vmalloc_32()
		 * and architectural memory reservations. This should be
		 * corrected eventually when the cases giving rise to this
		 * are better understood.
		 */
		if (PageReserved(page)) {
			printk("highmem reserved page?!\n");
			continue;
		}
		BUG_ON(PageNosave(page));
		if (PageNosaveFree(page))
			continue;

		entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
		if (!entry)
			return -ENOMEM;
		entry->data = (char *)get_zeroed_page(GFP_ATOMIC);
		if (!entry->data) {
			kfree(entry);
			return -ENOMEM;
		}

		/* Snapshot the page contents through a temporary mapping. */
		src = kmap_atomic(page, KM_USER0);
		memcpy(entry->data, src, PAGE_SIZE);
		kunmap_atomic(src, KM_USER0);

		/* Publish the finished entry at the head of the list. */
		entry->page = page;
		entry->next = highmem_copy;
		highmem_copy = entry;
	}
	return 0;
}
/*
 * save_highmem - save the contents of all highmem zones.
 *
 * Calls save_highmem_zone() on every zone for which is_highmem() is true
 * and stops at the first failure.
 *
 * Returns 0 on success or the error code from save_highmem_zone().
 */
static int save_highmem(void)
{
	struct zone *zone;

	pr_debug("swsusp: Saving Highmem\n");
	for_each_zone (zone) {
		int err;

		if (!is_highmem(zone))
			continue;
		err = save_highmem_zone(zone);
		if (err)
			return err;
	}
	return 0;
}
int restore_highmem(void)
{
printk("swsusp: Restoring Highmem\n");
while (highmem_copy) {
struct highmem_page *save = highmem_copy;
void *kaddr;
highmem_copy = save->next;
kaddr = kmap_atomic(save->page, KM_USER0);
memcpy(kaddr, save->data, PAGE_SIZE);
kunmap_atomic(kaddr, KM_USER0);
free_page((long) save->data);
kfree(save);
}
return 0;
}
#else
/*
 * Built without CONFIG_HIGHMEM: there is nothing to save or restore, so
 * both entry points are no-ops that report success.
 */
static int save_highmem(void) { return 0; }
int restore_highmem(void) { return 0; }
#endif /* CONFIG_HIGHMEM */
static int pfn_is_nosave(unsigned long pfn)
{
unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
}
/**
 *	saveable - Determine whether a page should be cloned or not.
 *	@zone: zone the page belongs to.
 *	@zone_pfn: pointer to the page's frame offset within @zone.
 *
 *	A page is not saved when its frame number is invalid, when it is
 *	flagged Nosave, when it is Reserved and lies inside the statically
 *	defined nosave range, or when it is flagged NosaveFree.  Every
 *	other page is saved.
 *
 *	Returns 1 if the page must be saved, 0 otherwise.
 */
static int saveable(struct zone *zone, unsigned long *zone_pfn)
{
	unsigned long pfn = zone->zone_start_pfn + *zone_pfn;
	struct page *page;

	if (!pfn_valid(pfn))
		return 0;

	page = pfn_to_page(pfn);
	/* A page must never be both Reserved and Nosave. */
	BUG_ON(PageReserved(page) && PageNosave(page));

	if (PageNosave(page))
		return 0;

	if (PageReserved(page) && pfn_is_nosave(pfn)) {
		pr_debug("[nosave pfn 0x%lx]", pfn);
		return 0;
	}

	return !PageNosaveFree(page);
}
/*
 * count_data_pages - count the pages that have to go into the image.
 *
 * Walks every non-highmem zone, calls mark_free_pages() on it, and counts
 * the page frames for which saveable() returns non-zero.
 *
 * Returns the number of saveable pages.
 */
static unsigned count_data_pages(void)
{
	unsigned total = 0;
	struct zone *zone;

	for_each_zone (zone) {
		unsigned long zone_pfn;

		if (is_highmem(zone))
			continue;

		mark_free_pages(zone);
		zone_pfn = 0;
		while (zone_pfn < zone->spanned_pages) {
			total += saveable(zone, &zone_pfn);
			++zone_pfn;
		}
	}
	return total;
}
/*
 * copy_data_pages - copy every saveable lowmem page into the image.
 * @pblist: list of page backup entries (PBEs) whose ->address fields
 *          already point at the destination pages.
 *
 * For each non-highmem zone, every page accepted by saveable() is copied
 * into the next PBE's destination page and its original address is
 * recorded in ->orig_address.  The PBE list must contain exactly as many
 * entries as there are saveable pages: running out of entries, or having
 * entries left over, triggers BUG_ON().
 */
static void copy_data_pages(struct pbe *pblist)
{
struct zone *zone;
unsigned long zone_pfn;
struct pbe *pbe, *p;
/* pbe is the next unused entry; it advances once per copied page. */
pbe = pblist;
for_each_zone (zone) {
if (is_highmem(zone))
continue;
mark_free_pages(zone);
/*
 * This is necessary for swsusp_free(), which only releases pages
 * that carry both the Nosave and NosaveFree flags.  The flag is
 * re-set here on the pagedir pages and on the image pages after
 * each mark_free_pages() call.
 * NOTE(review): presumably mark_free_pages() can clear NosaveFree
 * on these pages - confirm against mm/page_alloc.c.
 */
for_each_pb_page (p, pblist)
SetPageNosaveFree(virt_to_page(p));
for_each_pbe (p, pblist)
SetPageNosaveFree(virt_to_page(p->address));
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
if (saveable(zone, &zone_pfn)) {
struct page *page;
page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
/* More saveable pages than PBEs: the list was sized wrongly. */
BUG_ON(!pbe);
pbe->orig_address = (unsigned long)page_address(page);
/* copy_page is not usable for copying task structs. */
memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
pbe = pbe->next;
}
}
}
/* Leftover PBEs mean fewer pages were saveable than were counted. */
BUG_ON(pbe);
}
/**
 *	free_pagedir - free pages allocated with alloc_pagedir()
 *	@pblist: first page of the pagedir chain (may be NULL).
 *
 *	Follows the chain through the ->next pointer stored in the entry at
 *	index PB_PAGE_SKIP of each page.  For every page it clears the
 *	Nosave and NosaveFree flags and then frees the page.
 */
static void free_pagedir(struct pbe *pblist)
{
	struct pbe *next_page;

	for (; pblist; pblist = next_page) {
		/* Read the link before the page holding it goes away. */
		next_page = pblist[PB_PAGE_SKIP].next;

		ClearPageNosave(virt_to_page(pblist));
		ClearPageNosaveFree(virt_to_page(pblist));
		free_page((unsigned long)pblist);
	}
}
/**
 *	fill_pb_page - Create a list of PBEs on a given memory page
 *	@pbpage: first PBE of the page.
 *
 *	Points each of the entries before index PB_PAGE_SKIP at its
 *	successor on the same page.  The entry at index PB_PAGE_SKIP itself
 *	is left untouched.
 */
static inline void fill_pb_page(struct pbe *pbpage)
{
	struct pbe *const stop = pbpage + PB_PAGE_SKIP;
	struct pbe *cur = pbpage;

	do {
		cur->next = cur + 1;
		cur++;
	} while (cur < stop);
}
/**
 *	create_pbe_list - Create a list of PBEs on top of a given chain
 *	of memory pages allocated with alloc_pagedir()
 *	@pblist: first page of the pagedir chain.
 *	@nr_pages: number of PBEs the finished list must contain.
 *
 *	Every page that is needed in full is linked with fill_pb_page().
 *	On the last page needed, only as many entries are linked as it
 *	takes to reach @nr_pages, and the final entry's ->next is set to
 *	NULL to terminate the list.
 */
void create_pbe_list(struct pbe *pblist, unsigned nr_pages)
{
	struct pbe *pbpage, *p;
	/* Number of PBEs available once the current page is included. */
	unsigned num = PBES_PER_PAGE;

	for_each_pb_page (pbpage, pblist) {
		/* This page already provides enough entries: it is the last. */
		if (num >= nr_pages)
			break;

		fill_pb_page(pbpage);
		num += PBES_PER_PAGE;
	}
	if (pbpage) {
		/*
		 * Rewind num to the 1-based position of the first PBE on the
		 * last page, then link entries until nr_pages is reached.
		 */
		for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++)
			p->next = p + 1;
		p->next = NULL;
	}
	/* num is unsigned, so print it with %u rather than %d. */
	pr_debug("create_pbe_list(): initialized %u PBEs\n", num);
}
static void *alloc_image_page(void)
{
void *res = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
if (res) {
SetPageNosave(virt_to_page(res));
SetPageNosaveFree(virt_to_page(res));
}
return res;
}
/**
 *	alloc_pagedir - Allocate the page directory.
 *	@nr_pages: number of PBEs the directory must be able to hold.
 *
 *	First, determine exactly how many pages we need and
 *	allocate them.
 *
 *	We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
 *	struct pbe elements (pbes) and the last element in the page points
 *	to the next page.
 *
 *	On each page we set up a list of struct_pbe elements.
 *
 *	Returns the first page of the chain, or NULL if @nr_pages is zero or
 *	an allocation failed.  On failure the pages allocated so far are
 *	freed.
 */
struct pbe *alloc_pagedir(unsigned nr_pages)
{
	unsigned num;
	struct pbe *pblist, *pbe;

	if (!nr_pages)
		return NULL;

	/* nr_pages is unsigned, so print it with %u rather than %d. */
	pr_debug("alloc_pagedir(): nr_pages = %u\n", nr_pages);
	pblist = alloc_image_page();
	/* FIXME: rewrite this ugly loop */
	/*
	 * pbe walks the first entry of each page; num is the capacity of
	 * the chain so far.  While more capacity is needed, hang a new page
	 * off the last entry of the current page.  A failed allocation
	 * stores NULL there, which makes pbe NULL and ends the loop.
	 */
	for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages;
			pbe = pbe->next, num += PBES_PER_PAGE) {
		pbe += PB_PAGE_SKIP;
		pbe->next = alloc_image_page();
	}
	if (!pbe) { /* get_zeroed_page() failed */
		free_pagedir(pblist);
		pblist = NULL;
	}
	return pblist;
}
/**
 * Free pages we allocated for suspend. Suspend pages are allocated
 * before atomic copy, so we need to free them after resume.
 *
 * Scans every valid page frame of every zone.  Each page carrying both
 * the Nosave and NosaveFree flags has the flags cleared and is freed.
 */
void swsusp_free(void)
{
	struct zone *zone;

	for_each_zone(zone) {
		unsigned long offset;

		for (offset = 0; offset < zone->spanned_pages; ++offset) {
			unsigned long pfn = zone->zone_start_pfn + offset;
			struct page *page;

			if (!pfn_valid(pfn))
				continue;

			page = pfn_to_page(pfn);
			if (!PageNosave(page) || !PageNosaveFree(page))
				continue;

			ClearPageNosave(page);
			ClearPageNosaveFree(page);
			free_page((long) page_address(page));
		}
	}
}
/**
 *	enough_free_mem - Make sure we have enough free memory to snapshot.
 *	@nr_pages: number of data pages that will be copied.
 *
 *	The snapshot needs @nr_pages image pages, PAGES_FOR_IO spare pages
 *	and one pagedir page per PBES_PER_PAGE image pages (rounded up).
 *
 *	Returns 1 if the number of free pages exceeds that total, else 0.
 */
static int enough_free_mem(unsigned nr_pages)
{
	pr_debug("swsusp: available memory: %u pages\n", nr_free_pages());

	if (nr_free_pages() > (nr_pages + PAGES_FOR_IO +
			(nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE))
		return 1;
	return 0;
}
/*
 * swsusp_alloc - allocate the pagedir and all image pages.
 * @nr_pages: number of data pages the image must hold.
 *
 * Builds a PBE list with @nr_pages entries and gives every entry its own
 * destination page.  If an image page cannot be allocated, everything
 * allocated so far is released through swsusp_free().
 *
 * Returns the head of the PBE list, or NULL on allocation failure.
 */
static struct pbe *swsusp_alloc(unsigned nr_pages)
{
	struct pbe *pblist = alloc_pagedir(nr_pages);
	struct pbe *p;

	if (!pblist) {
		printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
		return NULL;
	}
	create_pbe_list(pblist, nr_pages);

	for_each_pbe (p, pblist) {
		p->address = (unsigned long)alloc_image_page();
		if (p->address)
			continue;

		printk(KERN_ERR "suspend: Allocating image pages failed.\n");
		swsusp_free();
		return NULL;
	}

	return pblist;
}
/*
 * swsusp_save - take the in-memory snapshot of the system.
 *
 * Saves highmem, counts the saveable lowmem pages, checks that enough
 * memory and swap are available, allocates the pagedir and image pages,
 * and copies the data pages.  On success pagedir_nosave holds the PBE
 * list and nr_copy_pages the number of pages copied.
 *
 * NOTE(review): presumably called from the architecture suspend code
 * (hence asmlinkage); the caller is not visible in this file.
 *
 * Returns 0 on success, -ENOMEM if memory is insufficient or an
 * allocation fails, or -ENOSPC if the image is too large for swsusp_info
 * or there is not enough swap.
 */
asmlinkage int swsusp_save(void)
{
	unsigned nr_pages;

	pr_debug("swsusp: critical section: \n");
	if (save_highmem()) {
		printk(KERN_CRIT "swsusp: Not enough free pages for highmem\n");
		/* Undo the partial save and release the copies made so far. */
		restore_highmem();
		return -ENOMEM;
	}

	drain_local_pages();
	nr_pages = count_data_pages();
	printk("swsusp: Need to copy %u pages\n", nr_pages);

	pr_debug("swsusp: pages needed: %u + %lu + %u, free: %u\n",
		 nr_pages,
		 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE,
		 PAGES_FOR_IO, nr_free_pages());

	/* This is needed because of the fixed size of swsusp_info */
	if (MAX_PBES < (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE)
		return -ENOSPC;

	if (!enough_free_mem(nr_pages)) {
		printk(KERN_ERR "swsusp: Not enough free memory\n");
		return -ENOMEM;
	}

	if (!enough_swap(nr_pages)) {
		printk(KERN_ERR "swsusp: Not enough free swap\n");
		return -ENOSPC;
	}

	pagedir_nosave = swsusp_alloc(nr_pages);
	if (!pagedir_nosave)
		return -ENOMEM;

	/* During allocating of suspend pagedir, new cold pages may appear.
	 * Kill them.
	 */
	drain_local_pages();
	copy_data_pages(pagedir_nosave);

	/*
	 * End of critical section. From now on, we can write to memory,
	 * but we should not touch disk. This specially means we must _not_
	 * touch swap space! Except we must write out our image of course.
	 */

	nr_copy_pages = nr_pages;

	/* nr_pages is unsigned: use %u, matching the message printed above. */
	printk("swsusp: critical section/: done (%u pages copied)\n", nr_pages);
	return 0;
}

View File

@@ -1,11 +1,10 @@
/*
* linux/kernel/power/swsusp.c
*
* This file is to realize architecture-independent
* machine suspend feature using pretty near only high-level routines
* This file provides code to write suspend image to swap and read it back.
*
* Copyright (C) 1998-2001 Gabor Kuti <seasons@fornax.hu>
* Copyright (C) 1998,2001-2004 Pavel Machek <pavel@suse.cz>
* Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz>
*
* This file is released under the GPLv2.
*
@@ -47,11 +46,7 @@
#include <linux/utsname.h>
#include <linux/version.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/bitops.h>
#include <linux/vt_kern.h>
#include <linux/kbd_kern.h>
#include <linux/keyboard.h>
#include <linux/spinlock.h>
#include <linux/genhd.h>
#include <linux/kernel.h>
@@ -63,10 +58,8 @@
#include <linux/swapops.h>
#include <linux/bootmem.h>
#include <linux/syscalls.h>
#include <linux/console.h>
#include <linux/highmem.h>
#include <linux/bio.h>
#include <linux/mount.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -84,16 +77,10 @@
#define MAXKEY 32
#define MAXIV 32
/* References to section boundaries */
extern const void __nosave_begin, __nosave_end;
/* Variables to be preserved over suspend */
static int nr_copy_pages_check;
extern char resume_file[];
/* Local variables that should not be affected by save */
static unsigned int nr_copy_pages __nosavedata = 0;
unsigned int nr_copy_pages __nosavedata = 0;
/* Suspend pagedir is allocated before final copy, therefore it
must be freed after resume
@@ -109,7 +96,7 @@ static unsigned int nr_copy_pages __nosavedata = 0;
MMU hardware.
*/
suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
static suspend_pagedir_t *pagedir_save;
suspend_pagedir_t *pagedir_save;
#define SWSUSP_SIG "S1SUSPEND"
@@ -123,12 +110,6 @@ static struct swsusp_header {
static struct swsusp_info swsusp_info;
/*
* XXX: We try to keep some more pages free so that I/O operations succeed
* without paging. Might this be more?
*/
#define PAGES_FOR_IO 512
/*
* Saving part...
*/
@@ -552,346 +533,6 @@ static int write_suspend_image(void)
goto Done;
}
#ifdef CONFIG_HIGHMEM
struct highmem_page {
char *data;
struct page *page;
struct highmem_page *next;
};
static struct highmem_page *highmem_copy;
static int save_highmem_zone(struct zone *zone)
{
unsigned long zone_pfn;
mark_free_pages(zone);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
struct page *page;
struct highmem_page *save;
void *kaddr;
unsigned long pfn = zone_pfn + zone->zone_start_pfn;
if (!(pfn%1000))
printk(".");
if (!pfn_valid(pfn))
continue;
page = pfn_to_page(pfn);
/*
* This condition results from rvmalloc() sans vmalloc_32()
* and architectural memory reservations. This should be
* corrected eventually when the cases giving rise to this
* are better understood.
*/
if (PageReserved(page)) {
printk("highmem reserved page?!\n");
continue;
}
BUG_ON(PageNosave(page));
if (PageNosaveFree(page))
continue;
save = kmalloc(sizeof(struct highmem_page), GFP_ATOMIC);
if (!save)
return -ENOMEM;
save->next = highmem_copy;
save->page = page;
save->data = (void *) get_zeroed_page(GFP_ATOMIC);
if (!save->data) {
kfree(save);
return -ENOMEM;
}
kaddr = kmap_atomic(page, KM_USER0);
memcpy(save->data, kaddr, PAGE_SIZE);
kunmap_atomic(kaddr, KM_USER0);
highmem_copy = save;
}
return 0;
}
#endif /* CONFIG_HIGHMEM */
static int save_highmem(void)
{
#ifdef CONFIG_HIGHMEM
struct zone *zone;
int res = 0;
pr_debug("swsusp: Saving Highmem\n");
for_each_zone (zone) {
if (is_highmem(zone))
res = save_highmem_zone(zone);
if (res)
return res;
}
#endif
return 0;
}
static int restore_highmem(void)
{
#ifdef CONFIG_HIGHMEM
printk("swsusp: Restoring Highmem\n");
while (highmem_copy) {
struct highmem_page *save = highmem_copy;
void *kaddr;
highmem_copy = save->next;
kaddr = kmap_atomic(save->page, KM_USER0);
memcpy(kaddr, save->data, PAGE_SIZE);
kunmap_atomic(kaddr, KM_USER0);
free_page((long) save->data);
kfree(save);
}
#endif
return 0;
}
static int pfn_is_nosave(unsigned long pfn)
{
unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT;
unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT;
return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
}
/**
* saveable - Determine whether a page should be cloned or not.
* @pfn: The page
*
* We save a page if it's Reserved, and not in the range of pages
* statically defined as 'unsaveable', or if it isn't reserved, and
* isn't part of a free chunk of pages.
*/
static int saveable(struct zone * zone, unsigned long * zone_pfn)
{
unsigned long pfn = *zone_pfn + zone->zone_start_pfn;
struct page * page;
if (!pfn_valid(pfn))
return 0;
page = pfn_to_page(pfn);
BUG_ON(PageReserved(page) && PageNosave(page));
if (PageNosave(page))
return 0;
if (PageReserved(page) && pfn_is_nosave(pfn)) {
pr_debug("[nosave pfn 0x%lx]", pfn);
return 0;
}
if (PageNosaveFree(page))
return 0;
return 1;
}
static void count_data_pages(void)
{
struct zone *zone;
unsigned long zone_pfn;
nr_copy_pages = 0;
for_each_zone (zone) {
if (is_highmem(zone))
continue;
mark_free_pages(zone);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
nr_copy_pages += saveable(zone, &zone_pfn);
}
}
static void copy_data_pages(void)
{
struct zone *zone;
unsigned long zone_pfn;
struct pbe * pbe = pagedir_nosave;
pr_debug("copy_data_pages(): pages to copy: %d\n", nr_copy_pages);
for_each_zone (zone) {
if (is_highmem(zone))
continue;
mark_free_pages(zone);
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) {
if (saveable(zone, &zone_pfn)) {
struct page * page;
page = pfn_to_page(zone_pfn + zone->zone_start_pfn);
BUG_ON(!pbe);
pbe->orig_address = (long) page_address(page);
/* copy_page is not usable for copying task structs. */
memcpy((void *)pbe->address, (void *)pbe->orig_address, PAGE_SIZE);
pbe = pbe->next;
}
}
}
BUG_ON(pbe);
}
/**
* calc_nr - Determine the number of pages needed for a pbe list.
*/
static int calc_nr(int nr_copy)
{
return nr_copy + (nr_copy+PBES_PER_PAGE-2)/(PBES_PER_PAGE-1);
}
/**
* free_pagedir - free pages allocated with alloc_pagedir()
*/
static inline void free_pagedir(struct pbe *pblist)
{
struct pbe *pbe;
while (pblist) {
pbe = (pblist + PB_PAGE_SKIP)->next;
free_page((unsigned long)pblist);
pblist = pbe;
}
}
/**
* fill_pb_page - Create a list of PBEs on a given memory page
*/
static inline void fill_pb_page(struct pbe *pbpage)
{
struct pbe *p;
p = pbpage;
pbpage += PB_PAGE_SKIP;
do
p->next = p + 1;
while (++p < pbpage);
}
/**
 * create_pbe_list - Create a list of PBEs on top of a given chain
 * of memory pages allocated with alloc_pagedir()
 * @pblist: first page of the chain
 * @nr_pages: number of PBEs the finished list must contain
 *
 * Pages that are needed in full are linked with fill_pb_page(); on the
 * page where the requested count is reached only the required PBEs are
 * linked and the list is terminated with a NULL ->next.
 */
static void create_pbe_list(struct pbe *pblist, unsigned nr_pages)
{
struct pbe *pbpage, *p;
/* num counts PBES_PER_PAGE for every page up to and including the current one */
unsigned num = PBES_PER_PAGE;
for_each_pb_page (pbpage, pblist) {
/* the current page is enough to reach nr_pages: finish it by hand below */
if (num >= nr_pages)
break;
fill_pb_page(pbpage);
num += PBES_PER_PAGE;
}
/* pbpage is non-NULL only if the loop above was left through the break */
if (pbpage) {
/* rewind num to the count for the previous pages plus one, then link the remaining PBEs */
for (num -= PBES_PER_PAGE - 1, p = pbpage; num < nr_pages; p++, num++)
p->next = p + 1;
/* terminate the list at the last PBE that is needed */
p->next = NULL;
}
pr_debug("create_pbe_list(): initialized %d PBEs\n", num);
}
/**
 * alloc_pagedir - Allocate the page directory.
 * @nr_pages: number of PBEs the directory must be able to hold
 *
 * First, determine exactly how many pages we need and
 * allocate them.
 *
 * We arrange the pages in a chain: each page is an array of PBES_PER_PAGE
 * struct pbe elements (pbes) and the last element in the page points
 * to the next page.
 *
 * On each page we set up a list of struct_pbe elements.
 *
 * Returns the first page of the chain, or NULL if @nr_pages is zero or
 * if any page allocation failed (pages obtained so far are released with
 * free_pagedir() in that case).
 */
static struct pbe * alloc_pagedir(unsigned nr_pages)
{
unsigned num;
struct pbe *pblist, *pbe;
if (!nr_pages)
return NULL;
pr_debug("alloc_pagedir(): nr_pages = %d\n", nr_pages);
pblist = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
/*
 * num counts PBES_PER_PAGE per allocated page.  The loop stops once that
 * reaches nr_pages, or when pbe becomes NULL because an allocation
 * returned 0.
 */
for (pbe = pblist, num = PBES_PER_PAGE; pbe && num < nr_pages;
pbe = pbe->next, num += PBES_PER_PAGE) {
/* move to the element at index PB_PAGE_SKIP; its ->next links to the next page */
pbe += PB_PAGE_SKIP;
pbe->next = (struct pbe *)get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
}
if (!pbe) { /* get_zeroed_page() failed */
free_pagedir(pblist);
pblist = NULL;
}
return pblist;
}
/**
 *	free_image_pages - release the pages that hold the snapshot data
 *
 *	For every PBE of pagedir_save with a non-zero ->address, the
 *	Nosave flag of the page is cleared, the page is freed and
 *	->address is reset to 0.
 */
static void free_image_pages(void)
{
	struct pbe *entry;

	for_each_pbe (entry, pagedir_save) {
		if (!entry->address)
			continue;

		ClearPageNosave(virt_to_page(entry->address));
		free_page(entry->address);
		entry->address = 0;
	}
}
/**
 *	alloc_image_pages - get one zeroed page for each PBE of pagedir_save
 *
 *	Every page obtained is flagged Nosave.  Returns 0 on success and
 *	-ENOMEM as soon as an allocation fails; pages allocated before the
 *	failure are left in place for the caller to release.
 */
static int alloc_image_pages(void)
{
	struct pbe *entry;

	for_each_pbe (entry, pagedir_save) {
		entry->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
		if (entry->address) {
			SetPageNosave(virt_to_page(entry->address));
			continue;
		}
		return -ENOMEM;
	}

	return 0;
}
/*
 * Free the pages we allocated for suspend.  Suspend pages are allocated
 * before the atomic copy, so they have to be freed after resume.
 *
 * The first page of pagedir_save must carry neither the Nosave nor the
 * NosaveFree flag; either one triggers BUG().
 */
void swsusp_free(void)
{
	struct page *dir_page = virt_to_page(pagedir_save);

	BUG_ON(PageNosave(dir_page));
	BUG_ON(PageNosaveFree(dir_page));

	free_image_pages();
	free_pagedir(pagedir_save);
}
/**
 *	enough_free_mem - Make sure we have enough free memory to snapshot.
 *
 *	Returns 1 when nr_free_pages() covers nr_copy_pages plus
 *	PAGES_FOR_IO, and 0 (after a debug message) otherwise.
 */
static int enough_free_mem(void)
{
	if (nr_free_pages() >= (nr_copy_pages + PAGES_FOR_IO))
		return 1;

	pr_debug("swsusp: Not enough free pages: Have %d\n",
		 nr_free_pages());
	return 0;
}
/**
* enough_swap - Make sure we have enough swap to save the image.
*
@@ -902,87 +543,14 @@ static int enough_free_mem(void)
* We should only consider resume_device.
*/
static int enough_swap(void)
int enough_swap(unsigned nr_pages)
{
struct sysinfo i;
si_swapinfo(&i);
if (i.freeswap < (nr_copy_pages + PAGES_FOR_IO)) {
pr_debug("swsusp: Not enough swap. Need %ld\n",i.freeswap);
return 0;
}
return 1;
}
static int swsusp_alloc(void)
{
int error;
pagedir_nosave = NULL;
nr_copy_pages = calc_nr(nr_copy_pages);
nr_copy_pages_check = nr_copy_pages;
pr_debug("suspend: (pages needed: %d + %d free: %d)\n",
nr_copy_pages, PAGES_FOR_IO, nr_free_pages());
if (!enough_free_mem())
return -ENOMEM;
if (!enough_swap())
return -ENOSPC;
if (MAX_PBES < nr_copy_pages / PBES_PER_PAGE +
!!(nr_copy_pages % PBES_PER_PAGE))
return -ENOSPC;
if (!(pagedir_save = alloc_pagedir(nr_copy_pages))) {
printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
return -ENOMEM;
}
create_pbe_list(pagedir_save, nr_copy_pages);
pagedir_nosave = pagedir_save;
if ((error = alloc_image_pages())) {
printk(KERN_ERR "suspend: Allocating image pages failed.\n");
swsusp_free();
return error;
}
return 0;
}
static int suspend_prepare_image(void)
{
int error;
pr_debug("swsusp: critical section: \n");
if (save_highmem()) {
printk(KERN_CRIT "Suspend machine: Not enough free pages for highmem\n");
restore_highmem();
return -ENOMEM;
}
drain_local_pages();
count_data_pages();
printk("swsusp: Need to copy %u pages\n", nr_copy_pages);
error = swsusp_alloc();
if (error)
return error;
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
*/
drain_local_pages();
copy_data_pages();
/*
* End of critical section. From now on, we can write to memory,
* but we should not touch disk. This specially means we must _not_
* touch swap space! Except we must write out our image of course.
*/
printk("swsusp: critical section/: done (%d pages copied)\n", nr_copy_pages );
return 0;
pr_debug("swsusp: available swap: %lu pages\n", i.freeswap);
return i.freeswap > (nr_pages + PAGES_FOR_IO +
(nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE);
}
@@ -994,7 +562,7 @@ static int suspend_prepare_image(void)
int swsusp_write(void)
{
int error;
device_resume();
lock_swapdevices();
error = write_suspend_image();
/* This will unlock ignored swap devices since writing is finished */
@@ -1004,14 +572,6 @@ int swsusp_write(void)
}
extern asmlinkage int swsusp_arch_suspend(void);
extern asmlinkage int swsusp_arch_resume(void);
asmlinkage int swsusp_save(void)
{
return suspend_prepare_image();
}
int swsusp_suspend(void)
{
@@ -1043,7 +603,6 @@ int swsusp_suspend(void)
printk(KERN_ERR "Error %d suspending\n", error);
/* Restore control flow magically appears here */
restore_processor_state();
BUG_ON (nr_copy_pages_check != nr_copy_pages);
restore_highmem();
device_power_up();
local_irq_enable();
@@ -1063,6 +622,11 @@ int swsusp_resume(void)
* execution continues at place where swsusp_arch_suspend was called
*/
BUG_ON(!error);
/* The only reason why swsusp_arch_resume() can fail is memory being
* very tight, so we have to free it as soon as we can to avoid
* subsequent failures
*/
swsusp_free();
restore_processor_state();
restore_highmem();
touch_softlockup_watchdog();
@@ -1078,54 +642,28 @@ int swsusp_resume(void)
*
* We don't know which pages are usable until we allocate them.
*
* Allocated but unusable (ie eaten) memory pages are linked together
* to create a list, so that we can free them easily
*
* We could have used a type other than (void *)
* for this purpose, but ...
* Allocated but unusable (ie eaten) memory pages are marked so that
* swsusp_free() can release them
*/
static void **eaten_memory = NULL;
/*
 * Push @page onto the front of the eaten_memory list.  The first
 * pointer-sized word of the page is used to store the link to the
 * previous list head.
 */
static inline void eat_page(void *page)
{
	void **prev_head = eaten_memory;

	eaten_memory = page;
	*eaten_memory = prev_head;
}
unsigned long get_usable_page(unsigned gfp_mask)
unsigned long get_safe_page(gfp_t gfp_mask)
{
unsigned long m;
m = get_zeroed_page(gfp_mask);
while (!PageNosaveFree(virt_to_page(m))) {
eat_page((void *)m);
do {
m = get_zeroed_page(gfp_mask);
if (!m)
break;
if (m && PageNosaveFree(virt_to_page(m)))
/* This is for swsusp_free() */
SetPageNosave(virt_to_page(m));
} while (m && PageNosaveFree(virt_to_page(m)));
if (m) {
/* This is for swsusp_free() */
SetPageNosave(virt_to_page(m));
SetPageNosaveFree(virt_to_page(m));
}
return m;
}
/*
 * Release every page on the eaten_memory list and reset the list head.
 * The link stored in a page is read before that page is freed.
 */
void free_eaten_memory(void)
{
	void **node = eaten_memory;
	int freed = 0;

	while (node) {
		void **following = *node;

		free_page((unsigned long)node);
		node = following;
		freed++;
	}

	eaten_memory = NULL;
	pr_debug("swsusp: %d unused pages freed\n", freed);
}
/**
* check_pagedir - We ensure here that pages that the PBEs point to
* won't collide with pages where we're going to restore from the loaded
@@ -1143,7 +681,7 @@ static int check_pagedir(struct pbe *pblist)
p->address = 0UL;
for_each_pbe (p, pblist) {
p->address = get_usable_page(GFP_ATOMIC);
p->address = get_safe_page(GFP_ATOMIC);
if (!p->address)
return -ENOMEM;
}
@@ -1162,7 +700,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist)
unsigned long zone_pfn;
struct pbe *pbpage, *tail, *p;
void *m;
int rel = 0, error = 0;
int rel = 0;
if (!pblist) /* a sanity check */
return NULL;
@@ -1170,41 +708,37 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist)
pr_debug("swsusp: Relocating pagedir (%lu pages to check)\n",
swsusp_info.pagedir_pages);
/* Set page flags */
/* Clear page flags */
for_each_zone (zone) {
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
SetPageNosaveFree(pfn_to_page(zone_pfn +
if (pfn_valid(zone_pfn + zone->zone_start_pfn))
ClearPageNosaveFree(pfn_to_page(zone_pfn +
zone->zone_start_pfn));
}
/* Clear orig addresses */
/* Mark orig addresses */
for_each_pbe (p, pblist)
ClearPageNosaveFree(virt_to_page(p->orig_address));
SetPageNosaveFree(virt_to_page(p->orig_address));
tail = pblist + PB_PAGE_SKIP;
/* Relocate colliding pages */
for_each_pb_page (pbpage, pblist) {
if (!PageNosaveFree(virt_to_page((unsigned long)pbpage))) {
m = (void *)get_usable_page(GFP_ATOMIC | __GFP_COLD);
if (!m) {
error = -ENOMEM;
break;
}
if (PageNosaveFree(virt_to_page((unsigned long)pbpage))) {
m = (void *)get_safe_page(GFP_ATOMIC | __GFP_COLD);
if (!m)
return NULL;
memcpy(m, (void *)pbpage, PAGE_SIZE);
if (pbpage == pblist)
pblist = (struct pbe *)m;
else
tail->next = (struct pbe *)m;
eat_page((void *)pbpage);
pbpage = (struct pbe *)m;
/* We have to link the PBEs again */
for (p = pbpage; p < pbpage + PB_PAGE_SKIP; p++)
if (p->next) /* needed to save the end */
p->next = p + 1;
@@ -1214,15 +748,13 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist)
tail = pbpage + PB_PAGE_SKIP;
}
if (error) {
printk("\nswsusp: Out of memory\n\n");
free_pagedir(pblist);
free_eaten_memory();
pblist = NULL;
/* Is this even worth handling? It should never ever happen, and we
have just lost user's state, anyway... */
} else
printk("swsusp: Relocated %d pages\n", rel);
/* This is for swsusp_free() */
for_each_pb_page (pbpage, pblist) {
SetPageNosave(virt_to_page(pbpage));
SetPageNosaveFree(virt_to_page(pbpage));
}
printk("swsusp: Relocated %d pages\n", rel);
return pblist;
}
@@ -1440,9 +972,7 @@ static int read_pagedir(struct pbe *pblist)
break;
}
if (error)
free_pagedir(pblist);
else
if (!error)
BUG_ON(i != swsusp_info.pagedir_pages);
return error;
@@ -1485,15 +1015,6 @@ static int read_suspend_image(void)
if (!error)
error = data_read(pagedir_nosave);
if (error) { /* We fail cleanly */
free_eaten_memory();
for_each_pbe (p, pagedir_nosave)
if (p->address) {
free_page(p->address);
p->address = 0UL;
}
free_pagedir(pagedir_nosave);
}
return error;
}

View File

@@ -10,7 +10,7 @@
* elsewhere, in preparation for a serial line console (someday).
* Ted Ts'o, 2/11/93.
* Modified for sysctl support, 1/8/97, Chris Horn.
* Fixed SMP synchronization, 08/08/99, Manfred Spraul
* Fixed SMP synchronization, 08/08/99, Manfred Spraul
* manfreds@colorfullife.com
* Rewrote bits to get rid of console_lock
* 01Mar01 Andrew Morton <andrewm@uow.edu.au>
@@ -148,7 +148,7 @@ static int __init console_setup(char *str)
if (!strcmp(str, "ttyb"))
strcpy(name, "ttyS1");
#endif
for(s = name; *s; s++)
for (s = name; *s; s++)
if ((*s >= '0' && *s <= '9') || *s == ',')
break;
idx = simple_strtoul(s, NULL, 10);
@@ -169,11 +169,11 @@ static int __init log_buf_len_setup(char *str)
size = roundup_pow_of_two(size);
if (size > log_buf_len) {
unsigned long start, dest_idx, offset;
char * new_log_buf;
char *new_log_buf;
new_log_buf = alloc_bootmem(size);
if (!new_log_buf) {
printk("log_buf_len: allocation failed\n");
printk(KERN_WARNING "log_buf_len: allocation failed\n");
goto out;
}
@@ -193,10 +193,9 @@ static int __init log_buf_len_setup(char *str)
log_end -= offset;
spin_unlock_irqrestore(&logbuf_lock, flags);
printk("log_buf_len: %d\n", log_buf_len);
printk(KERN_NOTICE "log_buf_len: %d\n", log_buf_len);
}
out:
return 1;
}
@@ -217,7 +216,7 @@ __setup("log_buf_len=", log_buf_len_setup);
* 9 -- Return number of unread characters in the log buffer
* 10 -- Return size of the log buffer
*/
int do_syslog(int type, char __user * buf, int len)
int do_syslog(int type, char __user *buf, int len)
{
unsigned long i, j, limit, count;
int do_clear = 0;
@@ -244,7 +243,8 @@ int do_syslog(int type, char __user * buf, int len)
error = -EFAULT;
goto out;
}
error = wait_event_interruptible(log_wait, (log_start - log_end));
error = wait_event_interruptible(log_wait,
(log_start - log_end));
if (error)
goto out;
i = 0;
@@ -264,7 +264,7 @@ int do_syslog(int type, char __user * buf, int len)
error = i;
break;
case 4: /* Read/clear last kernel messages */
do_clear = 1;
do_clear = 1;
/* FALL THRU */
case 3: /* Read last kernel messages */
error = -EINVAL;
@@ -288,11 +288,11 @@ int do_syslog(int type, char __user * buf, int len)
limit = log_end;
/*
* __put_user() could sleep, and while we sleep
* printk() could overwrite the messages
* printk() could overwrite the messages
* we try to copy to user space. Therefore
* the messages are copied in reverse. <manfreds>
*/
for(i = 0; i < count && !error; i++) {
for (i = 0; i < count && !error; i++) {
j = limit-1-i;
if (j + log_buf_len < log_end)
break;
@@ -306,10 +306,10 @@ int do_syslog(int type, char __user * buf, int len)
if (error)
break;
error = i;
if(i != count) {
if (i != count) {
int offset = count-error;
/* buffer overflow during copy, correct user buffer. */
for(i=0;i<error;i++) {
for (i = 0; i < error; i++) {
if (__get_user(c,&buf[i+offset]) ||
__put_user(c,&buf[i])) {
error = -EFAULT;
@@ -351,7 +351,7 @@ out:
return error;
}
asmlinkage long sys_syslog(int type, char __user * buf, int len)
asmlinkage long sys_syslog(int type, char __user *buf, int len)
{
return do_syslog(type, buf, len);
}
@@ -404,21 +404,19 @@ static void call_console_drivers(unsigned long start, unsigned long end)
cur_index = start;
start_print = start;
while (cur_index != end) {
if ( msg_level < 0 &&
((end - cur_index) > 2) &&
LOG_BUF(cur_index + 0) == '<' &&
LOG_BUF(cur_index + 1) >= '0' &&
LOG_BUF(cur_index + 1) <= '7' &&
LOG_BUF(cur_index + 2) == '>')
{
if (msg_level < 0 && ((end - cur_index) > 2) &&
LOG_BUF(cur_index + 0) == '<' &&
LOG_BUF(cur_index + 1) >= '0' &&
LOG_BUF(cur_index + 1) <= '7' &&
LOG_BUF(cur_index + 2) == '>') {
msg_level = LOG_BUF(cur_index + 1) - '0';
cur_index += 3;
start_print = cur_index;
}
while (cur_index != end) {
char c = LOG_BUF(cur_index);
cur_index++;
cur_index++;
if (c == '\n') {
if (msg_level < 0) {
/*
@@ -461,7 +459,7 @@ static void zap_locks(void)
static unsigned long oops_timestamp;
if (time_after_eq(jiffies, oops_timestamp) &&
!time_after(jiffies, oops_timestamp + 30*HZ))
!time_after(jiffies, oops_timestamp + 30 * HZ))
return;
oops_timestamp = jiffies;
@@ -495,7 +493,7 @@ __attribute__((weak)) unsigned long long printk_clock(void)
/*
* This is printk. It can be called from any context. We want it to work.
*
*
* We try to grab the console_sem. If we succeed, it's easy - we log the output and
* call the console drivers. If we fail to get the semaphore we place the output
* into the log buffer and return. The current holder of the console_sem will
@@ -639,13 +637,19 @@ EXPORT_SYMBOL(vprintk);
#else
asmlinkage long sys_syslog(int type, char __user * buf, int len)
asmlinkage long sys_syslog(int type, char __user *buf, int len)
{
return 0;
}
int do_syslog(int type, char __user * buf, int len) { return 0; }
static void call_console_drivers(unsigned long start, unsigned long end) {}
int do_syslog(int type, char __user *buf, int len)
{
return 0;
}
static void call_console_drivers(unsigned long start, unsigned long end)
{
}
#endif
@@ -851,9 +855,9 @@ EXPORT_SYMBOL(console_start);
* print any messages that were printed by the kernel before the
* console driver was initialized.
*/
void register_console(struct console * console)
void register_console(struct console *console)
{
int i;
int i;
unsigned long flags;
if (preferred_console < 0)
@@ -878,7 +882,8 @@ void register_console(struct console * console)
* See if this console matches one we selected on
* the command line.
*/
for(i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) {
for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0];
i++) {
if (strcmp(console_cmdline[i].name, console->name) != 0)
continue;
if (console->index >= 0 &&
@@ -933,9 +938,9 @@ void register_console(struct console * console)
}
EXPORT_SYMBOL(register_console);
int unregister_console(struct console * console)
int unregister_console(struct console *console)
{
struct console *a,*b;
struct console *a, *b;
int res = 1;
acquire_console_sem();
@@ -949,10 +954,10 @@ int unregister_console(struct console * console)
b->next = a->next;
res = 0;
break;
}
}
}
}
/* If last console is removed, we re-enable picking the first
* one that gets registered. Without that, pmac early boot console
* would prevent fbcon from taking over.
@@ -994,7 +999,7 @@ void tty_write_message(struct tty_struct *tty, char *msg)
int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst)
{
static DEFINE_SPINLOCK(ratelimit_lock);
static unsigned long toks = 10*5*HZ;
static unsigned long toks = 10 * 5 * HZ;
static unsigned long last_msg;
static int missed;
unsigned long flags;
@@ -1007,6 +1012,7 @@ int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst)
toks = ratelimit_burst * ratelimit_jiffies;
if (toks >= ratelimit_jiffies) {
int lost = missed;
missed = 0;
toks -= ratelimit_jiffies;
spin_unlock_irqrestore(&ratelimit_lock, flags);
@@ -1021,7 +1027,7 @@ int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst)
EXPORT_SYMBOL(__printk_ratelimit);
/* minimum time in jiffies between messages */
int printk_ratelimit_jiffies = 5*HZ;
int printk_ratelimit_jiffies = 5 * HZ;
/* number of messages we send before ratelimiting */
int printk_ratelimit_burst = 10;

View File

@@ -56,6 +56,10 @@ void ptrace_untrace(task_t *child)
signal_wake_up(child, 1);
}
}
if (child->signal->flags & SIGNAL_GROUP_EXIT) {
sigaddset(&child->pending.signal, SIGKILL);
signal_wake_up(child, 1);
}
spin_unlock(&child->sighand->siglock);
}
@@ -77,8 +81,7 @@ void __ptrace_unlink(task_t *child)
SET_LINKS(child);
}
if (child->state == TASK_TRACED)
ptrace_untrace(child);
ptrace_untrace(child);
}
/*

View File

@@ -153,6 +153,15 @@ void fastcall call_rcu_bh(struct rcu_head *head,
local_irq_restore(flags);
}
/*
 * Report how many RCU batches have been processed so far, as recorded
 * in rcu_ctrlblk.completed.  Intended for debugging and statistics.
 */
long rcu_batches_completed(void)
{
	long done = rcu_ctrlblk.completed;

	return done;
}
/*
* Invoke the completed RCU callbacks. They are expected to be in
* a per-cpu list.
@@ -501,6 +510,7 @@ void synchronize_kernel(void)
}
module_param(maxbatch, int, 0);
EXPORT_SYMBOL_GPL(rcu_batches_completed);
EXPORT_SYMBOL(call_rcu); /* WARNING: GPL-only in April 2006. */
EXPORT_SYMBOL(call_rcu_bh); /* WARNING: GPL-only in April 2006. */
EXPORT_SYMBOL_GPL(synchronize_rcu);

492
kernel/rcutorture.c Normal file
View File

@@ -0,0 +1,492 @@
/*
* Read-Copy Update /proc-based torture test facility
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2005
*
* Authors: Paul E. McKenney <paulmck@us.ibm.com>
*
* See also: Documentation/RCU/torture.txt
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/rcuref.h>
#include <linux/cpu.h>
#include <linux/random.h>
#include <linux/delay.h>
#include <linux/byteorder/swabb.h>
#include <linux/stat.h>
MODULE_LICENSE("GPL");
static int nreaders = -1; /* # reader threads, defaults to 2*ncpus */
static int stat_interval = 0; /* Interval between stats, in seconds. */
/* Defaults to "only at end of test". */
static int verbose = 0; /* Print more debug info. */
MODULE_PARM(nreaders, "i");
MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
MODULE_PARM(stat_interval, "i");
MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
MODULE_PARM(verbose, "i");
MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
/*
 * Logging helpers.  Every message is emitted at KERN_ALERT and prefixed
 * with TORTURE_FLAG.  @s must be a string literal because it is pasted
 * between other literals.  The VERBOSE_ variants print only when the
 * "verbose" module parameter is non-zero; the ERRSTRING variant also
 * inserts a "!!! " marker.
 */
#define TORTURE_FLAG "rcutorture: "
#define PRINTK_STRING(s) \
do { printk(KERN_ALERT TORTURE_FLAG s "\n"); } while (0)
#define VERBOSE_PRINTK_STRING(s) \
do { if (verbose) printk(KERN_ALERT TORTURE_FLAG s "\n"); } while (0)
#define VERBOSE_PRINTK_ERRSTRING(s) \
do { if (verbose) printk(KERN_ALERT TORTURE_FLAG "!!! " s "\n"); } while (0)
/* Buffer that rcu_torture_stats_print() formats the statistics into. */
static char printk_buf[4096];
/* Number of reader kthreads actually used; computed in rcu_torture_init(). */
static int nrealreaders;
/* kthread handles; NULL when the corresponding thread is not running. */
static struct task_struct *writer_task;
static struct task_struct **reader_tasks; /* array of nrealreaders entries */
static struct task_struct *stats_task;
/* Number of rcu_torture_cb() passes before an element returns to the freelist. */
#define RCU_TORTURE_PIPE_LEN 10
/*
 * One test element.  It is taken from rcu_torture_freelist, published
 * through rcu_torture_current, and then passed through call_rcu()
 * repeatedly before being put back on the freelist.
 */
struct rcu_torture {
struct rcu_head rtort_rcu; /* handed to call_rcu() */
int rtort_pipe_count; /* bumped by the writer and by each rcu_torture_cb() pass */
struct list_head rtort_free; /* linkage on rcu_torture_freelist */
};
static int fullstop = 0; /* stop generating callbacks at test end. */
/* Pool of unused elements, protected by rcu_torture_lock. */
static LIST_HEAD(rcu_torture_freelist);
/* Element currently visible to readers; replaced by the writer. */
static struct rcu_torture *rcu_torture_current = NULL;
/* Incremented by the writer each time it publishes a new element. */
static long rcu_torture_current_version = 0;
static struct rcu_torture rcu_tortures[10 * RCU_TORTURE_PIPE_LEN];
static DEFINE_SPINLOCK(rcu_torture_lock);
/* Per-CPU reader histogram, indexed by the rtort_pipe_count a reader observed. */
static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_count) =
{ 0 };
/* Per-CPU reader histogram, indexed by the batches completed during one read. */
static DEFINE_PER_CPU(long [RCU_TORTURE_PIPE_LEN + 1], rcu_torture_batch) =
{ 0 };
/* Writer/callback histogram, indexed by the element's rtort_pipe_count. */
static atomic_t rcu_torture_wcount[RCU_TORTURE_PIPE_LEN + 1];
/* Counters for successful allocations, failed allocations and frees. */
atomic_t n_rcu_torture_alloc;
atomic_t n_rcu_torture_alloc_fail;
atomic_t n_rcu_torture_free;
/*
 * Take the first element off rcu_torture_freelist.
 *
 * Returns the element, or NULL when the freelist is empty.  The outcome
 * is recorded in n_rcu_torture_alloc or n_rcu_torture_alloc_fail.
 */
struct rcu_torture *
rcu_torture_alloc(void)
{
	struct rcu_torture *elem = NULL;

	spin_lock(&rcu_torture_lock);
	if (!list_empty(&rcu_torture_freelist)) {
		struct list_head *first;

		atomic_inc(&n_rcu_torture_alloc);
		first = rcu_torture_freelist.next;
		list_del_init(first);
		elem = container_of(first, struct rcu_torture, rtort_free);
	} else {
		atomic_inc(&n_rcu_torture_alloc_fail);
	}
	spin_unlock(&rcu_torture_lock);

	return elem;
}
/*
 * Return element @p to the tail of rcu_torture_freelist and count the
 * free in n_rcu_torture_free.
 */
static void
rcu_torture_free(struct rcu_torture *p)
{
	struct list_head *node = &p->rtort_free;

	atomic_inc(&n_rcu_torture_free);

	spin_lock(&rcu_torture_lock);
	list_add_tail(node, &rcu_torture_freelist);
	spin_unlock(&rcu_torture_lock);
}
/*
 * RCU callback for a retired element.  Records the element's current
 * pipeline stage in rcu_torture_wcount[], advances the stage, and then
 * either re-queues the element with call_rcu() or, once the stage has
 * reached RCU_TORTURE_PIPE_LEN, returns it to the freelist.
 */
static void
rcu_torture_cb(struct rcu_head *p)
{
	struct rcu_torture *elem = container_of(p, struct rcu_torture, rtort_rcu);
	int stage;

	/* Test is ending, just drop callbacks on the floor. */
	/* The next initialization will pick up the pieces. */
	if (fullstop)
		return;

	stage = elem->rtort_pipe_count;
	atomic_inc(&rcu_torture_wcount[stage > RCU_TORTURE_PIPE_LEN ?
				       RCU_TORTURE_PIPE_LEN : stage]);

	elem->rtort_pipe_count++;
	if (elem->rtort_pipe_count < RCU_TORTURE_PIPE_LEN) {
		call_rcu(p, rcu_torture_cb);
		return;
	}
	rcu_torture_free(elem);
}
/*
 * State of the per-thread pseudo-random number generator.
 *
 * rrs_count must be signed: rcu_random() decrements it and reseeds when
 * it drops below zero.  With an unsigned counter that test is never true
 * and the generator would never be reseeded.
 */
struct rcu_random_state {
	unsigned long rrs_state;	/* current generator state */
	long rrs_count;			/* calls left before the next reseed */
};

#define RCU_RANDOM_MULT 39916801 /* prime */
#define RCU_RANDOM_ADD 479001701 /* prime */
#define RCU_RANDOM_REFRESH 10000

#define DEFINE_RCU_RANDOM(name) struct rcu_random_state name = { 0, 0 }

/*
 * Crude but fast random-number generator. Uses a linear congruential
 * generator, with occasional help from get_random_bytes().
 *
 * The state is perturbed with get_random_bytes() on the first call and
 * again each time the RCU_RANDOM_REFRESH countdown expires.  Returns
 * swahw32() of the updated state.
 */
static long
rcu_random(struct rcu_random_state *rrsp)
{
	long refresh;

	if (--rrsp->rrs_count < 0) {
		get_random_bytes(&refresh, sizeof(refresh));
		rrsp->rrs_state += refresh;
		rrsp->rrs_count = RCU_RANDOM_REFRESH;
	}
	rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD;
	return swahw32(rrsp->rrs_state);
}
/*
 * RCU torture writer kthread. Repeatedly substitutes a new structure
 * for that pointed to by rcu_torture_current, freeing the old structure
 * after a series of grace periods (the "pipeline").
 *
 * @arg is unused.  Always returns 0.
 */
static int
rcu_torture_writer(void *arg)
{
int i;
long oldbatch = rcu_batches_completed();
struct rcu_torture *rp;
struct rcu_torture *old_rp;
static DEFINE_RCU_RANDOM(rand);
VERBOSE_PRINTK_STRING("rcu_torture_writer task started");
do {
schedule_timeout_uninterruptible(1);
/* Publish a new element only after another RCU batch has completed. */
if (rcu_batches_completed() == oldbatch)
continue;
/* Freelist empty: try again on the next pass. */
if ((rp = rcu_torture_alloc()) == NULL)
continue;
rp->rtort_pipe_count = 0;
/* Random delay (value in 0..0x3ff handed to udelay()) before publishing. */
udelay(rcu_random(&rand) & 0x3ff);
old_rp = rcu_torture_current;
rcu_assign_pointer(rcu_torture_current, rp);
smp_wmb();
if (old_rp != NULL) {
/* Record the old element's (clamped) stage, advance it, and hand it to RCU. */
i = old_rp->rtort_pipe_count;
if (i > RCU_TORTURE_PIPE_LEN)
i = RCU_TORTURE_PIPE_LEN;
atomic_inc(&rcu_torture_wcount[i]);
old_rp->rtort_pipe_count++;
call_rcu(&old_rp->rtort_rcu, rcu_torture_cb);
}
rcu_torture_current_version++;
oldbatch = rcu_batches_completed();
} while (!kthread_should_stop() && !fullstop);
VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping");
/* fullstop may end the loop first; stay alive until kthread_stop() is called. */
while (!kthread_should_stop())
schedule_timeout_uninterruptible(1);
return 0;
}
/*
 * RCU torture reader kthread. Repeatedly dereferences rcu_torture_current,
 * incrementing the corresponding element of the pipeline array. The
 * counter in the element should never be greater than 1, otherwise, the
 * RCU implementation is broken.
 *
 * @arg is unused.  Always returns 0.
 */
static int
rcu_torture_reader(void *arg)
{
int completed;
DEFINE_RCU_RANDOM(rand);
struct rcu_torture *p;
int pipe_count;
VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
do {
rcu_read_lock();
/* Snapshot the batch counter so the batches completed during this read can be computed. */
completed = rcu_batches_completed();
p = rcu_dereference(rcu_torture_current);
if (p == NULL) {
/* Wait for rcu_torture_writer to get underway */
rcu_read_unlock();
schedule_timeout_interruptible(HZ);
continue;
}
/* Random delay (value in 0..0x7f handed to udelay()) inside the read-side section. */
udelay(rcu_random(&rand) & 0x7f);
/* The per-CPU counters below are updated with preemption disabled. */
preempt_disable();
pipe_count = p->rtort_pipe_count;
if (pipe_count > RCU_TORTURE_PIPE_LEN) {
/* Should not happen, but... */
pipe_count = RCU_TORTURE_PIPE_LEN;
}
++__get_cpu_var(rcu_torture_count)[pipe_count];
/* From here on "completed" holds the number of batches that finished during this read. */
completed = rcu_batches_completed() - completed;
if (completed > RCU_TORTURE_PIPE_LEN) {
/* Should not happen, but... */
completed = RCU_TORTURE_PIPE_LEN;
}
++__get_cpu_var(rcu_torture_batch)[completed];
preempt_enable();
rcu_read_unlock();
schedule();
} while (!kthread_should_stop() && !fullstop);
VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
/* fullstop may end the loop first; stay alive until kthread_stop() is called. */
while (!kthread_should_stop())
schedule_timeout_uninterruptible(1);
return 0;
}
/*
 * Create an RCU-torture statistics message in the specified buffer.
 *
 * @page: destination buffer; the caller must make it large enough, no
 *        bounds checking is done here.
 *
 * The per-CPU reader histograms are summed over all CPUs.  The
 * "Reader Pipe" line is prefixed with "!!! " when any bucket above
 * index 1 is non-zero, i.e. when a reader saw an element that had
 * already been through more than one pipeline stage.
 *
 * Returns the number of characters written, not counting the trailing
 * NUL.
 */
static int
rcu_torture_printk(char *page)
{
	int cnt = 0;
	int cpu;
	int i;
	long pipesummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };
	long batchsummary[RCU_TORTURE_PIPE_LEN + 1] = { 0 };

	for_each_cpu(cpu) {
		for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
			pipesummary[i] += per_cpu(rcu_torture_count, cpu)[i];
			batchsummary[i] += per_cpu(rcu_torture_batch, cpu)[i];
		}
	}
	/*
	 * Find the highest non-zero bucket.  The scan must start at the
	 * last bucket (index RCU_TORTURE_PIPE_LEN), which collects the
	 * clamped out-of-range counts.
	 */
	for (i = RCU_TORTURE_PIPE_LEN; i >= 0; i--) {
		if (pipesummary[i] != 0)
			break;
	}
	cnt += sprintf(&page[cnt], "rcutorture: ");
	cnt += sprintf(&page[cnt],
		       "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d",
		       rcu_torture_current,
		       rcu_torture_current_version,
		       list_empty(&rcu_torture_freelist),
		       atomic_read(&n_rcu_torture_alloc),
		       atomic_read(&n_rcu_torture_alloc_fail),
		       atomic_read(&n_rcu_torture_free));
	cnt += sprintf(&page[cnt], "\nrcutorture: ");
	if (i > 1)
		cnt += sprintf(&page[cnt], "!!! ");
	cnt += sprintf(&page[cnt], "Reader Pipe: ");
	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
		cnt += sprintf(&page[cnt], " %ld", pipesummary[i]);
	cnt += sprintf(&page[cnt], "\nrcutorture: ");
	cnt += sprintf(&page[cnt], "Reader Batch: ");
	/* batchsummary[] has RCU_TORTURE_PIPE_LEN + 1 buckets; print them all. */
	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
		cnt += sprintf(&page[cnt], " %ld", batchsummary[i]);
	cnt += sprintf(&page[cnt], "\nrcutorture: ");
	cnt += sprintf(&page[cnt], "Free-Block Circulation: ");
	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
		cnt += sprintf(&page[cnt], " %d",
			       atomic_read(&rcu_torture_wcount[i]));
	}
	cnt += sprintf(&page[cnt], "\n");
	return cnt;
}
/*
 * Print torture statistics. Caller must ensure that there is only
 * one call to this function at a given time!!! This is normally
 * accomplished by relying on the module system to only have one copy
 * of the module loaded, and then by giving the rcu_torture_stats
 * kthread full control (or the init/cleanup functions when rcu_torture_stats
 * thread is not running).
 *
 * The message is formatted into the shared printk_buf, which is why
 * concurrent calls are not allowed.
 */
static void
rcu_torture_stats_print(void)
{
	/* The returned length is not needed: the buffer is NUL-terminated. */
	rcu_torture_printk(printk_buf);
	printk(KERN_ALERT "%s", printk_buf);
}
/*
 * Statistics kthread: sleeps for stat_interval seconds, prints the
 * torture statistics, and repeats until asked to stop.  At least one
 * report is always printed.
 *
 * fullstop is deliberately not consulted, since this thread neither
 * touches volatile test state nor registers callbacks.
 *
 * @arg is unused.  Always returns 0.
 */
static int
rcu_torture_stats(void *arg)
{
	VERBOSE_PRINTK_STRING("rcu_torture_stats task started");

	for (;;) {
		schedule_timeout_interruptible(stat_interval * HZ);
		rcu_torture_stats_print();
		if (kthread_should_stop())
			break;
	}

	VERBOSE_PRINTK_STRING("rcu_torture_stats task stopping");
	return 0;
}
/*
 * Tear down the test: stop the writer, the readers and the stats kthread,
 * wait for outstanding RCU callbacks, then print the final statistics.
 * Also used by rcu_torture_init() to unwind after a partial start-up, so
 * every handle is checked for NULL before it is stopped.
 */
static void
rcu_torture_cleanup(void)
{
int i;
/* Tell the kthreads and rcu_torture_cb() that the test is over. */
fullstop = 1;
if (writer_task != NULL) {
VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task");
kthread_stop(writer_task);
}
writer_task = NULL;
if (reader_tasks != NULL) {
for (i = 0; i < nrealreaders; i++) {
if (reader_tasks[i] != NULL) {
VERBOSE_PRINTK_STRING(
"Stopping rcu_torture_reader task");
kthread_stop(reader_tasks[i]);
}
reader_tasks[i] = NULL;
}
kfree(reader_tasks);
reader_tasks = NULL;
}
rcu_torture_current = NULL;
if (stats_task != NULL) {
VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task");
kthread_stop(stats_task);
}
stats_task = NULL;
/* Wait for all RCU callbacks to fire. */
for (i = 0; i < RCU_TORTURE_PIPE_LEN; i++)
synchronize_rcu();
rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
PRINTK_STRING("--- End of test");
}
/*
 * Module initialization: reset all test state, then start the writer,
 * the reader kthreads and (if stat_interval > 0) the statistics kthread.
 *
 * Returns 0 on success.  If anything fails, whatever was started so far
 * is torn down again via rcu_torture_cleanup() and the first error code
 * is returned.
 */
static int
rcu_torture_init(void)
{
	int i;
	int cpu;
	int firsterr = 0;

	/* Process args and tell the world that the torturer is on the job. */

	if (nreaders >= 0)
		nrealreaders = nreaders;
	else
		nrealreaders = 2 * num_online_cpus();
	printk(KERN_ALERT TORTURE_FLAG
	       "--- Start of test: nreaders=%d stat_interval=%d verbose=%d\n",
	       nrealreaders, stat_interval, verbose);
	fullstop = 0;

	/* Set up the freelist. */

	INIT_LIST_HEAD(&rcu_torture_freelist);
	for (i = 0; i < sizeof(rcu_tortures) / sizeof(rcu_tortures[0]); i++) {
		list_add_tail(&rcu_tortures[i].rtort_free,
			      &rcu_torture_freelist);
	}

	/* Initialize the statistics so that each run gets its own numbers. */

	rcu_torture_current = NULL;
	rcu_torture_current_version = 0;
	atomic_set(&n_rcu_torture_alloc, 0);
	atomic_set(&n_rcu_torture_alloc_fail, 0);
	atomic_set(&n_rcu_torture_free, 0);
	for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
		atomic_set(&rcu_torture_wcount[i], 0);
	for_each_cpu(cpu) {
		for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
			per_cpu(rcu_torture_count, cpu)[i] = 0;
			per_cpu(rcu_torture_batch, cpu)[i] = 0;
		}
	}

	/* Start up the kthreads. */

	VERBOSE_PRINTK_STRING("Creating rcu_torture_writer task");
	writer_task = kthread_run(rcu_torture_writer, NULL,
				  "rcu_torture_writer");
	if (IS_ERR(writer_task)) {
		firsterr = PTR_ERR(writer_task);
		VERBOSE_PRINTK_ERRSTRING("Failed to create writer");
		writer_task = NULL;
		goto unwind;
	}
	reader_tasks = kmalloc(nrealreaders * sizeof(reader_tasks[0]),
			       GFP_KERNEL);
	if (reader_tasks == NULL) {
		VERBOSE_PRINTK_ERRSTRING("out of memory");
		firsterr = -ENOMEM;
		goto unwind;
	}
	/*
	 * rcu_torture_cleanup() walks all nrealreaders slots and stops
	 * every non-NULL entry, so no slot may be left uninitialized if
	 * a kthread_run() below fails part-way through.
	 */
	for (i = 0; i < nrealreaders; i++)
		reader_tasks[i] = NULL;
	for (i = 0; i < nrealreaders; i++) {
		VERBOSE_PRINTK_STRING("Creating rcu_torture_reader task");
		reader_tasks[i] = kthread_run(rcu_torture_reader, NULL,
					      "rcu_torture_reader");
		if (IS_ERR(reader_tasks[i])) {
			firsterr = PTR_ERR(reader_tasks[i]);
			VERBOSE_PRINTK_ERRSTRING("Failed to create reader");
			reader_tasks[i] = NULL;
			goto unwind;
		}
	}
	if (stat_interval > 0) {
		VERBOSE_PRINTK_STRING("Creating rcu_torture_stats task");
		stats_task = kthread_run(rcu_torture_stats, NULL,
					 "rcu_torture_stats");
		if (IS_ERR(stats_task)) {
			firsterr = PTR_ERR(stats_task);
			VERBOSE_PRINTK_ERRSTRING("Failed to create stats");
			stats_task = NULL;
			goto unwind;
		}
	}
	return 0;

unwind:
	rcu_torture_cleanup();
	return firsterr;
}
module_init(rcu_torture_init);
module_exit(rcu_torture_cleanup);

View File

@@ -2511,8 +2511,6 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
cpustat->idle = cputime64_add(cpustat->idle, tmp);
/* Account for system time used */
acct_update_integrals(p);
/* Update rss highwater mark */
update_mem_hiwater(p);
}
/*
@@ -3879,7 +3877,6 @@ EXPORT_SYMBOL(cpu_present_map);
#ifndef CONFIG_SMP
cpumask_t cpu_online_map = CPU_MASK_ALL;
EXPORT_SYMBOL_GPL(cpu_online_map);
cpumask_t cpu_possible_map = CPU_MASK_ALL;
#endif

View File

@@ -277,7 +277,6 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
} else {
INIT_LIST_HEAD(&q->list);
q->flags = 0;
q->lock = NULL;
q->user = get_uid(t->user);
}
return(q);
@@ -406,6 +405,8 @@ void __exit_signal(struct task_struct *tsk)
void exit_signal(struct task_struct *tsk)
{
atomic_dec(&tsk->signal->live);
write_lock_irq(&tasklist_lock);
__exit_signal(tsk);
write_unlock_irq(&tasklist_lock);
@@ -650,8 +651,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
if (!valid_signal(sig))
return error;
error = -EPERM;
if ((!info || ((unsigned long)info != 1 &&
(unsigned long)info != 2 && SI_FROMUSER(info)))
if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info)))
&& ((sig != SIGCONT) ||
(current->signal->session != t->signal->session))
&& (current->euid ^ t->suid) && (current->euid ^ t->uid)
@@ -788,7 +788,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
* fast-pathed signals for kernel-internal things like SIGSTOP
* or SIGKILL.
*/
if ((unsigned long)info == 2)
if (info == SEND_SIG_FORCED)
goto out_set;
/* Real-time signals must be queued if sent by sigqueue, or
@@ -800,19 +800,19 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
pass on the info struct. */
q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
((unsigned long) info < 2 ||
(is_si_special(info) ||
info->si_code >= 0)));
if (q) {
list_add_tail(&q->list, &signals->list);
switch ((unsigned long) info) {
case 0:
case (unsigned long) SEND_SIG_NOINFO:
q->info.si_signo = sig;
q->info.si_errno = 0;
q->info.si_code = SI_USER;
q->info.si_pid = current->pid;
q->info.si_uid = current->uid;
break;
case 1:
case (unsigned long) SEND_SIG_PRIV:
q->info.si_signo = sig;
q->info.si_errno = 0;
q->info.si_code = SI_KERNEL;
@@ -823,20 +823,13 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
copy_siginfo(&q->info, info);
break;
}
} else {
if (sig >= SIGRTMIN && info && (unsigned long)info != 1
&& info->si_code != SI_USER)
} else if (!is_si_special(info)) {
if (sig >= SIGRTMIN && info->si_code != SI_USER)
/*
* Queue overflow, abort. We may abort if the signal was rt
* and sent by user using something other than kill().
*/
return -EAGAIN;
if (((unsigned long)info > 1) && (info->si_code == SI_TIMER))
/*
* Set up a return to indicate that we dropped
* the signal.
*/
ret = info->si_sys_private;
}
out_set:
@@ -857,12 +850,6 @@ specific_send_sig_info(int sig, struct siginfo *info, struct task_struct *t)
BUG();
assert_spin_locked(&t->sighand->siglock);
if (((unsigned long)info > 2) && (info->si_code == SI_TIMER))
/*
* Set up a return to indicate that we dropped the signal.
*/
ret = info->si_sys_private;
/* Short-circuit ignored signals. */
if (sig_ignored(t, sig))
goto out;
@@ -892,11 +879,13 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
int ret;
spin_lock_irqsave(&t->sighand->siglock, flags);
if (sigismember(&t->blocked, sig) || t->sighand->action[sig-1].sa.sa_handler == SIG_IGN) {
if (t->sighand->action[sig-1].sa.sa_handler == SIG_IGN) {
t->sighand->action[sig-1].sa.sa_handler = SIG_DFL;
sigdelset(&t->blocked, sig);
recalc_sigpending_tsk(t);
}
if (sigismember(&t->blocked, sig)) {
sigdelset(&t->blocked, sig);
}
recalc_sigpending_tsk(t);
ret = specific_send_sig_info(sig, info, t);
spin_unlock_irqrestore(&t->sighand->siglock, flags);
@@ -906,15 +895,7 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
void
force_sig_specific(int sig, struct task_struct *t)
{
unsigned long int flags;
spin_lock_irqsave(&t->sighand->siglock, flags);
if (t->sighand->action[sig-1].sa.sa_handler == SIG_IGN)
t->sighand->action[sig-1].sa.sa_handler = SIG_DFL;
sigdelset(&t->blocked, sig);
recalc_sigpending_tsk(t);
specific_send_sig_info(sig, (void *)2, t);
spin_unlock_irqrestore(&t->sighand->siglock, flags);
force_sig_info(sig, SEND_SIG_FORCED, t);
}
/*
@@ -1049,12 +1030,6 @@ __group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
assert_spin_locked(&p->sighand->siglock);
handle_stop_signal(sig, p);
if (((unsigned long)info > 2) && (info->si_code == SI_TIMER))
/*
* Set up a return to indicate that we dropped the signal.
*/
ret = info->si_sys_private;
/* Short-circuit ignored signals. */
if (sig_ignored(p, sig))
return ret;
@@ -1107,8 +1082,8 @@ void zap_other_threads(struct task_struct *p)
if (t != p->group_leader)
t->exit_signal = -1;
/* SIGKILL will be handled before any pending SIGSTOP */
sigaddset(&t->pending.signal, SIGKILL);
rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending);
signal_wake_up(t, 1);
}
}
@@ -1284,10 +1259,13 @@ send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
return ret;
}
#define __si_special(priv) \
((priv) ? SEND_SIG_PRIV : SEND_SIG_NOINFO)
int
send_sig(int sig, struct task_struct *p, int priv)
{
return send_sig_info(sig, (void*)(long)(priv != 0), p);
return send_sig_info(sig, __si_special(priv), p);
}
/*
@@ -1307,7 +1285,7 @@ send_group_sig_info(int sig, struct siginfo *info, struct task_struct *p)
void
force_sig(int sig, struct task_struct *p)
{
force_sig_info(sig, (void*)1L, p);
force_sig_info(sig, SEND_SIG_PRIV, p);
}
/*
@@ -1332,13 +1310,13 @@ force_sigsegv(int sig, struct task_struct *p)
int
kill_pg(pid_t pgrp, int sig, int priv)
{
return kill_pg_info(sig, (void *)(long)(priv != 0), pgrp);
return kill_pg_info(sig, __si_special(priv), pgrp);
}
int
kill_proc(pid_t pid, int sig, int priv)
{
return kill_proc_info(sig, (void *)(long)(priv != 0), pid);
return kill_proc_info(sig, __si_special(priv), pid);
}
/*
@@ -1369,11 +1347,12 @@ void sigqueue_free(struct sigqueue *q)
* pending queue.
*/
if (unlikely(!list_empty(&q->list))) {
read_lock(&tasklist_lock);
spin_lock_irqsave(q->lock, flags);
spinlock_t *lock = &current->sighand->siglock;
read_lock(&tasklist_lock);
spin_lock_irqsave(lock, flags);
if (!list_empty(&q->list))
list_del_init(&q->list);
spin_unlock_irqrestore(q->lock, flags);
spin_unlock_irqrestore(lock, flags);
read_unlock(&tasklist_lock);
}
q->flags &= ~SIGQUEUE_PREALLOC;
@@ -1412,7 +1391,6 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
goto out;
}
q->lock = &p->sighand->siglock;
list_add_tail(&q->list, &p->pending.list);
sigaddset(&p->pending.signal, sig);
if (!sigismember(&p->blocked, sig))
@@ -1460,7 +1438,6 @@ send_group_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
* We always use the shared queue for process-wide signals,
* to avoid several races.
*/
q->lock = &p->sighand->siglock;
list_add_tail(&q->list, &p->signal->shared_pending.list);
sigaddset(&p->signal->shared_pending.signal, sig);
@@ -1879,9 +1856,9 @@ relock:
/* Let the debugger run. */
ptrace_stop(signr, signr, info);
/* We're back. Did the debugger cancel the sig? */
/* We're back. Did the debugger cancel the sig or group_exit? */
signr = current->exit_code;
if (signr == 0)
if (signr == 0 || current->signal->flags & SIGNAL_GROUP_EXIT)
continue;
current->exit_code = 0;
@@ -2283,6 +2260,39 @@ sys_kill(int pid, int sig)
return kill_something_info(sig, &info, pid);
}
/*
 * Common back end for sys_tkill() and sys_tgkill(): send @sig to the single
 * task whose pid is @pid.  When @tgid is positive the task must also belong
 * to that thread group; a @tgid <= 0 disables that check.
 *
 * Returns -ESRCH if no matching task is found; otherwise the result of
 * check_kill_permission() or, when a signal is actually queued, the result
 * of specific_send_sig_info().
 */
static int do_tkill(int tgid, int pid, int sig)
{
	struct siginfo info;
	struct task_struct *target;
	int ret = -ESRCH;

	info.si_signo = sig;
	info.si_errno = 0;
	info.si_code = SI_TKILL;
	info.si_pid = current->tgid;
	info.si_uid = current->uid;

	read_lock(&tasklist_lock);

	target = find_task_by_pid(pid);
	if (!target)
		goto unlock;
	/* A positive tgid restricts the lookup to that thread group. */
	if (tgid > 0 && target->tgid != tgid)
		goto unlock;

	ret = check_kill_permission(sig, &info, target);
	/*
	 * The null signal is a permissions and process existence
	 * probe.  No signal is actually delivered.
	 */
	if (ret || !sig || !target->sighand)
		goto unlock;

	spin_lock_irq(&target->sighand->siglock);
	handle_stop_signal(sig, target);
	ret = specific_send_sig_info(sig, &info, target);
	spin_unlock_irq(&target->sighand->siglock);

unlock:
	read_unlock(&tasklist_lock);
	return ret;
}
/**
* sys_tgkill - send signal to one specific thread
* @tgid: the thread group ID of the thread
@@ -2295,38 +2305,11 @@ sys_kill(int pid, int sig)
*/
asmlinkage long sys_tgkill(int tgid, int pid, int sig)
{
struct siginfo info;
int error;
struct task_struct *p;
/* This is only valid for single tasks */
if (pid <= 0 || tgid <= 0)
return -EINVAL;
info.si_signo = sig;
info.si_errno = 0;
info.si_code = SI_TKILL;
info.si_pid = current->tgid;
info.si_uid = current->uid;
read_lock(&tasklist_lock);
p = find_task_by_pid(pid);
error = -ESRCH;
if (p && (p->tgid == tgid)) {
error = check_kill_permission(sig, &info, p);
/*
* The null signal is a permissions and process existence
* probe. No signal is actually delivered.
*/
if (!error && sig && p->sighand) {
spin_lock_irq(&p->sighand->siglock);
handle_stop_signal(sig, p);
error = specific_send_sig_info(sig, &info, p);
spin_unlock_irq(&p->sighand->siglock);
}
}
read_unlock(&tasklist_lock);
return error;
return do_tkill(tgid, pid, sig);
}
/*
@@ -2335,38 +2318,11 @@ asmlinkage long sys_tgkill(int tgid, int pid, int sig)
asmlinkage long
sys_tkill(int pid, int sig)
{
struct siginfo info;
int error;
struct task_struct *p;
/* This is only valid for single tasks */
if (pid <= 0)
return -EINVAL;
info.si_signo = sig;
info.si_errno = 0;
info.si_code = SI_TKILL;
info.si_pid = current->tgid;
info.si_uid = current->uid;
read_lock(&tasklist_lock);
p = find_task_by_pid(pid);
error = -ESRCH;
if (p) {
error = check_kill_permission(sig, &info, p);
/*
* The null signal is a permissions and process existence
* probe. No signal is actually delivered.
*/
if (!error && sig && p->sighand) {
spin_lock_irq(&p->sighand->siglock);
handle_stop_signal(sig, p);
error = specific_send_sig_info(sig, &info, p);
spin_unlock_irq(&p->sighand->siglock);
}
}
read_unlock(&tasklist_lock);
return error;
return do_tkill(0, pid, sig);
}
asmlinkage long

View File

@@ -338,30 +338,20 @@ int do_adjtimex(struct timex *txc)
if (mtemp >= MINSEC) {
ltemp = (time_offset / mtemp) << (SHIFT_USEC -
SHIFT_UPDATE);
if (ltemp < 0)
time_freq -= -ltemp >> SHIFT_KH;
else
time_freq += ltemp >> SHIFT_KH;
time_freq += shift_right(ltemp, SHIFT_KH);
} else /* calibration interval too short (p. 12) */
result = TIME_ERROR;
} else { /* PLL mode */
if (mtemp < MAXSEC) {
ltemp *= mtemp;
if (ltemp < 0)
time_freq -= -ltemp >> (time_constant +
time_constant +
SHIFT_KF - SHIFT_USEC);
else
time_freq += ltemp >> (time_constant +
time_freq += shift_right(ltemp,(time_constant +
time_constant +
SHIFT_KF - SHIFT_USEC);
SHIFT_KF - SHIFT_USEC));
} else /* calibration interval too long (p. 12) */
result = TIME_ERROR;
}
if (time_freq > time_tolerance)
time_freq = time_tolerance;
else if (time_freq < -time_tolerance)
time_freq = -time_tolerance;
time_freq = min(time_freq, time_tolerance);
time_freq = max(time_freq, -time_tolerance);
} /* STA_PLL || STA_PPSTIME */
} /* txc->modes & ADJ_OFFSET */
if (txc->modes & ADJ_TICK) {
@@ -384,10 +374,7 @@ leave: if ((time_status & (STA_UNSYNC|STA_CLOCKERR)) != 0
if ((txc->modes & ADJ_OFFSET_SINGLESHOT) == ADJ_OFFSET_SINGLESHOT)
txc->offset = save_adjust;
else {
if (time_offset < 0)
txc->offset = -(-time_offset >> SHIFT_UPDATE);
else
txc->offset = time_offset >> SHIFT_UPDATE;
txc->offset = shift_right(time_offset, SHIFT_UPDATE);
}
txc->freq = time_freq + pps_freq;
txc->maxerror = time_maxerror;
@@ -532,6 +519,7 @@ int do_settimeofday (struct timespec *tv)
clock_was_set();
return 0;
}
EXPORT_SYMBOL(do_settimeofday);
void do_gettimeofday (struct timeval *tv)
{

View File

@@ -46,6 +46,10 @@ static void time_interpolator_update(long delta_nsec);
#define time_interpolator_update(x)
#endif
u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
EXPORT_SYMBOL(jiffies_64);
/*
* per-CPU timer vector definitions:
*/
@@ -91,30 +95,6 @@ static inline void set_running_timer(tvec_base_t *base,
#endif
}
/*
 * Slow path of check_timer(): the timer's magic field did not hold
 * TIMER_MAGIC.  Print a warning with the timer's function and data plus a
 * stack dump (only while fewer than 16 warnings have been printed), then
 * store TIMER_MAGIC into the timer.
 */
static void check_timer_failed(struct timer_list *timer)
{
	/* Number of warnings printed so far; persists across calls. */
	static int whine_count;

	if (whine_count >= 16)
		goto repair;

	whine_count++;
	printk("Uninitialised timer!\n");
	printk("This is just a warning.  Your computer is OK\n");
	printk("function=0x%p, data=0x%lx\n",
		timer->function, timer->data);
	dump_stack();

repair:
	/*
	 * Now fix it up
	 */
	timer->magic = TIMER_MAGIC;
}
/*
 * Sanity-check a timer: a timer whose magic field already holds TIMER_MAGIC
 * passes untouched; anything else is handed to check_timer_failed().
 */
static inline void check_timer(struct timer_list *timer)
{
	if (timer->magic == TIMER_MAGIC)
		return;

	check_timer_failed(timer);
}
static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
{
unsigned long expires = timer->expires;
@@ -177,7 +157,6 @@ void fastcall init_timer(struct timer_list *timer)
{
timer->entry.next = NULL;
timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base;
timer->magic = TIMER_MAGIC;
}
EXPORT_SYMBOL(init_timer);
@@ -230,7 +209,6 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
int ret = 0;
BUG_ON(!timer->function);
check_timer(timer);
base = lock_timer_base(timer, &flags);
@@ -283,9 +261,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
unsigned long flags;
BUG_ON(timer_pending(timer) || !timer->function);
check_timer(timer);
spin_lock_irqsave(&base->t_base.lock, flags);
timer->base = &base->t_base;
internal_add_timer(base, timer);
@@ -316,8 +291,6 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
{
BUG_ON(!timer->function);
check_timer(timer);
/*
* This is a common optimization triggered by the
* networking code - if the timer is re-modified
@@ -348,8 +321,6 @@ int del_timer(struct timer_list *timer)
unsigned long flags;
int ret = 0;
check_timer(timer);
if (timer_pending(timer)) {
base = lock_timer_base(timer, &flags);
if (timer_pending(timer)) {
@@ -412,8 +383,6 @@ out:
*/
int del_timer_sync(struct timer_list *timer)
{
check_timer(timer);
for (;;) {
int ret = try_to_del_timer_sync(timer);
if (ret >= 0)
@@ -632,134 +601,118 @@ long time_next_adjust;
*/
static void second_overflow(void)
{
long ltemp;
long ltemp;
/* Bump the maxerror field */
time_maxerror += time_tolerance >> SHIFT_USEC;
if ( time_maxerror > NTP_PHASE_LIMIT ) {
time_maxerror = NTP_PHASE_LIMIT;
time_status |= STA_UNSYNC;
}
/*
* Leap second processing. If in leap-insert state at
* the end of the day, the system clock is set back one
* second; if in leap-delete state, the system clock is
* set ahead one second. The microtime() routine or
* external clock driver will insure that reported time
* is always monotonic. The ugly divides should be
* replaced.
*/
switch (time_state) {
case TIME_OK:
if (time_status & STA_INS)
time_state = TIME_INS;
else if (time_status & STA_DEL)
time_state = TIME_DEL;
break;
case TIME_INS:
if (xtime.tv_sec % 86400 == 0) {
xtime.tv_sec--;
wall_to_monotonic.tv_sec++;
/* The timer interpolator will make time change gradually instead
* of an immediate jump by one second.
*/
time_interpolator_update(-NSEC_PER_SEC);
time_state = TIME_OOP;
clock_was_set();
printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
/* Bump the maxerror field */
time_maxerror += time_tolerance >> SHIFT_USEC;
if (time_maxerror > NTP_PHASE_LIMIT) {
time_maxerror = NTP_PHASE_LIMIT;
time_status |= STA_UNSYNC;
}
break;
case TIME_DEL:
if ((xtime.tv_sec + 1) % 86400 == 0) {
xtime.tv_sec++;
wall_to_monotonic.tv_sec--;
/* Use of time interpolator for a gradual change of time */
time_interpolator_update(NSEC_PER_SEC);
time_state = TIME_WAIT;
clock_was_set();
printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
/*
* Leap second processing. If in leap-insert state at the end of the
* day, the system clock is set back one second; if in leap-delete
* state, the system clock is set ahead one second. The microtime()
* routine or external clock driver will ensure that reported time is
* always monotonic. The ugly divides should be replaced.
*/
switch (time_state) {
case TIME_OK:
if (time_status & STA_INS)
time_state = TIME_INS;
else if (time_status & STA_DEL)
time_state = TIME_DEL;
break;
case TIME_INS:
if (xtime.tv_sec % 86400 == 0) {
xtime.tv_sec--;
wall_to_monotonic.tv_sec++;
/*
* The timer interpolator will make time change
* gradually instead of an immediate jump by one second
*/
time_interpolator_update(-NSEC_PER_SEC);
time_state = TIME_OOP;
clock_was_set();
printk(KERN_NOTICE "Clock: inserting leap second "
"23:59:60 UTC\n");
}
break;
case TIME_DEL:
if ((xtime.tv_sec + 1) % 86400 == 0) {
xtime.tv_sec++;
wall_to_monotonic.tv_sec--;
/*
* Use of time interpolator for a gradual change of
* time
*/
time_interpolator_update(NSEC_PER_SEC);
time_state = TIME_WAIT;
clock_was_set();
printk(KERN_NOTICE "Clock: deleting leap second "
"23:59:59 UTC\n");
}
break;
case TIME_OOP:
time_state = TIME_WAIT;
break;
case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
}
break;
case TIME_OOP:
time_state = TIME_WAIT;
break;
case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
}
/*
* Compute the phase adjustment for the next second. In
* PLL mode, the offset is reduced by a fixed factor
* times the time constant. In FLL mode the offset is
* used directly. In either mode, the maximum phase
* adjustment for each second is clamped so as to spread
* the adjustment over not more than the number of
* seconds between updates.
*/
if (time_offset < 0) {
ltemp = -time_offset;
if (!(time_status & STA_FLL))
ltemp >>= SHIFT_KG + time_constant;
if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
time_offset += ltemp;
time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
} else {
/*
* Compute the phase adjustment for the next second. In PLL mode, the
* offset is reduced by a fixed factor times the time constant. In FLL
* mode the offset is used directly. In either mode, the maximum phase
* adjustment for each second is clamped so as to spread the adjustment
* over not more than the number of seconds between updates.
*/
ltemp = time_offset;
if (!(time_status & STA_FLL))
ltemp >>= SHIFT_KG + time_constant;
if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
ltemp = shift_right(ltemp, SHIFT_KG + time_constant);
ltemp = min(ltemp, (MAXPHASE / MINSEC) << SHIFT_UPDATE);
ltemp = max(ltemp, -(MAXPHASE / MINSEC) << SHIFT_UPDATE);
time_offset -= ltemp;
time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
}
/*
* Compute the frequency estimate and additional phase
* adjustment due to frequency error for the next
* second. When the PPS signal is engaged, gnaw on the
* watchdog counter and update the frequency computed by
* the pll and the PPS signal.
*/
pps_valid++;
if (pps_valid == PPS_VALID) { /* PPS signal lost */
pps_jitter = MAXTIME;
pps_stabil = MAXFREQ;
time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
STA_PPSWANDER | STA_PPSERROR);
}
ltemp = time_freq + pps_freq;
if (ltemp < 0)
time_adj -= -ltemp >>
(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
else
time_adj += ltemp >>
(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
/*
* Compute the frequency estimate and additional phase adjustment due
* to frequency error for the next second. When the PPS signal is
* engaged, gnaw on the watchdog counter and update the frequency
* computed by the pll and the PPS signal.
*/
pps_valid++;
if (pps_valid == PPS_VALID) { /* PPS signal lost */
pps_jitter = MAXTIME;
pps_stabil = MAXFREQ;
time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
STA_PPSWANDER | STA_PPSERROR);
}
ltemp = time_freq + pps_freq;
time_adj += shift_right(ltemp,(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE));
#if HZ == 100
/* Compensate for (HZ==100) != (1 << SHIFT_HZ).
* Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
*/
if (time_adj < 0)
time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
else
time_adj += (time_adj >> 2) + (time_adj >> 5);
/*
* Compensate for (HZ==100) != (1 << SHIFT_HZ). Add 25% and 3.125% to
* get 128.125; => only 0.125% error (p. 14)
*/
time_adj += shift_right(time_adj, 2) + shift_right(time_adj, 5);
#endif
#if HZ == 250
/*
* Compensate for (HZ==250) != (1 << SHIFT_HZ). Add 1.5625% and
* 0.78125% to get 255.85938; => only 0.05% error (p. 14)
*/
time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7);
#endif
#if HZ == 1000
/* Compensate for (HZ==1000) != (1 << SHIFT_HZ).
* Add 1.5625% and 0.78125% to get 1023.4375; => only 0.05% error (p. 14)
*/
if (time_adj < 0)
time_adj -= (-time_adj >> 6) + (-time_adj >> 7);
else
time_adj += (time_adj >> 6) + (time_adj >> 7);
/*
* Compensate for (HZ==1000) != (1 << SHIFT_HZ). Add 1.5625% and
* 0.78125% to get 1023.4375; => only 0.05% error (p. 14)
*/
time_adj += shift_right(time_adj, 6) + shift_right(time_adj, 7);
#endif
}
@@ -768,23 +721,20 @@ static void update_wall_time_one_tick(void)
{
long time_adjust_step, delta_nsec;
if ( (time_adjust_step = time_adjust) != 0 ) {
/* We are doing an adjtime thing.
*
* Prepare time_adjust_step to be within bounds.
* Note that a positive time_adjust means we want the clock
* to run faster.
*
* Limit the amount of the step to be in the range
* -tickadj .. +tickadj
*/
if (time_adjust > tickadj)
time_adjust_step = tickadj;
else if (time_adjust < -tickadj)
time_adjust_step = -tickadj;
if ((time_adjust_step = time_adjust) != 0 ) {
/*
* We are doing an adjtime thing. Prepare time_adjust_step to
* be within bounds. Note that a positive time_adjust means we
* want the clock to run faster.
*
* Limit the amount of the step to be in the range
* -tickadj .. +tickadj
*/
time_adjust_step = min(time_adjust_step, (long)tickadj);
time_adjust_step = max(time_adjust_step, (long)-tickadj);
/* Reduce by this step the amount of time left */
time_adjust -= time_adjust_step;
/* Reduce by this step the amount of time left */
time_adjust -= time_adjust_step;
}
delta_nsec = tick_nsec + time_adjust_step * 1000;
/*
@@ -792,13 +742,8 @@ static void update_wall_time_one_tick(void)
* advance the tick more.
*/
time_phase += time_adj;
if (time_phase <= -FINENSEC) {
long ltemp = -time_phase >> (SHIFT_SCALE - 10);
time_phase += ltemp << (SHIFT_SCALE - 10);
delta_nsec -= ltemp;
}
else if (time_phase >= FINENSEC) {
long ltemp = time_phase >> (SHIFT_SCALE - 10);
if ((time_phase >= FINENSEC) || (time_phase <= -FINENSEC)) {
long ltemp = shift_right(time_phase, (SHIFT_SCALE - 10));
time_phase -= ltemp << (SHIFT_SCALE - 10);
delta_nsec += ltemp;
}
@@ -1128,8 +1073,8 @@ fastcall signed long __sched schedule_timeout(signed long timeout)
if (timeout < 0)
{
printk(KERN_ERR "schedule_timeout: wrong timeout "
"value %lx from %p\n", timeout,
__builtin_return_address(0));
"value %lx from %p\n", timeout,
__builtin_return_address(0));
current->state = TASK_RUNNING;
goto out;
}
@@ -1137,12 +1082,8 @@ fastcall signed long __sched schedule_timeout(signed long timeout)
expire = timeout + jiffies;
init_timer(&timer);
timer.expires = expire;
timer.data = (unsigned long) current;
timer.function = process_timeout;
add_timer(&timer);
setup_timer(&timer, process_timeout, (unsigned long)current);
__mod_timer(&timer, expire);
schedule();
del_singleshot_timer_sync(&timer);
@@ -1159,15 +1100,15 @@ EXPORT_SYMBOL(schedule_timeout);
*/
signed long __sched schedule_timeout_interruptible(signed long timeout)
{
__set_current_state(TASK_INTERRUPTIBLE);
return schedule_timeout(timeout);
__set_current_state(TASK_INTERRUPTIBLE);
return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_interruptible);
signed long __sched schedule_timeout_uninterruptible(signed long timeout)
{
__set_current_state(TASK_UNINTERRUPTIBLE);
return schedule_timeout(timeout);
__set_current_state(TASK_UNINTERRUPTIBLE);
return schedule_timeout(timeout);
}
EXPORT_SYMBOL(schedule_timeout_uninterruptible);
@@ -1507,16 +1448,18 @@ static void time_interpolator_update(long delta_nsec)
if (!time_interpolator)
return;
/* The interpolator compensates for late ticks by accumulating
* the late time in time_interpolator->offset. A tick earlier than
* expected will lead to a reset of the offset and a corresponding
* jump of the clock forward. Again this only works if the
* interpolator clock is running slightly slower than the regular clock
* and the tuning logic insures that.
*/
/*
* The interpolator compensates for late ticks by accumulating the late
* time in time_interpolator->offset. A tick earlier than expected will
* lead to a reset of the offset and a corresponding jump of the clock
* forward. Again this only works if the interpolator clock is running
* slightly slower than the regular clock and the tuning logic ensures
* that.
*/
counter = time_interpolator_get_counter(1);
offset = time_interpolator->offset + GET_TI_NSECS(counter, time_interpolator);
offset = time_interpolator->offset +
GET_TI_NSECS(counter, time_interpolator);
if (delta_nsec < 0 || (unsigned long) delta_nsec < offset)
time_interpolator->offset = offset - delta_nsec;

View File

@@ -12,6 +12,8 @@
* Andrew Morton <andrewm@uow.edu.au>
* Kai Petzke <wpp@marie.physik.tu-berlin.de>
* Theodore Ts'o <tytso@mit.edu>
*
* Made to use alloc_percpu by Christoph Lameter <clameter@sgi.com>.
*/
#include <linux/module.h>
@@ -57,7 +59,7 @@ struct cpu_workqueue_struct {
* per-CPU workqueues:
*/
struct workqueue_struct {
struct cpu_workqueue_struct cpu_wq[NR_CPUS];
struct cpu_workqueue_struct *cpu_wq;
const char *name;
struct list_head list; /* Empty if single thread */
};
@@ -102,7 +104,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work)
if (unlikely(is_single_threaded(wq)))
cpu = 0;
BUG_ON(!list_empty(&work->entry));
__queue_work(wq->cpu_wq + cpu, work);
__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
ret = 1;
}
put_cpu();
@@ -118,7 +120,7 @@ static void delayed_work_timer_fn(unsigned long __data)
if (unlikely(is_single_threaded(wq)))
cpu = 0;
__queue_work(wq->cpu_wq + cpu, work);
__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
}
int fastcall queue_delayed_work(struct workqueue_struct *wq,
@@ -265,13 +267,13 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
if (is_single_threaded(wq)) {
/* Always use cpu 0's area. */
flush_cpu_workqueue(wq->cpu_wq + 0);
flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, 0));
} else {
int cpu;
lock_cpu_hotplug();
for_each_online_cpu(cpu)
flush_cpu_workqueue(wq->cpu_wq + cpu);
flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu));
unlock_cpu_hotplug();
}
}
@@ -279,7 +281,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq,
int cpu)
{
struct cpu_workqueue_struct *cwq = wq->cpu_wq + cpu;
struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
struct task_struct *p;
spin_lock_init(&cwq->lock);
@@ -312,6 +314,7 @@ struct workqueue_struct *__create_workqueue(const char *name,
if (!wq)
return NULL;
wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct);
wq->name = name;
/* We don't need the distraction of CPUs appearing and vanishing. */
lock_cpu_hotplug();
@@ -353,7 +356,7 @@ static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu)
unsigned long flags;
struct task_struct *p;
cwq = wq->cpu_wq + cpu;
cwq = per_cpu_ptr(wq->cpu_wq, cpu);
spin_lock_irqsave(&cwq->lock, flags);
p = cwq->thread;
cwq->thread = NULL;
@@ -380,6 +383,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
spin_unlock(&workqueue_lock);
}
unlock_cpu_hotplug();
free_percpu(wq->cpu_wq);
kfree(wq);
}
@@ -458,7 +462,7 @@ int current_is_keventd(void)
BUG_ON(!keventd_wq);
cwq = keventd_wq->cpu_wq + cpu;
cwq = per_cpu_ptr(keventd_wq->cpu_wq, cpu);
if (current == cwq->thread)
ret = 1;
@@ -470,7 +474,7 @@ int current_is_keventd(void)
/* Take the work from this (downed) CPU. */
static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
{
struct cpu_workqueue_struct *cwq = wq->cpu_wq + cpu;
struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu);
LIST_HEAD(list);
struct work_struct *work;
@@ -481,7 +485,7 @@ static void take_over_work(struct workqueue_struct *wq, unsigned int cpu)
printk("Taking work for %s\n", wq->name);
work = list_entry(list.next,struct work_struct,entry);
list_del(&work->entry);
__queue_work(wq->cpu_wq + smp_processor_id(), work);
__queue_work(per_cpu_ptr(wq->cpu_wq, smp_processor_id()), work);
}
spin_unlock_irq(&cwq->lock);
}
@@ -508,15 +512,18 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
case CPU_ONLINE:
/* Kick off worker threads. */
list_for_each_entry(wq, &workqueues, list) {
kthread_bind(wq->cpu_wq[hotcpu].thread, hotcpu);
wake_up_process(wq->cpu_wq[hotcpu].thread);
struct cpu_workqueue_struct *cwq;
cwq = per_cpu_ptr(wq->cpu_wq, hotcpu);
kthread_bind(cwq->thread, hotcpu);
wake_up_process(cwq->thread);
}
break;
case CPU_UP_CANCELED:
list_for_each_entry(wq, &workqueues, list) {
/* Unbind so it can run. */
kthread_bind(wq->cpu_wq[hotcpu].thread,
kthread_bind(per_cpu_ptr(wq->cpu_wq, hotcpu)->thread,
smp_processor_id());
cleanup_workqueue_thread(wq, hotcpu);
}