Merge with master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6.git

David Woodhouse
2005-07-02 13:39:09 +01:00
3119 changed files with 163508 additions and 62433 deletions

kernel/Kconfig.hz Normal file

@@ -0,0 +1,46 @@
#
# Timer Interrupt Frequency Configuration
#
choice
prompt "Timer frequency"
default HZ_250
help
Allows the configuration of the timer frequency. It is customary
to have the timer interrupt run at 1000 HZ but 100 HZ may be more
beneficial for servers and NUMA systems that do not need to have
a fast response for user interaction and that may experience bus
contention and cacheline bounces as a result of timer interrupts.
Note that the timer interrupt occurs on each processor in an SMP
environment leading to NR_CPUS * HZ number of timer interrupts
per second.
config HZ_100
bool "100 HZ"
help
100 HZ is a typical choice for servers, SMP and NUMA systems
with lots of processors that may show reduced performance if
too many timer interrupts are occurring.
config HZ_250
bool "250 HZ"
help
250 HZ is a good compromise choice allowing server performance
while also showing good interactive responsiveness even
on SMP and NUMA systems.
config HZ_1000
bool "1000 HZ"
help
1000 HZ is the preferred choice for desktop systems and other
systems requiring fast interactive responses to events.
endchoice
config HZ
int
default 100 if HZ_100
default 250 if HZ_250
default 1000 if HZ_1000
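As a rough illustration of the arithmetic in the help text above (not part of the file), here is the system-wide tick rate and tick period implied by each choice for a hypothetical 8-CPU machine; EXAMPLE_NR_CPUS is only an example value:

/* Illustration only, not part of kernel/Kconfig.hz.
 *
 *   HZ = 100  -> 10 ms per tick,  8 * 100  =  800 timer interrupts/s
 *   HZ = 250  ->  4 ms per tick,  8 * 250  = 2000 timer interrupts/s
 *   HZ = 1000 ->  1 ms per tick,  8 * 1000 = 8000 timer interrupts/s
 */
#define EXAMPLE_NR_CPUS			8
#define TICK_PERIOD_MS(hz)		(1000 / (hz))
#define SYSTEM_TICKS_PER_SEC(hz)	(EXAMPLE_NR_CPUS * (hz))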

kernel/Kconfig.preempt Normal file

@@ -0,0 +1,65 @@
choice
prompt "Preemption Model"
default PREEMPT_NONE
config PREEMPT_NONE
bool "No Forced Preemption (Server)"
help
This is the traditional Linux preemption model, geared towards
throughput. It will still provide good latencies most of the
time, but there are no guarantees and occasional longer delays
are possible.
Select this option if you are building a kernel for a server or
scientific/computation system, or if you want to maximize the
raw processing power of the kernel, irrespective of scheduling
latencies.
config PREEMPT_VOLUNTARY
bool "Voluntary Kernel Preemption (Desktop)"
help
This option reduces the latency of the kernel by adding more
"explicit preemption points" to the kernel code. These new
preemption points have been selected to reduce the maximum
latency of rescheduling, providing faster application reactions,
at the cost of slightly lower throughput.
This allows reaction to interactive events by allowing a
low priority process to voluntarily preempt itself even if it
is in kernel mode executing a system call. This allows
applications to run more 'smoothly' even when the system is
under load.
Select this if you are building a kernel for a desktop system.
config PREEMPT
bool "Preemptible Kernel (Low-Latency Desktop)"
help
This option reduces the latency of the kernel by making
all kernel code (that is not executing in a critical section)
preemptible. This allows reaction to interactive events by
permitting a low priority process to be preempted involuntarily
even if it is in kernel mode executing a system call and would
otherwise not be about to reach a natural preemption point.
This allows applications to run more 'smoothly' even when the
system is under load, at the cost of slightly lower throughput
and a slight runtime overhead to kernel code.
Select this if you are building a kernel for a desktop or
embedded system with latency requirements in the milliseconds
range.
endchoice
config PREEMPT_BKL
bool "Preempt The Big Kernel Lock"
depends on SMP || PREEMPT
default y
help
This option reduces the latency of the kernel by making the
big kernel lock preemptible.
Say Y here if you are building a kernel for a desktop system.
Say N if you are unsure.
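The "explicit preemption points" referred to above are calls such as cond_resched(), which reschedule only when another task is waiting to run. A minimal sketch of the pattern follows; struct item and handle_one_item() are hypothetical placeholders, not kernel APIs:

#include <linux/sched.h>

struct item { int payload; };			/* hypothetical data type */

static void handle_one_item(struct item *itm)
{
	/* ... hypothetical per-item work ... */
}

/* Sketch only: a long-running loop made preemption-friendly by an
 * explicit preemption point at the end of each iteration. */
static void process_items(struct item *items, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		handle_one_item(&items[i]);
		cond_resched();		/* yield here if a reschedule is due */
	}
}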

kernel/Makefile

@@ -17,6 +17,7 @@ obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_PM) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_IKCONFIG) += configs.o
@@ -27,6 +28,7 @@ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_SECCOMP) += seccomp.o
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)

kernel/cpu.c

@@ -63,19 +63,15 @@ static int take_cpu_down(void *unused)
{
int err;
/* Take offline: makes arch_cpu_down somewhat easier. */
cpu_clear(smp_processor_id(), cpu_online_map);
/* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable();
if (err < 0)
cpu_set(smp_processor_id(), cpu_online_map);
else
/* Force idle task to run as soon as we yield: it should
immediately notice cpu is offline and die quickly. */
sched_idle_next();
return err;
return err;
/* Force idle task to run as soon as we yield: it should
immediately notice cpu is offline and die quickly. */
sched_idle_next();
return 0;
}
int cpu_down(unsigned int cpu)

kernel/cpuset.c

@@ -228,13 +228,7 @@ static struct dentry_operations cpuset_dops = {
static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name)
{
struct qstr qstr;
struct dentry *d;
qstr.name = name;
qstr.len = strlen(name);
qstr.hash = full_name_hash(name, qstr.len);
d = lookup_hash(&qstr, parent);
struct dentry *d = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(d))
d->d_op = &cpuset_dops;
return d;
@@ -601,10 +595,62 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
return 0;
}
/*
* For a given cpuset cur, partition the system as follows
* a. All cpus in the parent cpuset's cpus_allowed that are not part of any
* exclusive child cpusets
* b. All cpus in the current cpuset's cpus_allowed that are not part of any
* exclusive child cpusets
* Build these two partitions by calling partition_sched_domains
*
* Call with cpuset_sem held. May nest a call to the
* lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
*/
static void update_cpu_domains(struct cpuset *cur)
{
struct cpuset *c, *par = cur->parent;
cpumask_t pspan, cspan;
if (par == NULL || cpus_empty(cur->cpus_allowed))
return;
/*
* Get all cpus from parent's cpus_allowed not part of exclusive
* children
*/
pspan = par->cpus_allowed;
list_for_each_entry(c, &par->children, sibling) {
if (is_cpu_exclusive(c))
cpus_andnot(pspan, pspan, c->cpus_allowed);
}
if (is_removed(cur) || !is_cpu_exclusive(cur)) {
cpus_or(pspan, pspan, cur->cpus_allowed);
if (cpus_equal(pspan, cur->cpus_allowed))
return;
cspan = CPU_MASK_NONE;
} else {
if (cpus_empty(pspan))
return;
cspan = cur->cpus_allowed;
/*
* Get all cpus from current cpuset's cpus_allowed not part
* of exclusive children
*/
list_for_each_entry(c, &cur->children, sibling) {
if (is_cpu_exclusive(c))
cpus_andnot(cspan, cspan, c->cpus_allowed);
}
}
lock_cpu_hotplug();
partition_sched_domains(&pspan, &cspan);
unlock_cpu_hotplug();
}
static int update_cpumask(struct cpuset *cs, char *buf)
{
struct cpuset trialcs;
int retval;
int retval, cpus_unchanged;
trialcs = *cs;
retval = cpulist_parse(buf, trialcs.cpus_allowed);
@@ -614,9 +660,13 @@ static int update_cpumask(struct cpuset *cs, char *buf)
if (cpus_empty(trialcs.cpus_allowed))
return -ENOSPC;
retval = validate_change(cs, &trialcs);
if (retval == 0)
cs->cpus_allowed = trialcs.cpus_allowed;
return retval;
if (retval < 0)
return retval;
cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed);
cs->cpus_allowed = trialcs.cpus_allowed;
if (is_cpu_exclusive(cs) && !cpus_unchanged)
update_cpu_domains(cs);
return 0;
}
static int update_nodemask(struct cpuset *cs, char *buf)
@@ -652,7 +702,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
{
int turning_on;
struct cpuset trialcs;
int err;
int err, cpu_exclusive_changed;
turning_on = (simple_strtoul(buf, NULL, 10) != 0);
@@ -663,13 +713,18 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
clear_bit(bit, &trialcs.flags);
err = validate_change(cs, &trialcs);
if (err == 0) {
if (turning_on)
set_bit(bit, &cs->flags);
else
clear_bit(bit, &cs->flags);
}
return err;
if (err < 0)
return err;
cpu_exclusive_changed =
(is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
if (turning_on)
set_bit(bit, &cs->flags);
else
clear_bit(bit, &cs->flags);
if (cpu_exclusive_changed)
update_cpu_domains(cs);
return 0;
}
static int attach_task(struct cpuset *cs, char *buf)
@@ -1315,12 +1370,14 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
up(&cpuset_sem);
return -EBUSY;
}
spin_lock(&cs->dentry->d_lock);
parent = cs->parent;
set_bit(CS_REMOVED, &cs->flags);
if (is_cpu_exclusive(cs))
update_cpu_domains(cs);
list_del(&cs->sibling); /* delete my sibling from parent->children */
if (list_empty(&parent->children))
check_for_release(parent);
spin_lock(&cs->dentry->d_lock);
d = dget(cs->dentry);
cs->dentry = NULL;
spin_unlock(&d->d_lock);

kernel/crash_dump.c Normal file

@@ -0,0 +1,52 @@
/*
* kernel/crash_dump.c - Memory preserving reboot related code.
*
* Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
* Copyright (C) IBM Corporation, 2004. All rights reserved
*/
#include <linux/smp_lock.h>
#include <linux/errno.h>
#include <linux/proc_fs.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/crash_dump.h>
#include <asm/io.h>
#include <asm/uaccess.h>
/* Stores the physical address of elf header of crash image. */
unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
/*
* Copy a page from "oldmem". For this page, there is no pte mapped
* in the current kernel. We stitch up a pte, similar to kmap_atomic.
*/
ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
size_t csize, unsigned long offset, int userbuf)
{
void *page, *vaddr;
if (!csize)
return 0;
page = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!page)
return -ENOMEM;
vaddr = kmap_atomic_pfn(pfn, KM_PTE0);
copy_page(page, vaddr);
kunmap_atomic(vaddr, KM_PTE0);
if (userbuf) {
if (copy_to_user(buf, (page + offset), csize)) {
kfree(page);
return -EFAULT;
}
} else {
memcpy(buf, (page + offset), csize);
}
kfree(page);
return csize;
}
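A sketch of how a crash-dump reader (for example, a /proc/vmcore style interface) might use copy_oldmem_page() to copy an arbitrary physical range of the crashed kernel into a user buffer. read_oldmem_range() is a hypothetical helper, not part of this file; the final argument of 1 marks the destination as a user-space buffer:

#include <linux/crash_dump.h>
#include <linux/kernel.h>
#include <linux/mm.h>

/* Hypothetical helper: copy 'count' bytes of old-kernel memory starting
 * at physical address 'paddr' into a user buffer, one page at a time. */
static ssize_t read_oldmem_range(unsigned long long paddr,
				 char *ubuf, size_t count)
{
	size_t done = 0;

	while (done < count) {
		unsigned long pfn = (paddr + done) >> PAGE_SHIFT;
		unsigned long offset = (paddr + done) & (PAGE_SIZE - 1);
		size_t chunk = min_t(size_t, count - done, PAGE_SIZE - offset);
		ssize_t ret;

		ret = copy_oldmem_page(pfn, ubuf + done, chunk, offset, 1);
		if (ret < 0)
			return ret;
		done += ret;
	}
	return done;
}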

kernel/exit.c

@@ -72,6 +72,11 @@ repeat:
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
__exit_signal(p);
__exit_sighand(p);
/*
* Note that the fastpath in sys_times depends on __exit_signal having
* updated the counters before a task is removed from the tasklist of
* the process by __unhash_process.
*/
__unhash_process(p);
/*
@@ -779,6 +784,8 @@ fastcall NORET_TYPE void do_exit(long code)
profile_task_exit(tsk);
WARN_ON(atomic_read(&tsk->fs_excl));
if (unlikely(in_interrupt()))
panic("Aiee, killing interrupt handler!");
if (unlikely(!tsk->pid))
@@ -793,6 +800,17 @@ fastcall NORET_TYPE void do_exit(long code)
ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
}
/*
* We're taking recursive faults here in do_exit. Safest is to just
* leave this task alone and wait for reboot.
*/
if (unlikely(tsk->flags & PF_EXITING)) {
printk(KERN_ALERT
"Fixing recursive fault but reboot is needed!\n");
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
}
tsk->flags |= PF_EXITING;
/*

kernel/fork.c

@@ -194,6 +194,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
mm->mmap = NULL;
mm->mmap_cache = NULL;
mm->free_area_cache = oldmm->mmap_base;
mm->cached_hole_size = ~0UL;
mm->map_count = 0;
set_mm_counter(mm, rss, 0);
set_mm_counter(mm, anon_rss, 0);
@@ -249,8 +250,9 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
/*
* Link in the new vma and copy the page table entries:
* link in first so that swapoff can see swap entries,
* and try_to_unmap_one's find_vma find the new vma.
* link in first so that swapoff can see swap entries.
* Note that, exceptionally, here the vma is inserted
* without holding mm->mmap_sem.
*/
spin_lock(&mm->page_table_lock);
*pprev = tmp;
@@ -322,6 +324,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
mm->ioctx_list = NULL;
mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
mm->free_area_cache = TASK_UNMAPPED_BASE;
mm->cached_hole_size = ~0UL;
if (likely(!mm_alloc_pgd(mm))) {
mm->def_flags = 0;
@@ -1000,9 +1003,6 @@ static task_t *copy_process(unsigned long clone_flags,
p->pdeath_signal = 0;
p->exit_state = 0;
/* Perform scheduler related setup */
sched_fork(p);
/*
* Ok, make it visible to the rest of the system.
* We dont wake it up yet.
@@ -1011,18 +1011,24 @@ static task_t *copy_process(unsigned long clone_flags,
INIT_LIST_HEAD(&p->ptrace_children);
INIT_LIST_HEAD(&p->ptrace_list);
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p, clone_flags);
/* Need tasklist lock for parent etc handling! */
write_lock_irq(&tasklist_lock);
/*
* The task hasn't been attached yet, so cpus_allowed mask cannot
* have changed. The cpus_allowed mask of the parent may have
* changed after it was copied first time, and it may then move to
* another CPU - so we re-copy it here and set the child's CPU to
* the parent's CPU. This avoids a lot of nasty races.
* The task hasn't been attached yet, so its cpus_allowed mask will
* not be changed, nor will its assigned CPU.
*
* The cpus_allowed mask of the parent may have changed after it was
* copied first time - so re-copy it here, then check the child's CPU
* to ensure it is on a valid CPU (and if not, just force it back to
* parent's CPU). This avoids a lot of nasty races.
*/
p->cpus_allowed = current->cpus_allowed;
set_task_cpu(p, smp_processor_id());
if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed)))
set_task_cpu(p, smp_processor_id());
/*
* Check for pending SIGKILL! The new thread should not be allowed
@@ -1084,6 +1090,11 @@ static task_t *copy_process(unsigned long clone_flags,
spin_unlock(&current->sighand->siglock);
}
/*
* inherit ioprio
*/
p->ioprio = current->ioprio;
SET_LINKS(p);
if (unlikely(p->ptrace & PT_PTRACED))
__ptrace_link(p, current->parent);

kernel/irq/autoprobe.c

@@ -9,6 +9,7 @@
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
/*
* Autodetection depends on the fact that any interrupt that
@@ -26,7 +27,7 @@ static DECLARE_MUTEX(probe_sem);
*/
unsigned long probe_irq_on(void)
{
unsigned long val, delay;
unsigned long val;
irq_desc_t *desc;
unsigned int i;
@@ -45,8 +46,7 @@ unsigned long probe_irq_on(void)
}
/* Wait for longstanding interrupts to trigger. */
for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
/* about 20ms delay */ barrier();
msleep(20);
/*
* enable any unassigned irqs
@@ -68,8 +68,7 @@ unsigned long probe_irq_on(void)
/*
* Wait for spurious interrupts to trigger
*/
for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
/* about 100ms delay */ barrier();
msleep(100);
/*
* Now filter out any obviously spurious interrupts

kernel/irq/handle.c

@@ -172,7 +172,7 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
spin_lock(&desc->lock);
if (!noirqdebug)
note_interrupt(irq, desc, action_ret);
note_interrupt(irq, desc, action_ret, regs);
if (likely(!(desc->status & IRQ_PENDING)))
break;
desc->status &= ~IRQ_PENDING;

kernel/irq/manage.c

@@ -6,6 +6,7 @@
* This file contains driver APIs to the irq subsystem.
*/
#include <linux/config.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/random.h>
@@ -255,6 +256,13 @@ void free_irq(unsigned int irq, void *dev_id)
/* Found it - now remove it from the list of entries */
*pp = action->next;
/* Currently used only by UML, might disappear one day.*/
#ifdef CONFIG_IRQ_RELEASE_METHOD
if (desc->handler->release)
desc->handler->release(irq, dev_id);
#endif
if (!desc->action) {
desc->status |= IRQ_DISABLED;
if (desc->handler->shutdown)

kernel/irq/spurious.c

@@ -11,6 +11,83 @@
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
static int irqfixup;
/*
* Recovery handler for misrouted interrupts.
*/
static int misrouted_irq(int irq, struct pt_regs *regs)
{
int i;
irq_desc_t *desc;
int ok = 0;
int work = 0; /* Did we do work for a real IRQ */
for(i = 1; i < NR_IRQS; i++) {
struct irqaction *action;
if (i == irq) /* Already tried */
continue;
desc = &irq_desc[i];
spin_lock(&desc->lock);
action = desc->action;
/* Already running on another processor */
if (desc->status & IRQ_INPROGRESS) {
/*
* Already running: If it is shared get the other
* CPU to go looking for our mystery interrupt too
*/
if (desc->action && (desc->action->flags & SA_SHIRQ))
desc->status |= IRQ_PENDING;
spin_unlock(&desc->lock);
continue;
}
/* Honour the normal IRQ locking */
desc->status |= IRQ_INPROGRESS;
spin_unlock(&desc->lock);
while (action) {
/* Only shared IRQ handlers are safe to call */
if (action->flags & SA_SHIRQ) {
if (action->handler(i, action->dev_id, regs) ==
IRQ_HANDLED)
ok = 1;
}
action = action->next;
}
local_irq_disable();
/* Now clean up the flags */
spin_lock(&desc->lock);
action = desc->action;
/*
* While we were looking for a fixup someone queued a real
* IRQ clashing with our walk
*/
while ((desc->status & IRQ_PENDING) && action) {
/*
* Perform real IRQ processing for the IRQ we deferred
*/
work = 1;
spin_unlock(&desc->lock);
handle_IRQ_event(i, regs, action);
spin_lock(&desc->lock);
desc->status &= ~IRQ_PENDING;
}
desc->status &= ~IRQ_INPROGRESS;
/*
* If we did actual work for the real IRQ line we must let the
* IRQ controller clean up too
*/
if(work)
desc->handler->end(i);
spin_unlock(&desc->lock);
}
/* So the caller can adjust the irq error counts */
return ok;
}
/*
* If 99,900 of the previous 100,000 interrupts have not been handled
* then assume that the IRQ is stuck in some manner. Drop a diagnostic
@@ -31,7 +108,8 @@ __report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
printk(KERN_ERR "irq event %d: bogus return value %x\n",
irq, action_ret);
} else {
printk(KERN_ERR "irq %d: nobody cared!\n", irq);
printk(KERN_ERR "irq %d: nobody cared (try booting with "
"the \"irqpoll\" option)\n", irq);
}
dump_stack();
printk(KERN_ERR "handlers:\n");
@@ -45,7 +123,7 @@ __report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
}
}
void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
{
static int count = 100;
@@ -55,7 +133,8 @@ void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
}
}
void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
struct pt_regs *regs)
{
if (action_ret != IRQ_HANDLED) {
desc->irqs_unhandled++;
@@ -63,6 +142,15 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
report_bad_irq(irq, desc, action_ret);
}
if (unlikely(irqfixup)) {
/* Don't punish working computers */
if ((irqfixup == 2 && irq == 0) || action_ret == IRQ_NONE) {
int ok = misrouted_irq(irq, regs);
if (action_ret == IRQ_NONE)
desc->irqs_unhandled -= ok;
}
}
desc->irq_count++;
if (desc->irq_count < 100000)
return;
@@ -94,3 +182,24 @@ int __init noirqdebug_setup(char *str)
__setup("noirqdebug", noirqdebug_setup);
static int __init irqfixup_setup(char *str)
{
irqfixup = 1;
printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
printk(KERN_WARNING "This may impact system performance.\n");
return 1;
}
__setup("irqfixup", irqfixup_setup);
static int __init irqpoll_setup(char *str)
{
irqfixup = 2;
printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
"enabled\n");
printk(KERN_WARNING "This may significantly impact system "
"performance\n");
return 1;
}
__setup("irqpoll", irqpoll_setup);

kernel/itimer.c

@@ -153,11 +153,15 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
switch (which) {
case ITIMER_REAL:
again:
spin_lock_irq(&tsk->sighand->siglock);
interval = tsk->signal->it_real_incr;
val = it_real_value(tsk->signal);
if (val)
del_timer_sync(&tsk->signal->real_timer);
/* We are sharing ->siglock with it_real_fn() */
if (try_to_del_timer_sync(&tsk->signal->real_timer) < 0) {
spin_unlock_irq(&tsk->sighand->siglock);
goto again;
}
tsk->signal->it_real_incr =
timeval_to_jiffies(&value->it_interval);
it_real_arm(tsk, timeval_to_jiffies(&value->it_value));
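The hunk above applies a general lock-sharing idiom: when the timer handler takes the very lock the caller is holding, del_timer_sync() would deadlock, so try_to_del_timer_sync() is used and the whole operation retried while the handler is running. A sketch of that idiom, with rearm_timer() as a hypothetical caller:

#include <linux/timer.h>
#include <linux/spinlock.h>

/* Sketch only: re-arm a timer whose handler itself takes 'lock'. */
static void rearm_timer(struct timer_list *timer, spinlock_t *lock,
			unsigned long expires)
{
again:
	spin_lock_irq(lock);
	if (try_to_del_timer_sync(timer) < 0) {
		/* Handler is currently running and will want 'lock';
		 * back off and retry instead of deadlocking. */
		spin_unlock_irq(lock);
		goto again;
	}
	timer->expires = expires;
	add_timer(timer);
	spin_unlock_irq(lock);
}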

kernel/kexec.c Normal file

File diff suppressed because it is too large (1063 lines).

kernel/kmod.c

@@ -120,6 +120,7 @@ struct subprocess_info {
char *path;
char **argv;
char **envp;
struct key *ring;
int wait;
int retval;
};
@@ -130,16 +131,21 @@ struct subprocess_info {
static int ____call_usermodehelper(void *data)
{
struct subprocess_info *sub_info = data;
struct key *old_session;
int retval;
/* Unblock all signals. */
/* Unblock all signals and set the session keyring. */
key_get(sub_info->ring);
flush_signals(current);
spin_lock_irq(&current->sighand->siglock);
old_session = __install_session_keyring(current, sub_info->ring);
flush_signal_handlers(current, 1);
sigemptyset(&current->blocked);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
key_put(old_session);
/* We can run anywhere, unlike our parent keventd(). */
set_cpus_allowed(current, CPU_MASK_ALL);
@@ -211,10 +217,11 @@ static void __call_usermodehelper(void *data)
}
/**
* call_usermodehelper - start a usermode application
* call_usermodehelper_keys - start a usermode application
* @path: pathname for the application
* @argv: null-terminated argument list
* @envp: null-terminated environment list
* @session_keyring: session keyring for process (NULL for an empty keyring)
* @wait: wait for the application to finish and return status.
*
* Runs a user-space application. The application is started
@@ -224,7 +231,8 @@ static void __call_usermodehelper(void *data)
* Must be called from process context. Returns a negative error code
* if program was not execed successfully, or 0.
*/
int call_usermodehelper(char *path, char **argv, char **envp, int wait)
int call_usermodehelper_keys(char *path, char **argv, char **envp,
struct key *session_keyring, int wait)
{
DECLARE_COMPLETION(done);
struct subprocess_info sub_info = {
@@ -232,6 +240,7 @@ int call_usermodehelper(char *path, char **argv, char **envp, int wait)
.path = path,
.argv = argv,
.envp = envp,
.ring = session_keyring,
.wait = wait,
.retval = 0,
};
@@ -247,7 +256,7 @@ int call_usermodehelper(char *path, char **argv, char **envp, int wait)
wait_for_completion(&done);
return sub_info.retval;
}
EXPORT_SYMBOL(call_usermodehelper);
EXPORT_SYMBOL(call_usermodehelper_keys);
void __init usermodehelper_init(void)
{
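A usage sketch for the extended interface documented above. The helper path and arguments are purely illustrative; per the kernel-doc, passing a NULL keyring gives the helper an empty session keyring, and wait=1 blocks until the helper exits:

#include <linux/kmod.h>

/* Sketch only: run a hypothetical helper and wait for its exit status. */
static int run_example_helper(void)
{
	char *argv[] = { "/sbin/example-helper", "--oneshot", NULL };
	char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };

	return call_usermodehelper_keys(argv[0], argv, envp, NULL, 1);
}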

kernel/kprobes.c

@@ -27,12 +27,16 @@
* interface to access function arguments.
* 2004-Sep Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
* exceptions notifier to be first on the priority list.
* 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston
* <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
* <prasanna@in.ibm.com> added function-return probes.
*/
#include <linux/kprobes.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
#include <asm/errno.h>
#include <asm/kdebug.h>
@@ -41,11 +45,112 @@
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
unsigned int kprobe_cpu = NR_CPUS;
static DEFINE_SPINLOCK(kprobe_lock);
static struct kprobe *curr_kprobe;
/*
* kprobe->ainsn.insn points to the copy of the instruction to be
* single-stepped. x86_64, POWER4 and above have no-exec support and
* stepping on the instruction on a vmalloced/kmalloced/data page
* is a recipe for disaster
*/
#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
struct kprobe_insn_page {
struct hlist_node hlist;
kprobe_opcode_t *insns; /* Page of instruction slots */
char slot_used[INSNS_PER_PAGE];
int nused;
};
static struct hlist_head kprobe_insn_pages;
/**
* get_insn_slot() - Find a slot on an executable page for an instruction.
* We allocate an executable page if there's no room on existing ones.
*/
kprobe_opcode_t *get_insn_slot(void)
{
struct kprobe_insn_page *kip;
struct hlist_node *pos;
hlist_for_each(pos, &kprobe_insn_pages) {
kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
if (kip->nused < INSNS_PER_PAGE) {
int i;
for (i = 0; i < INSNS_PER_PAGE; i++) {
if (!kip->slot_used[i]) {
kip->slot_used[i] = 1;
kip->nused++;
return kip->insns + (i * MAX_INSN_SIZE);
}
}
/* Surprise! No unused slots. Fix kip->nused. */
kip->nused = INSNS_PER_PAGE;
}
}
/* All out of space. Need to allocate a new page. Use slot 0.*/
kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
if (!kip) {
return NULL;
}
/*
* Use module_alloc so this page is within +/- 2GB of where the
* kernel image and loaded module images reside. This is required
* so x86_64 can correctly handle the %rip-relative fixups.
*/
kip->insns = module_alloc(PAGE_SIZE);
if (!kip->insns) {
kfree(kip);
return NULL;
}
INIT_HLIST_NODE(&kip->hlist);
hlist_add_head(&kip->hlist, &kprobe_insn_pages);
memset(kip->slot_used, 0, INSNS_PER_PAGE);
kip->slot_used[0] = 1;
kip->nused = 1;
return kip->insns;
}
void free_insn_slot(kprobe_opcode_t *slot)
{
struct kprobe_insn_page *kip;
struct hlist_node *pos;
hlist_for_each(pos, &kprobe_insn_pages) {
kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
if (kip->insns <= slot &&
slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
int i = (slot - kip->insns) / MAX_INSN_SIZE;
kip->slot_used[i] = 0;
kip->nused--;
if (kip->nused == 0) {
/*
* Page is no longer in use. Free it unless
* it's the last one. We keep the last one
* so as not to have to set it up again the
* next time somebody inserts a probe.
*/
hlist_del(&kip->hlist);
if (hlist_empty(&kprobe_insn_pages)) {
INIT_HLIST_NODE(&kip->hlist);
hlist_add_head(&kip->hlist,
&kprobe_insn_pages);
} else {
module_free(NULL, kip->insns);
kfree(kip);
}
}
return;
}
}
}
/* Locks kprobe: irqs must be disabled */
void lock_kprobes(void)
{
@@ -78,22 +183,23 @@ struct kprobe *get_kprobe(void *addr)
* Aggregate handlers for multiple kprobes support - these handlers
* take care of invoking the individual kprobe handlers on p->list
*/
int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe *kp;
list_for_each_entry(kp, &p->list, list) {
if (kp->pre_handler) {
curr_kprobe = kp;
kp->pre_handler(kp, regs);
curr_kprobe = NULL;
if (kp->pre_handler(kp, regs))
return 1;
}
curr_kprobe = NULL;
}
return 0;
}
void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
unsigned long flags)
static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
unsigned long flags)
{
struct kprobe *kp;
@@ -107,7 +213,8 @@ void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
return;
}
int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr)
static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
int trapnr)
{
/*
* if we faulted "during" the execution of a user specified
@@ -120,19 +227,159 @@ int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr)
return 0;
}
static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe *kp = curr_kprobe;
if (curr_kprobe && kp->break_handler) {
if (kp->break_handler(kp, regs)) {
curr_kprobe = NULL;
return 1;
}
}
curr_kprobe = NULL;
return 0;
}
struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp)
{
struct hlist_node *node;
struct kretprobe_instance *ri;
hlist_for_each_entry(ri, node, &rp->free_instances, uflist)
return ri;
return NULL;
}
static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp)
{
struct hlist_node *node;
struct kretprobe_instance *ri;
hlist_for_each_entry(ri, node, &rp->used_instances, uflist)
return ri;
return NULL;
}
void add_rp_inst(struct kretprobe_instance *ri)
{
/*
* Remove rp inst off the free list -
* Add it back when probed function returns
*/
hlist_del(&ri->uflist);
/* Add rp inst onto table */
INIT_HLIST_NODE(&ri->hlist);
hlist_add_head(&ri->hlist,
&kretprobe_inst_table[hash_ptr(ri->task, KPROBE_HASH_BITS)]);
/* Also add this rp inst to the used list. */
INIT_HLIST_NODE(&ri->uflist);
hlist_add_head(&ri->uflist, &ri->rp->used_instances);
}
void recycle_rp_inst(struct kretprobe_instance *ri)
{
/* remove rp inst off the rprobe_inst_table */
hlist_del(&ri->hlist);
if (ri->rp) {
/* remove rp inst off the used list */
hlist_del(&ri->uflist);
/* put rp inst back onto the free list */
INIT_HLIST_NODE(&ri->uflist);
hlist_add_head(&ri->uflist, &ri->rp->free_instances);
} else
/* Unregistering */
kfree(ri);
}
struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk)
{
return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
}
/*
* This function is called from exit_thread or flush_thread when task tk's
* stack is being recycled so that we can recycle any function-return probe
* instances associated with this task. These left over instances represent
* probed functions that have been called but will never return.
*/
void kprobe_flush_task(struct task_struct *tk)
{
struct kretprobe_instance *ri;
struct hlist_head *head;
struct hlist_node *node, *tmp;
unsigned long flags = 0;
spin_lock_irqsave(&kprobe_lock, flags);
head = kretprobe_inst_table_head(current);
hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
if (ri->task == tk)
recycle_rp_inst(ri);
}
spin_unlock_irqrestore(&kprobe_lock, flags);
}
/*
* This kprobe pre_handler is registered with every kretprobe. When probe
* hits it will set up the return probe.
*/
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
{
struct kretprobe *rp = container_of(p, struct kretprobe, kp);
/*TODO: consider to only swap the RA after the last pre_handler fired */
arch_prepare_kretprobe(rp, regs);
return 0;
}
static inline void free_rp_inst(struct kretprobe *rp)
{
struct kretprobe_instance *ri;
while ((ri = get_free_rp_inst(rp)) != NULL) {
hlist_del(&ri->uflist);
kfree(ri);
}
}
/*
* Keep all fields in the kprobe consistent
*/
static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
{
memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
}
/*
* Add the new probe to old_p->list. Fail if this is the
* second jprobe at the address - two jprobes can't coexist
*/
static int add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
{
struct kprobe *kp;
if (p->break_handler) {
list_for_each_entry(kp, &old_p->list, list) {
if (kp->break_handler)
return -EEXIST;
}
list_add_tail(&p->list, &old_p->list);
} else
list_add(&p->list, &old_p->list);
return 0;
}
/*
* Fill in the required fields of the "manager kprobe". Replace the
* earlier kprobe in the hlist with the manager kprobe
*/
static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
{
copy_kprobe(p, ap);
ap->addr = p->addr;
ap->opcode = p->opcode;
memcpy(&ap->ainsn, &p->ainsn, sizeof(struct arch_specific_insn));
ap->pre_handler = aggr_pre_handler;
ap->post_handler = aggr_post_handler;
ap->fault_handler = aggr_fault_handler;
ap->break_handler = aggr_break_handler;
INIT_LIST_HEAD(&ap->list);
list_add(&p->list, &ap->list);
@@ -153,16 +400,16 @@ static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p)
int ret = 0;
struct kprobe *ap;
if (old_p->break_handler || p->break_handler) {
ret = -EEXIST; /* kprobe and jprobe can't (yet) coexist */
} else if (old_p->pre_handler == aggr_pre_handler) {
list_add(&p->list, &old_p->list);
if (old_p->pre_handler == aggr_pre_handler) {
copy_kprobe(old_p, p);
ret = add_new_kprobe(old_p, p);
} else {
ap = kcalloc(1, sizeof(struct kprobe), GFP_ATOMIC);
if (!ap)
return -ENOMEM;
add_aggr_kprobe(ap, old_p);
list_add(&p->list, &ap->list);
copy_kprobe(ap, p);
ret = add_new_kprobe(ap, p);
}
return ret;
}
@@ -170,10 +417,8 @@ static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p)
/* kprobe removal house-keeping routines */
static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags)
{
*p->addr = p->opcode;
arch_disarm_kprobe(p);
hlist_del(&p->hlist);
flush_icache_range((unsigned long) p->addr,
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
spin_unlock_irqrestore(&kprobe_lock, flags);
arch_remove_kprobe(p);
}
@@ -200,6 +445,7 @@ int register_kprobe(struct kprobe *p)
}
spin_lock_irqsave(&kprobe_lock, flags);
old_p = get_kprobe(p->addr);
p->nmissed = 0;
if (old_p) {
ret = register_aggr_kprobe(old_p, p);
goto out;
@@ -210,10 +456,8 @@ int register_kprobe(struct kprobe *p)
hlist_add_head(&p->hlist,
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
p->opcode = *p->addr;
*p->addr = BREAKPOINT_INSTRUCTION;
flush_icache_range((unsigned long) p->addr,
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
arch_arm_kprobe(p);
out:
spin_unlock_irqrestore(&kprobe_lock, flags);
rm_kprobe:
@@ -257,16 +501,83 @@ void unregister_jprobe(struct jprobe *jp)
unregister_kprobe(&jp->kp);
}
#ifdef ARCH_SUPPORTS_KRETPROBES
int register_kretprobe(struct kretprobe *rp)
{
int ret = 0;
struct kretprobe_instance *inst;
int i;
rp->kp.pre_handler = pre_handler_kretprobe;
/* Pre-allocate memory for max kretprobe instances */
if (rp->maxactive <= 0) {
#ifdef CONFIG_PREEMPT
rp->maxactive = max(10, 2 * NR_CPUS);
#else
rp->maxactive = NR_CPUS;
#endif
}
INIT_HLIST_HEAD(&rp->used_instances);
INIT_HLIST_HEAD(&rp->free_instances);
for (i = 0; i < rp->maxactive; i++) {
inst = kmalloc(sizeof(struct kretprobe_instance), GFP_KERNEL);
if (inst == NULL) {
free_rp_inst(rp);
return -ENOMEM;
}
INIT_HLIST_NODE(&inst->uflist);
hlist_add_head(&inst->uflist, &rp->free_instances);
}
rp->nmissed = 0;
/* Establish function entry probe point */
if ((ret = register_kprobe(&rp->kp)) != 0)
free_rp_inst(rp);
return ret;
}
#else /* ARCH_SUPPORTS_KRETPROBES */
int register_kretprobe(struct kretprobe *rp)
{
return -ENOSYS;
}
#endif /* ARCH_SUPPORTS_KRETPROBES */
void unregister_kretprobe(struct kretprobe *rp)
{
unsigned long flags;
struct kretprobe_instance *ri;
unregister_kprobe(&rp->kp);
/* No race here */
spin_lock_irqsave(&kprobe_lock, flags);
free_rp_inst(rp);
while ((ri = get_used_rp_inst(rp)) != NULL) {
ri->rp = NULL;
hlist_del(&ri->uflist);
}
spin_unlock_irqrestore(&kprobe_lock, flags);
}
static int __init init_kprobes(void)
{
int i, err = 0;
/* FIXME allocate the probe table, currently defined statically */
/* initialize all list heads */
for (i = 0; i < KPROBE_TABLE_SIZE; i++)
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
INIT_HLIST_HEAD(&kprobe_table[i]);
INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
}
err = arch_init();
if (!err)
err = register_die_notifier(&kprobe_exceptions_nb);
err = register_die_notifier(&kprobe_exceptions_nb);
return err;
}
@@ -277,3 +588,6 @@ EXPORT_SYMBOL_GPL(unregister_kprobe);
EXPORT_SYMBOL_GPL(register_jprobe);
EXPORT_SYMBOL_GPL(unregister_jprobe);
EXPORT_SYMBOL_GPL(jprobe_return);
EXPORT_SYMBOL_GPL(register_kretprobe);
EXPORT_SYMBOL_GPL(unregister_kretprobe);
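A sketch of how the new function-return probes might be used from a module. The probed function (schedule) and the handler body are only examples, and the module boilerplate is hypothetical; maxactive controls how many return-probe instances register_kretprobe() pre-allocates, as shown above:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/sched.h>

/* Example handler: runs each time the probed function returns. */
static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	printk(KERN_INFO "probed function returned\n");
	return 0;
}

static struct kretprobe example_rp = {
	.handler   = ret_handler,
	.maxactive = 20,	/* instances to pre-allocate */
};

static int __init example_init(void)
{
	/* schedule() is just an example target for illustration. */
	example_rp.kp.addr = (kprobe_opcode_t *)schedule;
	return register_kretprobe(&example_rp);
}

static void __exit example_exit(void)
{
	unregister_kretprobe(&example_rp);
	printk(KERN_INFO "missed %d probe instances\n", example_rp.nmissed);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");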

kernel/ksysfs.c

@@ -30,12 +30,25 @@ static ssize_t hotplug_seqnum_show(struct subsystem *subsys, char *page)
KERNEL_ATTR_RO(hotplug_seqnum);
#endif
#ifdef CONFIG_KEXEC
#include <asm/kexec.h>
static ssize_t crash_notes_show(struct subsystem *subsys, char *page)
{
return sprintf(page, "%p\n", (void *)crash_notes);
}
KERNEL_ATTR_RO(crash_notes);
#endif
decl_subsys(kernel, NULL, NULL);
EXPORT_SYMBOL_GPL(kernel_subsys);
static struct attribute * kernel_attrs[] = {
#ifdef CONFIG_HOTPLUG
&hotplug_seqnum_attr.attr,
#endif
#ifdef CONFIG_KEXEC
&crash_notes_attr.attr,
#endif
NULL
};

kernel/module.c

@@ -35,6 +35,7 @@
#include <linux/notifier.h>
#include <linux/stop_machine.h>
#include <linux/device.h>
#include <linux/string.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
#include <asm/cacheflush.h>
@@ -370,6 +371,43 @@ static inline void percpu_modcopy(void *pcpudst, const void *src,
#endif /* CONFIG_SMP */
#ifdef CONFIG_MODULE_UNLOAD
#define MODINFO_ATTR(field) \
static void setup_modinfo_##field(struct module *mod, const char *s) \
{ \
mod->field = kstrdup(s, GFP_KERNEL); \
} \
static ssize_t show_modinfo_##field(struct module_attribute *mattr, \
struct module *mod, char *buffer) \
{ \
return sprintf(buffer, "%s\n", mod->field); \
} \
static int modinfo_##field##_exists(struct module *mod) \
{ \
return mod->field != NULL; \
} \
static void free_modinfo_##field(struct module *mod) \
{ \
kfree(mod->field); \
mod->field = NULL; \
} \
static struct module_attribute modinfo_##field = { \
.attr = { .name = __stringify(field), .mode = 0444, \
.owner = THIS_MODULE }, \
.show = show_modinfo_##field, \
.setup = setup_modinfo_##field, \
.test = modinfo_##field##_exists, \
.free = free_modinfo_##field, \
};
MODINFO_ATTR(version);
MODINFO_ATTR(srcversion);
static struct module_attribute *modinfo_attrs[] = {
&modinfo_version,
&modinfo_srcversion,
NULL,
};
/* Init the unload section of the module. */
static void module_unload_init(struct module *mod)
{
@@ -379,7 +417,7 @@ static void module_unload_init(struct module *mod)
for (i = 0; i < NR_CPUS; i++)
local_set(&mod->ref[i].count, 0);
/* Hold reference count during initialization. */
local_set(&mod->ref[_smp_processor_id()].count, 1);
local_set(&mod->ref[raw_smp_processor_id()].count, 1);
/* Backwards compatibility macros put refcount during init. */
mod->waiter = current;
}
@@ -692,7 +730,7 @@ static int obsparm_copy_string(const char *val, struct kernel_param *kp)
return 0;
}
int set_obsolete(const char *val, struct kernel_param *kp)
static int set_obsolete(const char *val, struct kernel_param *kp)
{
unsigned int min, max;
unsigned int size, maxsize;
@@ -1031,6 +1069,32 @@ static void module_remove_refcnt_attr(struct module *mod)
}
#endif
#ifdef CONFIG_MODULE_UNLOAD
static int module_add_modinfo_attrs(struct module *mod)
{
struct module_attribute *attr;
int error = 0;
int i;
for (i = 0; (attr = modinfo_attrs[i]) && !error; i++) {
if (!attr->test ||
(attr->test && attr->test(mod)))
error = sysfs_create_file(&mod->mkobj.kobj,&attr->attr);
}
return error;
}
static void module_remove_modinfo_attrs(struct module *mod)
{
struct module_attribute *attr;
int i;
for (i = 0; (attr = modinfo_attrs[i]); i++) {
sysfs_remove_file(&mod->mkobj.kobj,&attr->attr);
attr->free(mod);
}
}
#endif
static int mod_sysfs_setup(struct module *mod,
struct kernel_param *kparam,
@@ -1056,6 +1120,12 @@ static int mod_sysfs_setup(struct module *mod,
if (err)
goto out_unreg;
#ifdef CONFIG_MODULE_UNLOAD
err = module_add_modinfo_attrs(mod);
if (err)
goto out_unreg;
#endif
return 0;
out_unreg:
@@ -1066,6 +1136,9 @@ out:
static void mod_kobject_remove(struct module *mod)
{
#ifdef CONFIG_MODULE_UNLOAD
module_remove_modinfo_attrs(mod);
#endif
module_remove_refcnt_attr(mod);
module_param_sysfs_remove(mod);
@@ -1311,6 +1384,23 @@ static char *get_modinfo(Elf_Shdr *sechdrs,
return NULL;
}
#ifdef CONFIG_MODULE_UNLOAD
static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs,
unsigned int infoindex)
{
struct module_attribute *attr;
int i;
for (i = 0; (attr = modinfo_attrs[i]); i++) {
if (attr->setup)
attr->setup(mod,
get_modinfo(sechdrs,
infoindex,
attr->attr.name));
}
}
#endif
#ifdef CONFIG_KALLSYMS
int is_exported(const char *name, const struct module *mod)
{
@@ -1615,6 +1705,11 @@ static struct module *load_module(void __user *umod,
/* Set up license info based on the info section */
set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
#ifdef CONFIG_MODULE_UNLOAD
/* Set up MODINFO_ATTR fields */
setup_modinfo(mod, sechdrs, infoindex);
#endif
/* Fix up syms, so that st_value is a pointer to location. */
err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex,
mod);
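The MODINFO_ATTR attributes added above expose a module's version and srcversion strings through sysfs. A sketch of the module side, with example_mod as a placeholder name; once such a module is loaded (and CONFIG_MODULE_UNLOAD is set, per the #ifdef above), the string appears under /sys/module/example_mod/version:

#include <linux/module.h>
#include <linux/init.h>

static int __init example_mod_init(void)
{
	return 0;
}

static void __exit example_mod_exit(void)
{
}

module_init(example_mod_init);
module_exit(example_mod_exit);

MODULE_LICENSE("GPL");
MODULE_VERSION("1.2.3");	/* exposed as /sys/module/example_mod/version */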

kernel/panic.c

@@ -18,6 +18,7 @@
#include <linux/sysrq.h>
#include <linux/interrupt.h>
#include <linux/nmi.h>
#include <linux/kexec.h>
int panic_timeout;
int panic_on_oops;
@@ -63,6 +64,13 @@ NORET_TYPE void panic(const char * fmt, ...)
unsigned long caller = (unsigned long) __builtin_return_address(0);
#endif
/*
* It's possible to come here directly from a panic-assertion and not
* have preempt disabled. Some functions called from here want
* preempt to be disabled. No point enabling it later though...
*/
preempt_disable();
bust_spinlocks(1);
va_start(args, fmt);
vsnprintf(buf, sizeof(buf), fmt, args);
@@ -70,7 +78,19 @@ NORET_TYPE void panic(const char * fmt, ...)
printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
bust_spinlocks(0);
/*
* If we have crashed and we have a crash kernel loaded let it handle
* everything else.
* Do we want to call this before we try to display a message?
*/
crash_kexec(NULL);
#ifdef CONFIG_SMP
/*
* Note smp_send_stop is the usual smp shutdown function, which
* unfortunately means it may not be hardened to work in a panic
* situation.
*/
smp_send_stop();
#endif
@@ -79,8 +99,7 @@ NORET_TYPE void panic(const char * fmt, ...)
if (!panic_blink)
panic_blink = no_blink;
if (panic_timeout > 0)
{
if (panic_timeout > 0) {
/*
* Delay timeout seconds before rebooting the machine.
* We can't use the "normal" timers since we just panicked..

kernel/params.c

@@ -629,7 +629,7 @@ static ssize_t module_attr_show(struct kobject *kobj,
mk = to_module_kobject(kobj);
if (!attribute->show)
return -EPERM;
return -EIO;
if (!try_module_get(mk->mod))
return -ENODEV;
@@ -653,7 +653,7 @@ static ssize_t module_attr_store(struct kobject *kobj,
mk = to_module_kobject(kobj);
if (!attribute->store)
return -EPERM;
return -EIO;
if (!try_module_get(mk->mod))
return -ENODEV;

kernel/posix-timers.c

@@ -88,23 +88,6 @@ static kmem_cache_t *posix_timers_cache;
static struct idr posix_timers_id;
static DEFINE_SPINLOCK(idr_lock);
/*
* Just because the timer is not in the timer list does NOT mean it is
* inactive. It could be in the "fire" routine getting a new expire time.
*/
#define TIMER_INACTIVE 1
#ifdef CONFIG_SMP
# define timer_active(tmr) \
((tmr)->it.real.timer.entry.prev != (void *)TIMER_INACTIVE)
# define set_timer_inactive(tmr) \
do { \
(tmr)->it.real.timer.entry.prev = (void *)TIMER_INACTIVE; \
} while (0)
#else
# define timer_active(tmr) BARFY // error to use outside of SMP
# define set_timer_inactive(tmr) do { } while (0)
#endif
/*
* we assume that the new SIGEV_THREAD_ID shares no bits with the other
* SIGEV values. Here we put out an error if this assumption fails.
@@ -226,7 +209,6 @@ static inline int common_timer_create(struct k_itimer *new_timer)
init_timer(&new_timer->it.real.timer);
new_timer->it.real.timer.data = (unsigned long) new_timer;
new_timer->it.real.timer.function = posix_timer_fn;
set_timer_inactive(new_timer);
return 0;
}
@@ -480,7 +462,6 @@ static void posix_timer_fn(unsigned long __data)
int do_notify = 1;
spin_lock_irqsave(&timr->it_lock, flags);
set_timer_inactive(timr);
if (!list_empty(&timr->it.real.abs_timer_entry)) {
spin_lock(&abs_list.lock);
do {
@@ -983,8 +964,8 @@ common_timer_set(struct k_itimer *timr, int flags,
* careful here. If smp we could be in the "fire" routine which will
* be spinning as we hold the lock. But this is ONLY an SMP issue.
*/
if (try_to_del_timer_sync(&timr->it.real.timer) < 0) {
#ifdef CONFIG_SMP
if (timer_active(timr) && !del_timer(&timr->it.real.timer))
/*
* It can only be active if on another cpu. Since
* we have cleared the interval stuff above, it should
@@ -994,11 +975,9 @@ common_timer_set(struct k_itimer *timr, int flags,
* a "retry" exit status.
*/
return TIMER_RETRY;
set_timer_inactive(timr);
#else
del_timer(&timr->it.real.timer);
#endif
}
remove_from_abslist(timr);
timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
@@ -1083,8 +1062,9 @@ retry:
static inline int common_timer_del(struct k_itimer *timer)
{
timer->it.real.incr = 0;
if (try_to_del_timer_sync(&timer->it.real.timer) < 0) {
#ifdef CONFIG_SMP
if (timer_active(timer) && !del_timer(&timer->it.real.timer))
/*
* It can only be active if on another cpu. Since
* we have cleared the interval stuff above, it should
@@ -1094,9 +1074,9 @@ static inline int common_timer_del(struct k_itimer *timer)
* a "retry" exit status.
*/
return TIMER_RETRY;
#else
del_timer(&timer->it.real.timer);
#endif
}
remove_from_abslist(timer);
return 0;

kernel/power/Kconfig

@@ -27,8 +27,8 @@ config PM_DEBUG
like suspend support.
config SOFTWARE_SUSPEND
bool "Software Suspend (EXPERIMENTAL)"
depends on EXPERIMENTAL && PM && SWAP
bool "Software Suspend"
depends on EXPERIMENTAL && PM && SWAP && ((X86 && SMP) || ((FVR || PPC32 || X86) && !SMP))
---help---
Enable the possibility of suspending the machine.
It doesn't need APM.
@@ -72,3 +72,7 @@ config PM_STD_PARTITION
suspended image to. It will simply pick the first available swap
device.
config SUSPEND_SMP
bool
depends on HOTPLUG_CPU && X86 && PM
default y

kernel/power/Makefile

@@ -3,9 +3,9 @@ ifeq ($(CONFIG_PM_DEBUG),y)
EXTRA_CFLAGS += -DDEBUG
endif
swsusp-smp-$(CONFIG_SMP) += smp.o
obj-y := main.o process.o console.o pm.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) disk.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o
obj-$(CONFIG_SUSPEND_SMP) += smp.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o

kernel/power/disk.c

@@ -117,8 +117,8 @@ static void finish(void)
{
device_resume();
platform_finish();
enable_nonboot_cpus();
thaw_processes();
enable_nonboot_cpus();
pm_restore_console();
}
@@ -131,28 +131,35 @@ static int prepare_processes(void)
sys_sync();
disable_nonboot_cpus();
if (freeze_processes()) {
error = -EBUSY;
return error;
goto thaw;
}
if (pm_disk_mode == PM_DISK_PLATFORM) {
if (pm_ops && pm_ops->prepare) {
if ((error = pm_ops->prepare(PM_SUSPEND_DISK)))
return error;
goto thaw;
}
}
/* Free memory before shutting down devices. */
free_some_memory();
return 0;
thaw:
thaw_processes();
enable_nonboot_cpus();
pm_restore_console();
return error;
}
static void unprepare_processes(void)
{
enable_nonboot_cpus();
platform_finish();
thaw_processes();
enable_nonboot_cpus();
pm_restore_console();
}
@@ -160,15 +167,9 @@ static int prepare_devices(void)
{
int error;
disable_nonboot_cpus();
if ((error = device_suspend(PMSG_FREEZE))) {
if ((error = device_suspend(PMSG_FREEZE)))
printk("Some devices failed to suspend\n");
platform_finish();
enable_nonboot_cpus();
return error;
}
return 0;
return error;
}
/**
@@ -185,9 +186,9 @@ int pm_suspend_disk(void)
int error;
error = prepare_processes();
if (!error) {
error = prepare_devices();
}
if (error)
return error;
error = prepare_devices();
if (error) {
unprepare_processes();
@@ -250,7 +251,7 @@ static int software_resume(void)
if ((error = prepare_processes())) {
swsusp_close();
goto Cleanup;
goto Done;
}
pr_debug("PM: Reading swsusp image.\n");

kernel/power/main.c

@@ -55,6 +55,13 @@ static int suspend_prepare(suspend_state_t state)
pm_prepare_console();
disable_nonboot_cpus();
if (num_online_cpus() != 1) {
error = -EPERM;
goto Enable_cpu;
}
if (freeze_processes()) {
error = -EAGAIN;
goto Thaw;
@@ -75,6 +82,8 @@ static int suspend_prepare(suspend_state_t state)
pm_ops->finish(state);
Thaw:
thaw_processes();
Enable_cpu:
enable_nonboot_cpus();
pm_restore_console();
return error;
}
@@ -113,6 +122,7 @@ static void suspend_finish(suspend_state_t state)
if (pm_ops && pm_ops->finish)
pm_ops->finish(state);
thaw_processes();
enable_nonboot_cpus();
pm_restore_console();
}
@@ -150,12 +160,6 @@ static int enter_state(suspend_state_t state)
goto Unlock;
}
/* Suspend is hard to get right on SMP. */
if (num_online_cpus() != 1) {
error = -EPERM;
goto Unlock;
}
pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
if ((error = suspend_prepare(state)))
goto Unlock;

kernel/power/process.c

@@ -32,7 +32,7 @@ static inline int freezeable(struct task_struct * p)
}
/* Refrigerator is place where frozen processes are stored :-). */
void refrigerator(unsigned long flag)
void refrigerator(void)
{
/* Hmm, should we be allowed to suspend when there are realtime
processes around? */
@@ -41,14 +41,13 @@ void refrigerator(unsigned long flag)
current->state = TASK_UNINTERRUPTIBLE;
pr_debug("%s entered refrigerator\n", current->comm);
printk("=");
current->flags &= ~PF_FREEZE;
frozen_process(current);
spin_lock_irq(&current->sighand->siglock);
recalc_sigpending(); /* We sent fake signal, clean it up */
spin_unlock_irq(&current->sighand->siglock);
current->flags |= PF_FROZEN;
while (current->flags & PF_FROZEN)
while (frozen(current))
schedule();
pr_debug("%s left refrigerator\n", current->comm);
current->state = save;
@@ -57,10 +56,10 @@ void refrigerator(unsigned long flag)
/* 0 = success, else # of processes that we failed to stop */
int freeze_processes(void)
{
int todo;
unsigned long start_time;
int todo;
unsigned long start_time;
struct task_struct *g, *p;
printk( "Stopping tasks: " );
start_time = jiffies;
do {
@@ -70,14 +69,12 @@ int freeze_processes(void)
unsigned long flags;
if (!freezeable(p))
continue;
if ((p->flags & PF_FROZEN) ||
if ((frozen(p)) ||
(p->state == TASK_TRACED) ||
(p->state == TASK_STOPPED))
continue;
/* FIXME: smp problem here: we may not access other process' flags
without locking */
p->flags |= PF_FREEZE;
freeze(p);
spin_lock_irqsave(&p->sighand->siglock, flags);
signal_wake_up(p, 0);
spin_unlock_irqrestore(&p->sighand->siglock, flags);
@@ -91,7 +88,7 @@ int freeze_processes(void)
return todo;
}
} while(todo);
printk( "|\n" );
BUG_ON(in_atomic());
return 0;
@@ -106,10 +103,7 @@ void thaw_processes(void)
do_each_thread(g, p) {
if (!freezeable(p))
continue;
if (p->flags & PF_FROZEN) {
p->flags &= ~PF_FROZEN;
wake_up_process(p);
} else
if (!thaw_process(p))
printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
} while_each_thread(g, p);

kernel/power/smp.c

@@ -13,73 +13,52 @@
#include <linux/interrupt.h>
#include <linux/suspend.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <asm/atomic.h>
#include <asm/tlbflush.h>
static atomic_t cpu_counter, freeze;
static void smp_pause(void * data)
{
struct saved_context ctxt;
__save_processor_state(&ctxt);
printk("Sleeping in:\n");
dump_stack();
atomic_inc(&cpu_counter);
while (atomic_read(&freeze)) {
/* FIXME: restore takes place at random piece inside this.
This should probably be written in assembly, and
preserve general-purpose registers, too
What about stack? We may need to move to new stack here.
This should better be ran with interrupts disabled.
*/
cpu_relax();
barrier();
}
atomic_dec(&cpu_counter);
__restore_processor_state(&ctxt);
}
static cpumask_t oldmask;
/* This is protected by pm_sem semaphore */
static cpumask_t frozen_cpus;
void disable_nonboot_cpus(void)
{
oldmask = current->cpus_allowed;
set_cpus_allowed(current, cpumask_of_cpu(0));
printk("Freezing CPUs (at %d)", _smp_processor_id());
current->state = TASK_INTERRUPTIBLE;
schedule_timeout(HZ);
printk("...");
BUG_ON(_smp_processor_id() != 0);
int cpu, error;
/* FIXME: for this to work, all the CPUs must be running
* "idle" thread (or we deadlock). Is that guaranteed? */
atomic_set(&cpu_counter, 0);
atomic_set(&freeze, 1);
smp_call_function(smp_pause, NULL, 0, 0);
while (atomic_read(&cpu_counter) < (num_online_cpus() - 1)) {
cpu_relax();
barrier();
error = 0;
cpus_clear(frozen_cpus);
printk("Freezing cpus ...\n");
for_each_online_cpu(cpu) {
if (cpu == 0)
continue;
error = cpu_down(cpu);
if (!error) {
cpu_set(cpu, frozen_cpus);
printk("CPU%d is down\n", cpu);
continue;
}
printk("Error taking cpu %d down: %d\n", cpu, error);
}
printk("ok\n");
BUG_ON(smp_processor_id() != 0);
if (error)
panic("cpus not sleeping");
}
void enable_nonboot_cpus(void)
{
printk("Restarting CPUs");
atomic_set(&freeze, 0);
while (atomic_read(&cpu_counter)) {
cpu_relax();
barrier();
}
printk("...");
set_cpus_allowed(current, oldmask);
schedule();
printk("ok\n");
int cpu, error;
printk("Thawing cpus ...\n");
for_each_cpu_mask(cpu, frozen_cpus) {
error = smp_prepare_cpu(cpu);
if (!error)
error = cpu_up(cpu);
if (!error) {
printk("CPU%d is up\n", cpu);
continue;
}
printk("Error taking cpu %d up: %d\n", cpu, error);
panic("Not enough cpus");
}
cpus_clear(frozen_cpus);
}

kernel/power/swsusp.c

@@ -10,12 +10,12 @@
* This file is released under the GPLv2.
*
* I'd like to thank the following people for their work:
*
*
* Pavel Machek <pavel@ucw.cz>:
* Modifications, defectiveness pointing, being with me at the very beginning,
* suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
*
* Steve Doddi <dirk@loth.demon.co.uk>:
* Steve Doddi <dirk@loth.demon.co.uk>:
* Support the possibility of hardware state restoring.
*
* Raph <grey.havens@earthling.net>:
@@ -81,14 +81,14 @@ static int nr_copy_pages_check;
extern char resume_file[];
/* Local variables that should not be affected by save */
unsigned int nr_copy_pages __nosavedata = 0;
static unsigned int nr_copy_pages __nosavedata = 0;
/* Suspend pagedir is allocated before final copy, therefore it
must be freed after resume
must be freed after resume
Warning: this is evil. There are actually two pagedirs at time of
resume. One is "pagedir_save", which is empty frame allocated at
time of suspend, that must be freed. Second is "pagedir_nosave",
time of suspend, that must be freed. Second is "pagedir_nosave",
allocated at time of resume, that travels through memory not to
collide with anything.
@@ -132,7 +132,7 @@ static int mark_swapfiles(swp_entry_t prev)
{
int error;
rw_swap_page_sync(READ,
rw_swap_page_sync(READ,
swp_entry(root_swap, 0),
virt_to_page((unsigned long)&swsusp_header));
if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
@@ -140,7 +140,7 @@ static int mark_swapfiles(swp_entry_t prev)
memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
swsusp_header.swsusp_info = prev;
error = rw_swap_page_sync(WRITE,
error = rw_swap_page_sync(WRITE,
swp_entry(root_swap, 0),
virt_to_page((unsigned long)
&swsusp_header));
@@ -174,22 +174,22 @@ static int is_resume_device(const struct swap_info_struct *swap_info)
static int swsusp_swap_check(void) /* This is called before saving image */
{
int i, len;
len=strlen(resume_file);
root_swap = 0xFFFF;
swap_list_lock();
for(i=0; i<MAX_SWAPFILES; i++) {
for (i=0; i<MAX_SWAPFILES; i++) {
if (swap_info[i].flags == 0) {
swapfile_used[i]=SWAPFILE_UNUSED;
} else {
if(!len) {
if (!len) {
printk(KERN_WARNING "resume= option should be used to set suspend device" );
if(root_swap == 0xFFFF) {
if (root_swap == 0xFFFF) {
swapfile_used[i] = SWAPFILE_SUSPEND;
root_swap = i;
} else
swapfile_used[i] = SWAPFILE_IGNORED;
swapfile_used[i] = SWAPFILE_IGNORED;
} else {
/* we ignore all swap devices that are not the resume_file */
if (is_resume_device(&swap_info[i])) {
@@ -209,15 +209,15 @@ static int swsusp_swap_check(void) /* This is called before saving image */
* This is called after saving image so modification
* will be lost after resume... and that's what we want.
* we make the device unusable. A new call to
* lock_swapdevices can unlock the devices.
* lock_swapdevices can unlock the devices.
*/
static void lock_swapdevices(void)
{
int i;
swap_list_lock();
for(i = 0; i< MAX_SWAPFILES; i++)
if(swapfile_used[i] == SWAPFILE_IGNORED) {
for (i = 0; i< MAX_SWAPFILES; i++)
if (swapfile_used[i] == SWAPFILE_IGNORED) {
swap_info[i].flags ^= 0xFF;
}
swap_list_unlock();
@@ -229,7 +229,7 @@ static void lock_swapdevices(void)
* @loc: Place to store the entry we used.
*
* Allocate a new swap entry and 'sync' it. Note we discard -EIO
* errors. That is an artifact left over from swsusp. It did not
* errors. That is an artifact left over from swsusp. It did not
* check the return of rw_swap_page_sync() at all, since most pages
* written back to swap would return -EIO.
* This is a partial improvement, since we will at least return other
@@ -241,7 +241,7 @@ static int write_page(unsigned long addr, swp_entry_t * loc)
int error = 0;
entry = get_swap_page();
if (swp_offset(entry) &&
if (swp_offset(entry) &&
swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) {
error = rw_swap_page_sync(WRITE, entry,
virt_to_page(addr));
@@ -257,7 +257,7 @@ static int write_page(unsigned long addr, swp_entry_t * loc)
/**
* data_free - Free the swap entries used by the saved image.
*
* Walk the list of used swap entries and free each one.
* Walk the list of used swap entries and free each one.
* This is only used for cleanup when suspend fails.
*/
static void data_free(void)
@@ -290,7 +290,7 @@ static int data_write(void)
mod = 1;
printk( "Writing data to swap (%d pages)... ", nr_copy_pages );
for_each_pbe(p, pagedir_nosave) {
for_each_pbe (p, pagedir_nosave) {
if (!(i%mod))
printk( "\b\b\b\b%3d%%", i / mod );
if ((error = write_page(p->address, &(p->swap_address))))
@@ -335,7 +335,7 @@ static int close_swap(void)
dump_info();
error = write_page((unsigned long)&swsusp_info, &entry);
if (!error) {
if (!error) {
printk( "S" );
error = mark_swapfiles(entry);
printk( "|\n" );
@@ -370,7 +370,7 @@ static int write_pagedir(void)
struct pbe * pbe;
printk( "Writing pagedir...");
for_each_pb_page(pbe, pagedir_nosave) {
for_each_pb_page (pbe, pagedir_nosave) {
if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++])))
return error;
}
@@ -472,7 +472,7 @@ static int save_highmem(void)
int res = 0;
pr_debug("swsusp: Saving Highmem\n");
for_each_zone(zone) {
for_each_zone (zone) {
if (is_highmem(zone))
res = save_highmem_zone(zone);
if (res)
@@ -547,7 +547,7 @@ static void count_data_pages(void)
nr_copy_pages = 0;
for_each_zone(zone) {
for_each_zone (zone) {
if (is_highmem(zone))
continue;
mark_free_pages(zone);
@@ -562,9 +562,9 @@ static void copy_data_pages(void)
struct zone *zone;
unsigned long zone_pfn;
struct pbe * pbe = pagedir_nosave;
pr_debug("copy_data_pages(): pages to copy: %d\n", nr_copy_pages);
for_each_zone(zone) {
for_each_zone (zone) {
if (is_highmem(zone))
continue;
mark_free_pages(zone);
@@ -702,7 +702,7 @@ static void free_image_pages(void)
{
struct pbe * p;
for_each_pbe(p, pagedir_save) {
for_each_pbe (p, pagedir_save) {
if (p->address) {
ClearPageNosave(virt_to_page(p->address));
free_page(p->address);
@@ -719,7 +719,7 @@ static int alloc_image_pages(void)
{
struct pbe * p;
for_each_pbe(p, pagedir_save) {
for_each_pbe (p, pagedir_save) {
p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
if (!p->address)
return -ENOMEM;
@@ -740,7 +740,7 @@ void swsusp_free(void)
/**
 * enough_free_mem - Make sure we have enough free memory to snapshot.
*
* Returns TRUE or FALSE after checking the number of available
* Returns TRUE or FALSE after checking the number of available
* free pages.
*/
@@ -758,11 +758,11 @@ static int enough_free_mem(void)
/**
* enough_swap - Make sure we have enough swap to save the image.
*
* Returns TRUE or FALSE after checking the total amount of swap
* Returns TRUE or FALSE after checking the total amount of swap
 * space available.
*
* FIXME: si_swapinfo(&i) returns all swap devices information.
* We should only consider resume_device.
* We should only consider resume_device.
*/
static int enough_swap(void)
@@ -781,18 +781,18 @@ static int swsusp_alloc(void)
{
int error;
pagedir_nosave = NULL;
nr_copy_pages = calc_nr(nr_copy_pages);
pr_debug("suspend: (pages needed: %d + %d free: %d)\n",
nr_copy_pages, PAGES_FOR_IO, nr_free_pages());
pagedir_nosave = NULL;
if (!enough_free_mem())
return -ENOMEM;
if (!enough_swap())
return -ENOSPC;
nr_copy_pages = calc_nr(nr_copy_pages);
if (!(pagedir_save = alloc_pagedir(nr_copy_pages))) {
printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
return -ENOMEM;
@@ -827,8 +827,8 @@ static int suspend_prepare_image(void)
error = swsusp_alloc();
if (error)
return error;
/* During allocating of suspend pagedir, new cold pages may appear.
/* During allocating of suspend pagedir, new cold pages may appear.
* Kill them.
*/
drain_local_pages();
@@ -929,21 +929,6 @@ int swsusp_resume(void)
return error;
}
/* More restore stuff */
/*
* Returns true if given address/order collides with any orig_address
*/
static int does_collide_order(unsigned long addr, int order)
{
int i;
for (i=0; i < (1<<order); i++)
if (!PageNosaveFree(virt_to_page(addr + i * PAGE_SIZE)))
return 1;
return 0;
}
/**
* On resume, for storing the PBE list and the image,
* we can only use memory pages that do not conflict with the pages
@@ -973,7 +958,7 @@ static unsigned long get_usable_page(unsigned gfp_mask)
unsigned long m;
m = get_zeroed_page(gfp_mask);
while (does_collide_order(m, 0)) {
while (!PageNosaveFree(virt_to_page(m))) {
eat_page((void *)m);
m = get_zeroed_page(gfp_mask);
if (!m)
@@ -1045,7 +1030,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist)
/* Set page flags */
for_each_zone(zone) {
for_each_zone (zone) {
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
SetPageNosaveFree(pfn_to_page(zone_pfn +
zone->zone_start_pfn));
@@ -1061,7 +1046,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist)
/* Relocate colliding pages */
for_each_pb_page (pbpage, pblist) {
if (does_collide_order((unsigned long)pbpage, 0)) {
if (!PageNosaveFree(virt_to_page((unsigned long)pbpage))) {
m = (void *)get_usable_page(GFP_ATOMIC | __GFP_COLD);
if (!m) {
error = -ENOMEM;
@@ -1193,8 +1178,10 @@ static const char * sanity_check(void)
return "version";
if (strcmp(swsusp_info.uts.machine,system_utsname.machine))
return "machine";
#if 0
if(swsusp_info.cpus != num_online_cpus())
return "number of cpus";
#endif
return NULL;
}


@@ -588,8 +588,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
log_level_unknown = 1;
}
if (!cpu_online(smp_processor_id()) &&
system_state != SYSTEM_RUNNING) {
if (!cpu_online(smp_processor_id())) {
/*
* Some console drivers may assume that per-cpu resources have
* been allocated. So don't allow them to be called by this
@@ -876,8 +875,10 @@ void register_console(struct console * console)
break;
console->flags |= CON_ENABLED;
console->index = console_cmdline[i].index;
if (i == preferred_console)
if (i == selected_console) {
console->flags |= CON_CONSDEV;
preferred_console = selected_console;
}
break;
}
@@ -897,6 +898,8 @@ void register_console(struct console * console)
if ((console->flags & CON_CONSDEV) || console_drivers == NULL) {
console->next = console_drivers;
console_drivers = console;
if (console->next)
console->next->flags &= ~CON_CONSDEV;
} else {
console->next = console_drivers->next;
console_drivers->next = console;
@@ -937,10 +940,14 @@ int unregister_console(struct console * console)
/* If last console is removed, we re-enable picking the first
* one that gets registered. Without that, pmac early boot console
* would prevent fbcon from taking over.
*
* If this isn't the last console and it has CON_CONSDEV set, we
* need to set it on the next preferred console.
*/
if (console_drivers == NULL)
preferred_console = selected_console;
else if (console->flags & CON_CONSDEV)
console_drivers->flags |= CON_CONSDEV;
release_console_sem();
return res;


@@ -263,7 +263,7 @@ static int find_resource(struct resource *root, struct resource *new,
new->start = min;
if (new->end > max)
new->end = max;
new->start = (new->start + align - 1) & ~(align - 1);
new->start = ALIGN(new->start, align);
if (alignf)
alignf(alignf_data, new, size, align);
if (new->start < new->end && new->end - new->start >= size - 1) {
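The replacement uses the kernel's ALIGN() helper, which rounds new->start up to the next multiple of a power-of-two alignment exactly like the open-coded mask expression it replaces. A minimal user-space sketch of the same computation (macro name and example values are illustrative, not taken from the kernel headers):

#include <stdio.h>

/* Round x up to the next multiple of a (a must be a power of two),
 * i.e. the same (x + a - 1) & ~(a - 1) expression removed above. */
#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
	unsigned long start = 0x10003;

	/* 0x10003 rounded up to a 0x1000 boundary is 0x11000. */
	printf("aligned: %#lx\n", ALIGN_UP(start, 0x1000UL));
	return 0;
}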

(One file's diff is suppressed here because it is too large.)


@@ -213,6 +213,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
fastcall void recalc_sigpending_tsk(struct task_struct *t)
{
if (t->signal->group_stop_count > 0 ||
(freezing(t)) ||
PENDING(&t->pending, &t->blocked) ||
PENDING(&t->signal->shared_pending, &t->blocked))
set_tsk_thread_flag(t, TIF_SIGPENDING);
@@ -2230,8 +2231,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese,
current->state = TASK_INTERRUPTIBLE;
timeout = schedule_timeout(timeout);
if (current->flags & PF_FREEZE)
refrigerator(PF_FREEZE);
try_to_freeze();
spin_lock_irq(&current->sighand->siglock);
sig = dequeue_signal(current, &these, &info);
current->blocked = current->real_blocked;
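The removed PF_FREEZE test plus refrigerator() call is what the new try_to_freeze() helper wraps up. A sketch of what the helper reduces to, based only on the lines removed here (the real definition lives in the freezer headers and may differ in detail):

#include <linux/sched.h>

/* Sketch: equivalent of the open-coded sequence this hunk removes. */
static inline void try_to_freeze_sketch(void)
{
	if (current->flags & PF_FREEZE)
		refrigerator(PF_FREEZE);
}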


@@ -100,7 +100,7 @@ static int stop_machine(void)
stopmachine_state = STOPMACHINE_WAIT;
for_each_online_cpu(i) {
if (i == _smp_processor_id())
if (i == raw_smp_processor_id())
continue;
ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
if (ret < 0)
@@ -182,7 +182,7 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
/* If they don't care which CPU fn runs on, bind to any online one. */
if (cpu == NR_CPUS)
cpu = _smp_processor_id();
cpu = raw_smp_processor_id();
p = kthread_create(do_stop, &smdata, "kstopmachine");
if (!IS_ERR(p)) {


@@ -16,6 +16,8 @@
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/workqueue.h>
#include <linux/device.h>
#include <linux/key.h>
@@ -405,6 +407,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
case LINUX_REBOOT_CMD_HALT:
notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
system_state = SYSTEM_HALT;
device_suspend(PMSG_SUSPEND);
device_shutdown();
printk(KERN_EMERG "System halted.\n");
machine_halt();
@@ -415,6 +418,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
case LINUX_REBOOT_CMD_POWER_OFF:
notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
system_state = SYSTEM_POWER_OFF;
device_suspend(PMSG_SUSPEND);
device_shutdown();
printk(KERN_EMERG "Power down.\n");
machine_power_off();
@@ -431,11 +435,30 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
system_state = SYSTEM_RESTART;
device_suspend(PMSG_FREEZE);
device_shutdown();
printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
machine_restart(buffer);
break;
#ifdef CONFIG_KEXEC
case LINUX_REBOOT_CMD_KEXEC:
{
struct kimage *image;
image = xchg(&kexec_image, 0);
if (!image) {
unlock_kernel();
return -EINVAL;
}
notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
system_state = SYSTEM_RESTART;
device_shutdown();
printk(KERN_EMERG "Starting new kernel\n");
machine_shutdown();
machine_kexec(image);
break;
}
#endif
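With CONFIG_KEXEC enabled, a kernel image previously staged with kexec_load(2) can be booted by passing the new LINUX_REBOOT_CMD_KEXEC command to reboot(2). A hedged user-space sketch, assuming LINUX_REBOOT_CMD_KEXEC is exported by <linux/reboot.h> on the target system; the call requires CAP_SYS_BOOT and fails with -EINVAL if no image was loaded (the xchg(&kexec_image, 0) check above):

#include <stdio.h>
#include <unistd.h>
#include <sys/reboot.h>
#include <linux/reboot.h>

int main(void)
{
	/* Jump into the image staged earlier with kexec_load(2). */
	if (reboot(LINUX_REBOOT_CMD_KEXEC) < 0)
		perror("reboot(LINUX_REBOOT_CMD_KEXEC)");

	/* On success this call does not return. */
	return 1;
}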
#ifdef CONFIG_SOFTWARE_SUSPEND
case LINUX_REBOOT_CMD_SW_SUSPEND:
{
@@ -525,7 +548,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
}
if (new_egid != old_egid)
{
current->mm->dumpable = 0;
current->mm->dumpable = suid_dumpable;
smp_wmb();
}
if (rgid != (gid_t) -1 ||
@@ -556,7 +579,7 @@ asmlinkage long sys_setgid(gid_t gid)
{
if(old_egid != gid)
{
current->mm->dumpable=0;
current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->gid = current->egid = current->sgid = current->fsgid = gid;
@@ -565,7 +588,7 @@ asmlinkage long sys_setgid(gid_t gid)
{
if(old_egid != gid)
{
current->mm->dumpable=0;
current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->egid = current->fsgid = gid;
@@ -596,7 +619,7 @@ static int set_user(uid_t new_ruid, int dumpclear)
if(dumpclear)
{
current->mm->dumpable = 0;
current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->uid = new_ruid;
@@ -653,7 +676,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
if (new_euid != old_euid)
{
current->mm->dumpable=0;
current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->fsuid = current->euid = new_euid;
@@ -703,7 +726,7 @@ asmlinkage long sys_setuid(uid_t uid)
if (old_euid != uid)
{
current->mm->dumpable = 0;
current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->fsuid = current->euid = uid;
@@ -748,7 +771,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
if (euid != (uid_t) -1) {
if (euid != current->euid)
{
current->mm->dumpable = 0;
current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->euid = euid;
@@ -798,7 +821,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
if (egid != (gid_t) -1) {
if (egid != current->egid)
{
current->mm->dumpable = 0;
current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->egid = egid;
@@ -845,7 +868,7 @@ asmlinkage long sys_setfsuid(uid_t uid)
{
if (uid != old_fsuid)
{
current->mm->dumpable = 0;
current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->fsuid = uid;
@@ -875,7 +898,7 @@ asmlinkage long sys_setfsgid(gid_t gid)
{
if (gid != old_fsgid)
{
current->mm->dumpable = 0;
current->mm->dumpable = suid_dumpable;
smp_wmb();
}
current->fsgid = gid;
@@ -894,35 +917,69 @@ asmlinkage long sys_times(struct tms __user * tbuf)
*/
if (tbuf) {
struct tms tmp;
struct task_struct *tsk = current;
struct task_struct *t;
cputime_t utime, stime, cutime, cstime;
read_lock(&tasklist_lock);
utime = tsk->signal->utime;
stime = tsk->signal->stime;
t = tsk;
do {
utime = cputime_add(utime, t->utime);
stime = cputime_add(stime, t->stime);
t = next_thread(t);
} while (t != tsk);
#ifdef CONFIG_SMP
if (thread_group_empty(current)) {
/*
* Single thread case without the use of any locks.
*
* We may race with release_task if two threads are
* executing. However, release task first adds up the
* counters (__exit_signal) before removing the task
* from the process tasklist (__unhash_process).
* __exit_signal also acquires and releases the
* siglock which results in the proper memory ordering
* so that the list modifications are always visible
* after the counters have been updated.
*
* If the counters have been updated by the second thread
* but the thread has not yet been removed from the list
* then the other branch will be executing which will
* block on tasklist_lock until the exit handling of the
* other task is finished.
*
* This also implies that the sighand->siglock cannot
* be held by another processor. So we can also
* skip acquiring that lock.
*/
utime = cputime_add(current->signal->utime, current->utime);
stime = cputime_add(current->signal->utime, current->stime);
cutime = current->signal->cutime;
cstime = current->signal->cstime;
} else
#endif
{
/*
* While we have tasklist_lock read-locked, no dying thread
* can be updating current->signal->[us]time. Instead,
* we got their counts included in the live thread loop.
* However, another thread can come in right now and
* do a wait call that updates current->signal->c[us]time.
* To make sure we always see that pair updated atomically,
* we take the siglock around fetching them.
*/
spin_lock_irq(&tsk->sighand->siglock);
cutime = tsk->signal->cutime;
cstime = tsk->signal->cstime;
spin_unlock_irq(&tsk->sighand->siglock);
read_unlock(&tasklist_lock);
/* Process with multiple threads */
struct task_struct *tsk = current;
struct task_struct *t;
read_lock(&tasklist_lock);
utime = tsk->signal->utime;
stime = tsk->signal->stime;
t = tsk;
do {
utime = cputime_add(utime, t->utime);
stime = cputime_add(stime, t->stime);
t = next_thread(t);
} while (t != tsk);
/*
* While we have tasklist_lock read-locked, no dying thread
* can be updating current->signal->[us]time. Instead,
* we got their counts included in the live thread loop.
* However, another thread can come in right now and
* do a wait call that updates current->signal->c[us]time.
* To make sure we always see that pair updated atomically,
* we take the siglock around fetching them.
*/
spin_lock_irq(&tsk->sighand->siglock);
cutime = tsk->signal->cutime;
cstime = tsk->signal->cstime;
spin_unlock_irq(&tsk->sighand->siglock);
read_unlock(&tasklist_lock);
}
tmp.tms_utime = cputime_to_clock_t(utime);
tmp.tms_stime = cputime_to_clock_t(stime);
tmp.tms_cutime = cputime_to_clock_t(cutime);
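The rewritten sys_times() folds the per-thread utime/stime of every live thread into the totals already accumulated in signal_struct, so user space sees whole-process CPU time from a single call. A short sketch of the caller's view, using the standard POSIX interface (not part of the patch):

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

int main(void)
{
	struct tms t;
	long hz = sysconf(_SC_CLK_TCK);

	if (times(&t) == (clock_t)-1)
		return 1;

	/* tms_utime/tms_stime now cover all threads of this process;
	 * tms_cutime/tms_cstime cover waited-for children. */
	printf("user %.2fs  system %.2fs\n",
	       (double)t.tms_utime / hz, (double)t.tms_stime / hz);
	return 0;
}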
@@ -1225,7 +1282,7 @@ static void groups_sort(struct group_info *group_info)
}
/* a simple bsearch */
static int groups_search(struct group_info *group_info, gid_t grp)
int groups_search(struct group_info *group_info, gid_t grp)
{
int left, right;
@@ -1652,7 +1709,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
error = 1;
break;
case PR_SET_DUMPABLE:
if (arg2 != 0 && arg2 != 1) {
if (arg2 < 0 || arg2 > 2) {
error = -EINVAL;
break;
}
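PR_SET_DUMPABLE now accepts 0, 1 or 2 instead of only 0 or 1, mirroring the suid_dumpable modes introduced elsewhere in this file (2 is read here as "dump, but with restricted permissions"; that interpretation is inferred from the suid_dumpable changes, not spelled out in this hunk). A user-space sketch of the standard prctl usage:

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	/* 0 = not dumpable, 1 = dumpable, 2 = dumpable with restricted
	 * (root-readable) core file permissions. */
	if (prctl(PR_SET_DUMPABLE, 1) < 0)
		perror("prctl(PR_SET_DUMPABLE)");

	printf("dumpable = %d\n", prctl(PR_GET_DUMPABLE));
	return 0;
}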


@@ -18,6 +18,8 @@ cond_syscall(sys_acct);
cond_syscall(sys_lookup_dcookie);
cond_syscall(sys_swapon);
cond_syscall(sys_swapoff);
cond_syscall(sys_kexec_load);
cond_syscall(compat_sys_kexec_load);
cond_syscall(sys_init_module);
cond_syscall(sys_delete_module);
cond_syscall(sys_socketpair);
@@ -77,6 +79,7 @@ cond_syscall(sys_request_key);
cond_syscall(sys_keyctl);
cond_syscall(compat_sys_keyctl);
cond_syscall(compat_sys_socketcall);
cond_syscall(sys_set_zone_reclaim);
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
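cond_syscall() gives optional entry points such as the new sys_kexec_load and sys_set_zone_reclaim a weak fallback that returns -ENOSYS whenever the real implementation is not built in. A generic user-space illustration of the weak-alias mechanism it relies on (sys_example_load is a made-up name; the kernel's actual macro is architecture-specific):

#include <errno.h>
#include <stdio.h>

/* The default stub that every optional entry point falls back to. */
long sys_ni_syscall(void)
{
	return -ENOSYS;
}

/* Weak alias: unless a strong definition of the symbol is linked in,
 * calls resolve to sys_ni_syscall(). */
long sys_example_load(void) __attribute__((weak, alias("sys_ni_syscall")));

int main(void)
{
	printf("sys_example_load() -> %ld\n", sys_example_load());
	return 0;
}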


@@ -58,6 +58,7 @@ extern int sysctl_overcommit_ratio;
extern int max_threads;
extern int sysrq_enabled;
extern int core_uses_pid;
extern int suid_dumpable;
extern char core_pattern[];
extern int cad_pid;
extern int pid_max;
@@ -950,6 +951,14 @@ static ctl_table fs_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
{
.ctl_name = KERN_SETUID_DUMPABLE,
.procname = "suid_dumpable",
.data = &suid_dumpable,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ .ctl_name = 0 }
};
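The new fs_table entry makes the setting visible as /proc/sys/fs/suid_dumpable (the path follows from the table it is added to). A small sketch of reading it from user space; writing works the same way for root, or via sysctl -w fs.suid_dumpable=N:

#include <stdio.h>

int main(void)
{
	int val = -1;
	FILE *f = fopen("/proc/sys/fs/suid_dumpable", "r");

	if (!f || fscanf(f, "%d", &val) != 1) {
		perror("suid_dumpable");
	} else {
		/* 0 keeps set-id processes non-dumpable (the old default);
		 * 1 and 2 relax that as described by the suid_dumpable
		 * changes in kernel/sys.c. */
		printf("fs.suid_dumpable = %d\n", val);
	}
	if (f)
		fclose(f);
	return 0;
}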
@@ -991,8 +1000,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol
int error = parse_table(name, nlen, oldval, oldlenp,
newval, newlen, head->ctl_table,
&context);
if (context)
kfree(context);
kfree(context);
if (error != -ENOTDIR)
return error;
tmp = tmp->next;


@@ -57,6 +57,11 @@ static void time_interpolator_update(long delta_nsec);
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)
struct timer_base_s {
spinlock_t lock;
struct timer_list *running_timer;
};
typedef struct tvec_s {
struct list_head vec[TVN_SIZE];
} tvec_t;
@@ -66,9 +71,8 @@ typedef struct tvec_root_s {
} tvec_root_t;
struct tvec_t_base_s {
spinlock_t lock;
struct timer_base_s t_base;
unsigned long timer_jiffies;
struct timer_list *running_timer;
tvec_root_t tv1;
tvec_t tv2;
tvec_t tv3;
@@ -77,18 +81,16 @@ struct tvec_t_base_s {
} ____cacheline_aligned_in_smp;
typedef struct tvec_t_base_s tvec_base_t;
static DEFINE_PER_CPU(tvec_base_t, tvec_bases);
static inline void set_running_timer(tvec_base_t *base,
struct timer_list *timer)
{
#ifdef CONFIG_SMP
base->running_timer = timer;
base->t_base.running_timer = timer;
#endif
}
/* Fake initialization */
static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED };
static void check_timer_failed(struct timer_list *timer)
{
static int whine_count;
@@ -103,7 +105,6 @@ static void check_timer_failed(struct timer_list *timer)
/*
* Now fix it up
*/
spin_lock_init(&timer->lock);
timer->magic = TIMER_MAGIC;
}
@@ -156,65 +157,113 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
list_add_tail(&timer->entry, vec);
}
typedef struct timer_base_s timer_base_t;
/*
* Used by TIMER_INITIALIZER, we can't use per_cpu(tvec_bases)
* at compile time, and we need timer->base to lock the timer.
*/
timer_base_t __init_timer_base
____cacheline_aligned_in_smp = { .lock = SPIN_LOCK_UNLOCKED };
EXPORT_SYMBOL(__init_timer_base);
/***
* init_timer - initialize a timer.
* @timer: the timer to be initialized
*
 * init_timer() must be done to a timer prior to calling *any* of the
* other timer functions.
*/
void fastcall init_timer(struct timer_list *timer)
{
timer->entry.next = NULL;
timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base;
timer->magic = TIMER_MAGIC;
}
EXPORT_SYMBOL(init_timer);
static inline void detach_timer(struct timer_list *timer,
int clear_pending)
{
struct list_head *entry = &timer->entry;
__list_del(entry->prev, entry->next);
if (clear_pending)
entry->next = NULL;
entry->prev = LIST_POISON2;
}
/*
* We are using hashed locking: holding per_cpu(tvec_bases).t_base.lock
* means that all timers which are tied to this base via timer->base are
* locked, and the base itself is locked too.
*
* So __run_timers/migrate_timers can safely modify all timers which could
* be found on ->tvX lists.
*
* When the timer's base is locked, and the timer removed from list, it is
* possible to set timer->base = NULL and drop the lock: the timer remains
* locked.
*/
static timer_base_t *lock_timer_base(struct timer_list *timer,
unsigned long *flags)
{
timer_base_t *base;
for (;;) {
base = timer->base;
if (likely(base != NULL)) {
spin_lock_irqsave(&base->lock, *flags);
if (likely(base == timer->base))
return base;
/* The timer has migrated to another CPU */
spin_unlock_irqrestore(&base->lock, *flags);
}
cpu_relax();
}
}
int __mod_timer(struct timer_list *timer, unsigned long expires)
{
tvec_base_t *old_base, *new_base;
timer_base_t *base;
tvec_base_t *new_base;
unsigned long flags;
int ret = 0;
BUG_ON(!timer->function);
check_timer(timer);
spin_lock_irqsave(&timer->lock, flags);
new_base = &__get_cpu_var(tvec_bases);
repeat:
old_base = timer->base;
base = lock_timer_base(timer, &flags);
/*
* Prevent deadlocks via ordering by old_base < new_base.
*/
if (old_base && (new_base != old_base)) {
if (old_base < new_base) {
spin_lock(&new_base->lock);
spin_lock(&old_base->lock);
} else {
spin_lock(&old_base->lock);
spin_lock(&new_base->lock);
}
/*
* The timer base might have been cancelled while we were
* trying to take the lock(s):
*/
if (timer->base != old_base) {
spin_unlock(&new_base->lock);
spin_unlock(&old_base->lock);
goto repeat;
}
} else {
spin_lock(&new_base->lock);
if (timer->base != old_base) {
spin_unlock(&new_base->lock);
goto repeat;
}
}
/*
* Delete the previous timeout (if there was any), and install
* the new one:
*/
if (old_base) {
list_del(&timer->entry);
if (timer_pending(timer)) {
detach_timer(timer, 0);
ret = 1;
}
new_base = &__get_cpu_var(tvec_bases);
if (base != &new_base->t_base) {
/*
* We are trying to schedule the timer on the local CPU.
* However we can't change timer's base while it is running,
* otherwise del_timer_sync() can't detect that the timer's
* handler yet has not finished. This also guarantees that
* the timer is serialized wrt itself.
*/
if (unlikely(base->running_timer == timer)) {
/* The timer remains on a former base */
new_base = container_of(base, tvec_base_t, t_base);
} else {
/* See the comment in lock_timer_base() */
timer->base = NULL;
spin_unlock(&base->lock);
spin_lock(&new_base->t_base.lock);
timer->base = &new_base->t_base;
}
}
timer->expires = expires;
internal_add_timer(new_base, timer);
timer->base = new_base;
if (old_base && (new_base != old_base))
spin_unlock(&old_base->lock);
spin_unlock(&new_base->lock);
spin_unlock_irqrestore(&timer->lock, flags);
spin_unlock_irqrestore(&new_base->t_base.lock, flags);
return ret;
}
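The scheme above keys every timer to a timer_base_s and locks through the pointer: read timer->base, lock that base, then re-check the pointer under the lock, retrying if the timer migrated in the meantime (the pointer is set to NULL while a timer is being moved). A condensed user-space analogue of the same retry pattern, using pthreads (the struct names are invented for the sketch):

#include <pthread.h>
#include <stddef.h>

struct base {
	pthread_mutex_t lock;
};

struct item {
	/* May be redirected to another base, or set to NULL while the
	 * item is in the middle of being moved. */
	struct base *base;
};

/* Lock whatever base the item currently points at; if the pointer
 * changed while we were acquiring the lock, drop it and retry. */
static struct base *lock_item_base(struct item *it)
{
	for (;;) {
		struct base *b = __atomic_load_n(&it->base, __ATOMIC_ACQUIRE);

		if (b != NULL) {
			pthread_mutex_lock(&b->lock);
			if (b == it->base)
				return b;	/* base and item are now locked */
			pthread_mutex_unlock(&b->lock);
		}
		/* Migration in progress: spin and retry. */
	}
}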
@@ -232,15 +281,15 @@ void add_timer_on(struct timer_list *timer, int cpu)
{
tvec_base_t *base = &per_cpu(tvec_bases, cpu);
unsigned long flags;
BUG_ON(timer_pending(timer) || !timer->function);
check_timer(timer);
spin_lock_irqsave(&base->lock, flags);
spin_lock_irqsave(&base->t_base.lock, flags);
timer->base = &base->t_base;
internal_add_timer(base, timer);
timer->base = base;
spin_unlock_irqrestore(&base->lock, flags);
spin_unlock_irqrestore(&base->t_base.lock, flags);
}
@@ -295,32 +344,55 @@ EXPORT_SYMBOL(mod_timer);
*/
int del_timer(struct timer_list *timer)
{
timer_base_t *base;
unsigned long flags;
tvec_base_t *base;
int ret = 0;
check_timer(timer);
repeat:
base = timer->base;
if (!base)
return 0;
spin_lock_irqsave(&base->lock, flags);
if (base != timer->base) {
if (timer_pending(timer)) {
base = lock_timer_base(timer, &flags);
if (timer_pending(timer)) {
detach_timer(timer, 1);
ret = 1;
}
spin_unlock_irqrestore(&base->lock, flags);
goto repeat;
}
list_del(&timer->entry);
/* Need to make sure that anybody who sees a NULL base also sees the list ops */
smp_wmb();
timer->base = NULL;
spin_unlock_irqrestore(&base->lock, flags);
return 1;
return ret;
}
EXPORT_SYMBOL(del_timer);
#ifdef CONFIG_SMP
/*
* This function tries to deactivate a timer. Upon successful (ret >= 0)
* exit the timer is not queued and the handler is not running on any CPU.
*
* It must not be called from interrupt contexts.
*/
int try_to_del_timer_sync(struct timer_list *timer)
{
timer_base_t *base;
unsigned long flags;
int ret = -1;
base = lock_timer_base(timer, &flags);
if (base->running_timer == timer)
goto out;
ret = 0;
if (timer_pending(timer)) {
detach_timer(timer, 1);
ret = 1;
}
out:
spin_unlock_irqrestore(&base->lock, flags);
return ret;
}
/***
* del_timer_sync - deactivate a timer and wait for the handler to finish.
* @timer: the timer to be deactivated
@@ -332,72 +404,24 @@ EXPORT_SYMBOL(del_timer);
* Synchronization rules: callers must prevent restarting of the timer,
* otherwise this function is meaningless. It must not be called from
* interrupt contexts. The caller must not hold locks which would prevent
* completion of the timer's handler. Upon exit the timer is not queued and
* the handler is not running on any CPU.
* completion of the timer's handler. The timer's handler must not call
* add_timer_on(). Upon exit the timer is not queued and the handler is
* not running on any CPU.
*
* The function returns whether it has deactivated a pending timer or not.
*
* del_timer_sync() is slow and complicated because it copes with timer
* handlers which re-arm the timer (periodic timers). If the timer handler
* is known to not do this (a single shot timer) then use
* del_singleshot_timer_sync() instead.
*/
int del_timer_sync(struct timer_list *timer)
{
tvec_base_t *base;
int i, ret = 0;
check_timer(timer);
del_again:
ret += del_timer(timer);
for_each_online_cpu(i) {
base = &per_cpu(tvec_bases, i);
if (base->running_timer == timer) {
while (base->running_timer == timer) {
cpu_relax();
preempt_check_resched();
}
break;
}
for (;;) {
int ret = try_to_del_timer_sync(timer);
if (ret >= 0)
return ret;
}
smp_rmb();
if (timer_pending(timer))
goto del_again;
return ret;
}
EXPORT_SYMBOL(del_timer_sync);
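try_to_del_timer_sync() returns -1 when the handler is still running on another CPU, which lets a caller that holds a lock also taken by the handler back off instead of spinning forever inside del_timer_sync(). A hedged sketch of that idiom; struct my_dev and its fields are hypothetical, only the timer API calls come from this patch:

#include <linux/timer.h>
#include <linux/spinlock.h>

struct my_dev {				/* hypothetical driver state */
	spinlock_t lock;		/* also taken by the timer handler */
	struct timer_list timer;
};

/* Called with dev->lock held: del_timer_sync() would deadlock waiting
 * for a handler that is itself waiting for dev->lock, so retry and let
 * the handler finish by dropping the lock between attempts. */
static void cancel_dev_timer(struct my_dev *dev)
{
	while (try_to_del_timer_sync(&dev->timer) < 0) {
		spin_unlock_irq(&dev->lock);
		cpu_relax();
		spin_lock_irq(&dev->lock);
	}
}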
/***
* del_singleshot_timer_sync - deactivate a non-recursive timer
* @timer: the timer to be deactivated
*
* This function is an optimization of del_timer_sync for the case where the
* caller can guarantee the timer does not reschedule itself in its timer
* function.
*
* Synchronization rules: callers must prevent restarting of the timer,
* otherwise this function is meaningless. It must not be called from
 * interrupt contexts. The caller must not hold locks which would prevent
* completion of the timer's handler. Upon exit the timer is not queued and
* the handler is not running on any CPU.
*
* The function returns whether it has deactivated a pending timer or not.
*/
int del_singleshot_timer_sync(struct timer_list *timer)
{
int ret = del_timer(timer);
if (!ret) {
ret = del_timer_sync(timer);
BUG_ON(ret);
}
return ret;
}
EXPORT_SYMBOL(del_singleshot_timer_sync);
#endif
static int cascade(tvec_base_t *base, tvec_t *tv, int index)
@@ -415,7 +439,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
struct timer_list *tmp;
tmp = list_entry(curr, struct timer_list, entry);
BUG_ON(tmp->base != base);
BUG_ON(tmp->base != &base->t_base);
curr = curr->next;
internal_add_timer(base, tmp);
}
@@ -437,7 +461,7 @@ static inline void __run_timers(tvec_base_t *base)
{
struct timer_list *timer;
spin_lock_irq(&base->lock);
spin_lock_irq(&base->t_base.lock);
while (time_after_eq(jiffies, base->timer_jiffies)) {
struct list_head work_list = LIST_HEAD_INIT(work_list);
struct list_head *head = &work_list;
@@ -453,8 +477,7 @@ static inline void __run_timers(tvec_base_t *base)
cascade(base, &base->tv5, INDEX(3));
++base->timer_jiffies;
list_splice_init(base->tv1.vec + index, &work_list);
repeat:
if (!list_empty(head)) {
while (!list_empty(head)) {
void (*fn)(unsigned long);
unsigned long data;
@@ -462,25 +485,26 @@ repeat:
fn = timer->function;
data = timer->data;
list_del(&timer->entry);
set_running_timer(base, timer);
smp_wmb();
timer->base = NULL;
spin_unlock_irq(&base->lock);
detach_timer(timer, 1);
spin_unlock_irq(&base->t_base.lock);
{
u32 preempt_count = preempt_count();
int preempt_count = preempt_count();
fn(data);
if (preempt_count != preempt_count()) {
printk("huh, entered %p with %08x, exited with %08x?\n", fn, preempt_count, preempt_count());
printk(KERN_WARNING "huh, entered %p "
"with preempt_count %08x, exited"
" with %08x?\n",
fn, preempt_count,
preempt_count());
BUG();
}
}
spin_lock_irq(&base->lock);
goto repeat;
spin_lock_irq(&base->t_base.lock);
}
}
set_running_timer(base, NULL);
spin_unlock_irq(&base->lock);
spin_unlock_irq(&base->t_base.lock);
}
#ifdef CONFIG_NO_IDLE_HZ
@@ -499,7 +523,7 @@ unsigned long next_timer_interrupt(void)
int i, j;
base = &__get_cpu_var(tvec_bases);
spin_lock(&base->lock);
spin_lock(&base->t_base.lock);
expires = base->timer_jiffies + (LONG_MAX >> 1);
list = 0;
@@ -547,7 +571,7 @@ found:
expires = nte->expires;
}
}
spin_unlock(&base->lock);
spin_unlock(&base->t_base.lock);
return expires;
}
#endif
@@ -1286,9 +1310,9 @@ static void __devinit init_timers_cpu(int cpu)
{
int j;
tvec_base_t *base;
base = &per_cpu(tvec_bases, cpu);
spin_lock_init(&base->lock);
spin_lock_init(&base->t_base.lock);
for (j = 0; j < TVN_SIZE; j++) {
INIT_LIST_HEAD(base->tv5.vec + j);
INIT_LIST_HEAD(base->tv4.vec + j);
@@ -1302,22 +1326,16 @@ static void __devinit init_timers_cpu(int cpu)
}
#ifdef CONFIG_HOTPLUG_CPU
static int migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
{
struct timer_list *timer;
while (!list_empty(head)) {
timer = list_entry(head->next, struct timer_list, entry);
/* We're locking backwards from __mod_timer order here,
beware deadlock. */
if (!spin_trylock(&timer->lock))
return 0;
list_del(&timer->entry);
detach_timer(timer, 0);
timer->base = &new_base->t_base;
internal_add_timer(new_base, timer);
timer->base = new_base;
spin_unlock(&timer->lock);
}
return 1;
}
static void __devinit migrate_timers(int cpu)
@@ -1331,39 +1349,24 @@ static void __devinit migrate_timers(int cpu)
new_base = &get_cpu_var(tvec_bases);
local_irq_disable();
again:
/* Prevent deadlocks via ordering by old_base < new_base. */
if (old_base < new_base) {
spin_lock(&new_base->lock);
spin_lock(&old_base->lock);
} else {
spin_lock(&old_base->lock);
spin_lock(&new_base->lock);
}
spin_lock(&new_base->t_base.lock);
spin_lock(&old_base->t_base.lock);
if (old_base->running_timer)
if (old_base->t_base.running_timer)
BUG();
for (i = 0; i < TVR_SIZE; i++)
if (!migrate_timer_list(new_base, old_base->tv1.vec + i))
goto unlock_again;
for (i = 0; i < TVN_SIZE; i++)
if (!migrate_timer_list(new_base, old_base->tv2.vec + i)
|| !migrate_timer_list(new_base, old_base->tv3.vec + i)
|| !migrate_timer_list(new_base, old_base->tv4.vec + i)
|| !migrate_timer_list(new_base, old_base->tv5.vec + i))
goto unlock_again;
spin_unlock(&old_base->lock);
spin_unlock(&new_base->lock);
migrate_timer_list(new_base, old_base->tv1.vec + i);
for (i = 0; i < TVN_SIZE; i++) {
migrate_timer_list(new_base, old_base->tv2.vec + i);
migrate_timer_list(new_base, old_base->tv3.vec + i);
migrate_timer_list(new_base, old_base->tv4.vec + i);
migrate_timer_list(new_base, old_base->tv5.vec + i);
}
spin_unlock(&old_base->t_base.lock);
spin_unlock(&new_base->t_base.lock);
local_irq_enable();
put_cpu_var(tvec_bases);
return;
unlock_again:
/* Avoid deadlock with __mod_timer, by backing off. */
spin_unlock(&old_base->lock);
spin_unlock(&new_base->lock);
cpu_relax();
goto again;
}
#endif /* CONFIG_HOTPLUG_CPU */
@@ -1594,7 +1597,7 @@ void msleep(unsigned int msecs)
EXPORT_SYMBOL(msleep);
/**
* msleep_interruptible - sleep waiting for waitqueue interruptions
* msleep_interruptible - sleep waiting for signals
* @msecs: Time in milliseconds to sleep for
*/
unsigned long msleep_interruptible(unsigned int msecs)
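msleep_interruptible() sleeps for the requested number of milliseconds but wakes early when a signal becomes pending, returning how many milliseconds were left. A short usage sketch (the surrounding function is hypothetical):

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/kernel.h>

/* Sketch: back off for half a second, but return early on a signal. */
static int wait_a_bit(void)
{
	unsigned long left = msleep_interruptible(500);

	if (left) {
		pr_debug("interrupted with %lu ms remaining\n", left);
		return -EINTR;
	}
	return 0;
}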