Merge with master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
kernel/Kconfig.hz (new file, 46 lines)
@@ -0,0 +1,46 @@
#
# Timer Interrupt Frequency Configuration
#

choice
        prompt "Timer frequency"
        default HZ_250
        help
          Allows the configuration of the timer frequency. It is customary
          to have the timer interrupt run at 1000 HZ but 100 HZ may be more
          beneficial for servers and NUMA systems that do not need to have
          a fast response for user interaction and that may experience bus
          contention and cacheline bounces as a result of timer interrupts.
          Note that the timer interrupt occurs on each processor in an SMP
          environment leading to NR_CPUS * HZ number of timer interrupts
          per second.

config HZ_100
        bool "100 HZ"
        help
          100 HZ is a typical choice for servers, SMP and NUMA systems
          with lots of processors that may show reduced performance if
          too many timer interrupts are occurring.

config HZ_250
        bool "250 HZ"
        help
          250 HZ is a good compromise choice allowing server performance
          while also showing good interactive responsiveness even
          on SMP and NUMA systems.

config HZ_1000
        bool "1000 HZ"
        help
          1000 HZ is the preferred choice for desktop systems and other
          systems requiring fast interactive responses to events.

endchoice

config HZ
        int
        default 100 if HZ_100
        default 250 if HZ_250
        default 1000 if HZ_1000
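The help text above notes that every CPU takes NR_CPUS * HZ timer interrupts per second and that HZ bounds timer granularity. As an editorial illustration (not part of the merge), the hedged userspace sketch below shows how a millisecond delay quantizes to whole ticks at each selectable HZ value; the helper name is made up, but it mirrors the round-up behaviour of the kernel's msecs_to_jiffies().

/* Editorial sketch, not part of the merge: how the HZ choice above bounds
 * timer granularity. A delay given in milliseconds has to be rounded up to
 * whole ticks, so HZ=100 quantizes every delay to 10 ms steps.
 */
#include <stdio.h>

static unsigned long msecs_to_jiffies_demo(unsigned int ms, unsigned int hz)
{
        return ((unsigned long)ms * hz + 999) / 1000;   /* round up to whole ticks */
}

int main(void)
{
        const unsigned int hz_values[] = { 100, 250, 1000 };

        for (unsigned int i = 0; i < 3; i++) {
                unsigned int hz = hz_values[i];
                unsigned long ticks = msecs_to_jiffies_demo(4, hz);

                printf("HZ=%-4u tick=%2u ms  4 ms delay -> %lu tick(s) = %lu ms\n",
                       hz, 1000 / hz, ticks, ticks * (1000 / hz));
        }
        return 0;
}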
kernel/Kconfig.preempt (new file, 65 lines)
@@ -0,0 +1,65 @@

choice
        prompt "Preemption Model"
        default PREEMPT_NONE

config PREEMPT_NONE
        bool "No Forced Preemption (Server)"
        help
          This is the traditional Linux preemption model, geared towards
          throughput. It will still provide good latencies most of the
          time, but there are no guarantees and occasional longer delays
          are possible.

          Select this option if you are building a kernel for a server or
          scientific/computation system, or if you want to maximize the
          raw processing power of the kernel, irrespective of scheduling
          latencies.

config PREEMPT_VOLUNTARY
        bool "Voluntary Kernel Preemption (Desktop)"
        help
          This option reduces the latency of the kernel by adding more
          "explicit preemption points" to the kernel code. These new
          preemption points have been selected to reduce the maximum
          latency of rescheduling, providing faster application reactions,
          at the cost of slightly lower throughput.

          This allows reaction to interactive events by allowing a
          low priority process to voluntarily preempt itself even if it
          is in kernel mode executing a system call. This allows
          applications to run more 'smoothly' even when the system is
          under load.

          Select this if you are building a kernel for a desktop system.

config PREEMPT
        bool "Preemptible Kernel (Low-Latency Desktop)"
        help
          This option reduces the latency of the kernel by making
          all kernel code (that is not executing in a critical section)
          preemptible. This allows reaction to interactive events by
          permitting a low priority process to be preempted involuntarily
          even if it is in kernel mode executing a system call and would
          otherwise not be about to reach a natural preemption point.
          This allows applications to run more 'smoothly' even when the
          system is under load, at the cost of slightly lower throughput
          and a slight runtime overhead to kernel code.

          Select this if you are building a kernel for a desktop or
          embedded system with latency requirements in the milliseconds
          range.

endchoice

config PREEMPT_BKL
        bool "Preempt The Big Kernel Lock"
        depends on SMP || PREEMPT
        default y
        help
          This option reduces the latency of the kernel by making the
          big kernel lock preemptible.

          Say Y here if you are building a kernel for a desktop system.
          Say N if you are unsure.
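The PREEMPT_VOLUNTARY help text above is about sprinkling explicit yield points through long kernel paths. Purely as an editorial userspace analogy (not kernel code and not part of this merge), the sketch below shows the same throughput-versus-latency trade: a long-running loop periodically offers the CPU back, the way cond_resched()-style preemption points do in the kernel.

/* Editorial userspace analogy only: a voluntary "preemption point". */
#include <sched.h>
#include <stdio.h>

static int voluntary = 1;                       /* stand-in for CONFIG_PREEMPT_VOLUNTARY */

static void preemption_point(void)
{
        if (voluntary)
                sched_yield();                  /* analogous to cond_resched() */
}

int main(void)
{
        unsigned long sum = 0;

        for (unsigned long i = 0; i < 100000000UL; i++) {
                sum += i;
                if ((i & 0xFFFFF) == 0)         /* yield roughly every million iterations */
                        preemption_point();
        }
        printf("sum=%lu\n", sum);
        return 0;
}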
@@ -17,6 +17,7 @@ obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_PM) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_IKCONFIG) += configs.o
@@ -27,6 +28,7 @@ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_SECCOMP) += seccomp.o

ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
kernel/cpu.c
@@ -63,19 +63,15 @@ static int take_cpu_down(void *unused)
|
||||
{
|
||||
int err;
|
||||
|
||||
/* Take offline: makes arch_cpu_down somewhat easier. */
|
||||
cpu_clear(smp_processor_id(), cpu_online_map);
|
||||
|
||||
/* Ensure this CPU doesn't handle any more interrupts. */
|
||||
err = __cpu_disable();
|
||||
if (err < 0)
|
||||
cpu_set(smp_processor_id(), cpu_online_map);
|
||||
else
|
||||
/* Force idle task to run as soon as we yield: it should
|
||||
immediately notice cpu is offline and die quickly. */
|
||||
sched_idle_next();
|
||||
return err;
|
||||
|
||||
return err;
|
||||
/* Force idle task to run as soon as we yield: it should
|
||||
immediately notice cpu is offline and die quickly. */
|
||||
sched_idle_next();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cpu_down(unsigned int cpu)
|
||||
|
@@ -228,13 +228,7 @@ static struct dentry_operations cpuset_dops = {
|
||||
|
||||
static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name)
|
||||
{
|
||||
struct qstr qstr;
|
||||
struct dentry *d;
|
||||
|
||||
qstr.name = name;
|
||||
qstr.len = strlen(name);
|
||||
qstr.hash = full_name_hash(name, qstr.len);
|
||||
d = lookup_hash(&qstr, parent);
|
||||
struct dentry *d = lookup_one_len(name, parent, strlen(name));
|
||||
if (!IS_ERR(d))
|
||||
d->d_op = &cpuset_dops;
|
||||
return d;
|
||||
@@ -601,10 +595,62 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* For a given cpuset cur, partition the system as follows
|
||||
* a. All cpus in the parent cpuset's cpus_allowed that are not part of any
|
||||
* exclusive child cpusets
|
||||
* b. All cpus in the current cpuset's cpus_allowed that are not part of any
|
||||
* exclusive child cpusets
|
||||
* Build these two partitions by calling partition_sched_domains
|
||||
*
|
||||
* Call with cpuset_sem held. May nest a call to the
|
||||
* lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
|
||||
*/
|
||||
static void update_cpu_domains(struct cpuset *cur)
|
||||
{
|
||||
struct cpuset *c, *par = cur->parent;
|
||||
cpumask_t pspan, cspan;
|
||||
|
||||
if (par == NULL || cpus_empty(cur->cpus_allowed))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Get all cpus from parent's cpus_allowed not part of exclusive
|
||||
* children
|
||||
*/
|
||||
pspan = par->cpus_allowed;
|
||||
list_for_each_entry(c, &par->children, sibling) {
|
||||
if (is_cpu_exclusive(c))
|
||||
cpus_andnot(pspan, pspan, c->cpus_allowed);
|
||||
}
|
||||
if (is_removed(cur) || !is_cpu_exclusive(cur)) {
|
||||
cpus_or(pspan, pspan, cur->cpus_allowed);
|
||||
if (cpus_equal(pspan, cur->cpus_allowed))
|
||||
return;
|
||||
cspan = CPU_MASK_NONE;
|
||||
} else {
|
||||
if (cpus_empty(pspan))
|
||||
return;
|
||||
cspan = cur->cpus_allowed;
|
||||
/*
|
||||
* Get all cpus from current cpuset's cpus_allowed not part
|
||||
* of exclusive children
|
||||
*/
|
||||
list_for_each_entry(c, &cur->children, sibling) {
|
||||
if (is_cpu_exclusive(c))
|
||||
cpus_andnot(cspan, cspan, c->cpus_allowed);
|
||||
}
|
||||
}
|
||||
|
||||
lock_cpu_hotplug();
|
||||
partition_sched_domains(&pspan, &cspan);
|
||||
unlock_cpu_hotplug();
|
||||
}
|
||||
|
||||
static int update_cpumask(struct cpuset *cs, char *buf)
|
||||
{
|
||||
struct cpuset trialcs;
|
||||
int retval;
|
||||
int retval, cpus_unchanged;
|
||||
|
||||
trialcs = *cs;
|
||||
retval = cpulist_parse(buf, trialcs.cpus_allowed);
|
||||
@@ -614,9 +660,13 @@ static int update_cpumask(struct cpuset *cs, char *buf)
|
||||
if (cpus_empty(trialcs.cpus_allowed))
|
||||
return -ENOSPC;
|
||||
retval = validate_change(cs, &trialcs);
|
||||
if (retval == 0)
|
||||
cs->cpus_allowed = trialcs.cpus_allowed;
|
||||
return retval;
|
||||
if (retval < 0)
|
||||
return retval;
|
||||
cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed);
|
||||
cs->cpus_allowed = trialcs.cpus_allowed;
|
||||
if (is_cpu_exclusive(cs) && !cpus_unchanged)
|
||||
update_cpu_domains(cs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int update_nodemask(struct cpuset *cs, char *buf)
|
||||
@@ -652,7 +702,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
|
||||
{
|
||||
int turning_on;
|
||||
struct cpuset trialcs;
|
||||
int err;
|
||||
int err, cpu_exclusive_changed;
|
||||
|
||||
turning_on = (simple_strtoul(buf, NULL, 10) != 0);
|
||||
|
||||
@@ -663,13 +713,18 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
|
||||
clear_bit(bit, &trialcs.flags);
|
||||
|
||||
err = validate_change(cs, &trialcs);
|
||||
if (err == 0) {
|
||||
if (turning_on)
|
||||
set_bit(bit, &cs->flags);
|
||||
else
|
||||
clear_bit(bit, &cs->flags);
|
||||
}
|
||||
return err;
|
||||
if (err < 0)
|
||||
return err;
|
||||
cpu_exclusive_changed =
|
||||
(is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
|
||||
if (turning_on)
|
||||
set_bit(bit, &cs->flags);
|
||||
else
|
||||
clear_bit(bit, &cs->flags);
|
||||
|
||||
if (cpu_exclusive_changed)
|
||||
update_cpu_domains(cs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int attach_task(struct cpuset *cs, char *buf)
|
||||
@@ -1315,12 +1370,14 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
|
||||
up(&cpuset_sem);
|
||||
return -EBUSY;
|
||||
}
|
||||
spin_lock(&cs->dentry->d_lock);
|
||||
parent = cs->parent;
|
||||
set_bit(CS_REMOVED, &cs->flags);
|
||||
if (is_cpu_exclusive(cs))
|
||||
update_cpu_domains(cs);
|
||||
list_del(&cs->sibling); /* delete my sibling from parent->children */
|
||||
if (list_empty(&parent->children))
|
||||
check_for_release(parent);
|
||||
spin_lock(&cs->dentry->d_lock);
|
||||
d = dget(cs->dentry);
|
||||
cs->dentry = NULL;
|
||||
spin_unlock(&d->d_lock);
|
||||
|
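Before the new files: the update_cpu_domains() comment earlier in this cpuset.c hunk describes the two partitions handed to partition_sched_domains() (the parent's span minus exclusive children, and the current cpuset's span minus exclusive children). The hedged, self-contained sketch below redoes that rule with plain 64-bit masks in place of cpumask_t; the cpuset layout is invented purely for illustration.

/* Editorial illustration, not kernel code. */
#include <stdio.h>
#include <stdint.h>

/* span &= ~child for every cpu_exclusive child, i.e. cpus_andnot() in a loop */
static uint64_t span_minus_exclusive_children(uint64_t span,
                                              const uint64_t *excl, int n)
{
        for (int i = 0; i < n; i++)
                span &= ~excl[i];
        return span;
}

int main(void)
{
        uint64_t parent_allowed = 0xFF;        /* parent cpuset: CPUs 0-7   */
        uint64_t cur_allowed    = 0x0F;        /* cur (exclusive): CPUs 0-3 */
        uint64_t cur_excl_kids[] = { 0x03 };   /* exclusive child: CPUs 0-1 */

        uint64_t pspan = span_minus_exclusive_children(parent_allowed,
                                                       &cur_allowed, 1);
        uint64_t cspan = span_minus_exclusive_children(cur_allowed,
                                                       cur_excl_kids, 1);

        printf("pspan=0x%llx (CPUs 4-7)  cspan=0x%llx (CPUs 2-3)\n",
               (unsigned long long)pspan, (unsigned long long)cspan);
        return 0;
}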
kernel/crash_dump.c (new file, 52 lines)
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* kernel/crash_dump.c - Memory preserving reboot related code.
|
||||
*
|
||||
* Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
|
||||
* Copyright (C) IBM Corporation, 2004. All rights reserved
|
||||
*/
|
||||
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/crash_dump.h>
|
||||
|
||||
#include <asm/io.h>
|
||||
#include <asm/uaccess.h>
|
||||
|
||||
/* Stores the physical address of elf header of crash image. */
|
||||
unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
|
||||
|
||||
/*
|
||||
* Copy a page from "oldmem". For this page, there is no pte mapped
|
||||
* in the current kernel. We stitch up a pte, similar to kmap_atomic.
|
||||
*/
|
||||
ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
|
||||
size_t csize, unsigned long offset, int userbuf)
|
||||
{
|
||||
void *page, *vaddr;
|
||||
|
||||
if (!csize)
|
||||
return 0;
|
||||
|
||||
page = kmalloc(PAGE_SIZE, GFP_KERNEL);
|
||||
if (!page)
|
||||
return -ENOMEM;
|
||||
|
||||
vaddr = kmap_atomic_pfn(pfn, KM_PTE0);
|
||||
copy_page(page, vaddr);
|
||||
kunmap_atomic(vaddr, KM_PTE0);
|
||||
|
||||
if (userbuf) {
|
||||
if (copy_to_user(buf, (page + offset), csize)) {
|
||||
kfree(page);
|
||||
return -EFAULT;
|
||||
}
|
||||
} else {
|
||||
memcpy(buf, (page + offset), csize);
|
||||
}
|
||||
|
||||
kfree(page);
|
||||
return csize;
|
||||
}
|
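copy_oldmem_page() above copies one page at a time out of the old kernel's memory. As a hedged sketch of how a caller might drive it (the helper below is invented for illustration and is not the real /proc/vmcore read path), a byte range is split on page boundaries and each piece handed to copy_oldmem_page():

/* Hedged caller sketch; assumes the usual kernel headers (linux/crash_dump.h,
 * asm/page.h) are already included.
 */
static ssize_t read_oldmem_range(char *buf, size_t count,
                                 unsigned long long paddr, int userbuf)
{
        ssize_t total = 0;

        while (count) {
                unsigned long pfn = paddr >> PAGE_SHIFT;
                unsigned long offset = paddr & (PAGE_SIZE - 1);
                size_t chunk = min(count, (size_t)(PAGE_SIZE - offset));
                ssize_t ret = copy_oldmem_page(pfn, buf, chunk, offset, userbuf);

                if (ret < 0)
                        return ret;             /* -ENOMEM or -EFAULT from above */
                buf += ret;
                paddr += ret;
                count -= ret;
                total += ret;
        }
        return total;
}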
@@ -72,6 +72,11 @@ repeat:
|
||||
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
|
||||
__exit_signal(p);
|
||||
__exit_sighand(p);
|
||||
/*
|
||||
* Note that the fastpath in sys_times depends on __exit_signal having
|
||||
* updated the counters before a task is removed from the tasklist of
|
||||
* the process by __unhash_process.
|
||||
*/
|
||||
__unhash_process(p);
|
||||
|
||||
/*
|
||||
@@ -779,6 +784,8 @@ fastcall NORET_TYPE void do_exit(long code)
|
||||
|
||||
profile_task_exit(tsk);
|
||||
|
||||
WARN_ON(atomic_read(&tsk->fs_excl));
|
||||
|
||||
if (unlikely(in_interrupt()))
|
||||
panic("Aiee, killing interrupt handler!");
|
||||
if (unlikely(!tsk->pid))
|
||||
@@ -793,6 +800,17 @@ fastcall NORET_TYPE void do_exit(long code)
|
||||
ptrace_notify((PTRACE_EVENT_EXIT << 8) | SIGTRAP);
|
||||
}
|
||||
|
||||
/*
|
||||
* We're taking recursive faults here in do_exit. Safest is to just
|
||||
* leave this task alone and wait for reboot.
|
||||
*/
|
||||
if (unlikely(tsk->flags & PF_EXITING)) {
|
||||
printk(KERN_ALERT
|
||||
"Fixing recursive fault but reboot is needed!\n");
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
schedule();
|
||||
}
|
||||
|
||||
tsk->flags |= PF_EXITING;
|
||||
|
||||
/*
|
||||
|
@@ -194,6 +194,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
|
||||
mm->mmap = NULL;
|
||||
mm->mmap_cache = NULL;
|
||||
mm->free_area_cache = oldmm->mmap_base;
|
||||
mm->cached_hole_size = ~0UL;
|
||||
mm->map_count = 0;
|
||||
set_mm_counter(mm, rss, 0);
|
||||
set_mm_counter(mm, anon_rss, 0);
|
||||
@@ -249,8 +250,9 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
|
||||
|
||||
/*
|
||||
* Link in the new vma and copy the page table entries:
|
||||
* link in first so that swapoff can see swap entries,
|
||||
* and try_to_unmap_one's find_vma find the new vma.
|
||||
* link in first so that swapoff can see swap entries.
|
||||
* Note that, exceptionally, here the vma is inserted
|
||||
* without holding mm->mmap_sem.
|
||||
*/
|
||||
spin_lock(&mm->page_table_lock);
|
||||
*pprev = tmp;
|
||||
@@ -322,6 +324,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
|
||||
mm->ioctx_list = NULL;
|
||||
mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
|
||||
mm->free_area_cache = TASK_UNMAPPED_BASE;
|
||||
mm->cached_hole_size = ~0UL;
|
||||
|
||||
if (likely(!mm_alloc_pgd(mm))) {
|
||||
mm->def_flags = 0;
|
||||
@@ -1000,9 +1003,6 @@ static task_t *copy_process(unsigned long clone_flags,
|
||||
p->pdeath_signal = 0;
|
||||
p->exit_state = 0;
|
||||
|
||||
/* Perform scheduler related setup */
|
||||
sched_fork(p);
|
||||
|
||||
/*
|
||||
* Ok, make it visible to the rest of the system.
|
||||
* We dont wake it up yet.
|
||||
@@ -1011,18 +1011,24 @@ static task_t *copy_process(unsigned long clone_flags,
|
||||
INIT_LIST_HEAD(&p->ptrace_children);
|
||||
INIT_LIST_HEAD(&p->ptrace_list);
|
||||
|
||||
/* Perform scheduler related setup. Assign this task to a CPU. */
|
||||
sched_fork(p, clone_flags);
|
||||
|
||||
/* Need tasklist lock for parent etc handling! */
|
||||
write_lock_irq(&tasklist_lock);
|
||||
|
||||
/*
|
||||
* The task hasn't been attached yet, so cpus_allowed mask cannot
|
||||
* have changed. The cpus_allowed mask of the parent may have
|
||||
* changed after it was copied first time, and it may then move to
|
||||
* another CPU - so we re-copy it here and set the child's CPU to
|
||||
the parent's CPU. This avoids a lot of nasty races.
|
||||
* The task hasn't been attached yet, so its cpus_allowed mask will
|
||||
* not be changed, nor will its assigned CPU.
|
||||
*
|
||||
* The cpus_allowed mask of the parent may have changed after it was
|
||||
* copied first time - so re-copy it here, then check the child's CPU
|
||||
* to ensure it is on a valid CPU (and if not, just force it back to
|
||||
parent's CPU). This avoids a lot of nasty races.
|
||||
*/
|
||||
p->cpus_allowed = current->cpus_allowed;
|
||||
set_task_cpu(p, smp_processor_id());
|
||||
if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed)))
|
||||
set_task_cpu(p, smp_processor_id());
|
||||
|
||||
/*
|
||||
* Check for pending SIGKILL! The new thread should not be allowed
|
||||
@@ -1084,6 +1090,11 @@ static task_t *copy_process(unsigned long clone_flags,
|
||||
spin_unlock(¤t->sighand->siglock);
|
||||
}
|
||||
|
||||
/*
|
||||
* inherit ioprio
|
||||
*/
|
||||
p->ioprio = current->ioprio;
|
||||
|
||||
SET_LINKS(p);
|
||||
if (unlikely(p->ptrace & PT_PTRACED))
|
||||
__ptrace_link(p, current->parent);
|
||||
|
@@ -9,6 +9,7 @@
|
||||
#include <linux/irq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/delay.h>
|
||||
|
||||
/*
|
||||
* Autodetection depends on the fact that any interrupt that
|
||||
@@ -26,7 +27,7 @@ static DECLARE_MUTEX(probe_sem);
|
||||
*/
|
||||
unsigned long probe_irq_on(void)
|
||||
{
|
||||
unsigned long val, delay;
|
||||
unsigned long val;
|
||||
irq_desc_t *desc;
|
||||
unsigned int i;
|
||||
|
||||
@@ -45,8 +46,7 @@ unsigned long probe_irq_on(void)
|
||||
}
|
||||
|
||||
/* Wait for longstanding interrupts to trigger. */
|
||||
for (delay = jiffies + HZ/50; time_after(delay, jiffies); )
|
||||
/* about 20ms delay */ barrier();
|
||||
msleep(20);
|
||||
|
||||
/*
|
||||
* enable any unassigned irqs
|
||||
@@ -68,8 +68,7 @@ unsigned long probe_irq_on(void)
|
||||
/*
|
||||
* Wait for spurious interrupts to trigger
|
||||
*/
|
||||
for (delay = jiffies + HZ/10; time_after(delay, jiffies); )
|
||||
/* about 100ms delay */ barrier();
|
||||
msleep(100);
|
||||
|
||||
/*
|
||||
* Now filter out any obviously spurious interrupts
|
||||
|
@@ -172,7 +172,7 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
|
||||
|
||||
spin_lock(&desc->lock);
|
||||
if (!noirqdebug)
|
||||
note_interrupt(irq, desc, action_ret);
|
||||
note_interrupt(irq, desc, action_ret, regs);
|
||||
if (likely(!(desc->status & IRQ_PENDING)))
|
||||
break;
|
||||
desc->status &= ~IRQ_PENDING;
|
||||
|
@@ -6,6 +6,7 @@
|
||||
* This file contains driver APIs to the irq subsystem.
|
||||
*/
|
||||
|
||||
#include <linux/config.h>
|
||||
#include <linux/irq.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/random.h>
|
||||
@@ -255,6 +256,13 @@ void free_irq(unsigned int irq, void *dev_id)
|
||||
|
||||
/* Found it - now remove it from the list of entries */
|
||||
*pp = action->next;
|
||||
|
||||
/* Currently used only by UML, might disappear one day.*/
|
||||
#ifdef CONFIG_IRQ_RELEASE_METHOD
|
||||
if (desc->handler->release)
|
||||
desc->handler->release(irq, dev_id);
|
||||
#endif
|
||||
|
||||
if (!desc->action) {
|
||||
desc->status |= IRQ_DISABLED;
|
||||
if (desc->handler->shutdown)
|
||||
|
@@ -11,6 +11,83 @@
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/interrupt.h>
|
||||
|
||||
static int irqfixup;
|
||||
|
||||
/*
|
||||
* Recovery handler for misrouted interrupts.
|
||||
*/
|
||||
|
||||
static int misrouted_irq(int irq, struct pt_regs *regs)
|
||||
{
|
||||
int i;
|
||||
irq_desc_t *desc;
|
||||
int ok = 0;
|
||||
int work = 0; /* Did we do work for a real IRQ */
|
||||
|
||||
for(i = 1; i < NR_IRQS; i++) {
|
||||
struct irqaction *action;
|
||||
|
||||
if (i == irq) /* Already tried */
|
||||
continue;
|
||||
desc = &irq_desc[i];
|
||||
spin_lock(&desc->lock);
|
||||
action = desc->action;
|
||||
/* Already running on another processor */
|
||||
if (desc->status & IRQ_INPROGRESS) {
|
||||
/*
|
||||
* Already running: If it is shared get the other
|
||||
* CPU to go looking for our mystery interrupt too
|
||||
*/
|
||||
if (desc->action && (desc->action->flags & SA_SHIRQ))
|
||||
desc->status |= IRQ_PENDING;
|
||||
spin_unlock(&desc->lock);
|
||||
continue;
|
||||
}
|
||||
/* Honour the normal IRQ locking */
|
||||
desc->status |= IRQ_INPROGRESS;
|
||||
spin_unlock(&desc->lock);
|
||||
while (action) {
|
||||
/* Only shared IRQ handlers are safe to call */
|
||||
if (action->flags & SA_SHIRQ) {
|
||||
if (action->handler(i, action->dev_id, regs) ==
|
||||
IRQ_HANDLED)
|
||||
ok = 1;
|
||||
}
|
||||
action = action->next;
|
||||
}
|
||||
local_irq_disable();
|
||||
/* Now clean up the flags */
|
||||
spin_lock(&desc->lock);
|
||||
action = desc->action;
|
||||
|
||||
/*
|
||||
* While we were looking for a fixup someone queued a real
|
||||
* IRQ clashing with our walk
|
||||
*/
|
||||
|
||||
while ((desc->status & IRQ_PENDING) && action) {
|
||||
/*
|
||||
* Perform real IRQ processing for the IRQ we deferred
|
||||
*/
|
||||
work = 1;
|
||||
spin_unlock(&desc->lock);
|
||||
handle_IRQ_event(i, regs, action);
|
||||
spin_lock(&desc->lock);
|
||||
desc->status &= ~IRQ_PENDING;
|
||||
}
|
||||
desc->status &= ~IRQ_INPROGRESS;
|
||||
/*
|
||||
* If we did actual work for the real IRQ line we must let the
|
||||
* IRQ controller clean up too
|
||||
*/
|
||||
if(work)
|
||||
desc->handler->end(i);
|
||||
spin_unlock(&desc->lock);
|
||||
}
|
||||
/* So the caller can adjust the irq error counts */
|
||||
return ok;
|
||||
}
|
||||
|
||||
/*
|
||||
* If 99,900 of the previous 100,000 interrupts have not been handled
|
||||
* then assume that the IRQ is stuck in some manner. Drop a diagnostic
|
||||
@@ -31,7 +108,8 @@ __report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
|
||||
printk(KERN_ERR "irq event %d: bogus return value %x\n",
|
||||
irq, action_ret);
|
||||
} else {
|
||||
printk(KERN_ERR "irq %d: nobody cared!\n", irq);
|
||||
printk(KERN_ERR "irq %d: nobody cared (try booting with "
|
||||
"the \"irqpoll\" option)\n", irq);
|
||||
}
|
||||
dump_stack();
|
||||
printk(KERN_ERR "handlers:\n");
|
||||
@@ -45,7 +123,7 @@ __report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
|
||||
}
|
||||
}
|
||||
|
||||
void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
|
||||
static void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
|
||||
{
|
||||
static int count = 100;
|
||||
|
||||
@@ -55,7 +133,8 @@ void report_bad_irq(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
|
||||
}
|
||||
}
|
||||
|
||||
void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
|
||||
void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
if (action_ret != IRQ_HANDLED) {
|
||||
desc->irqs_unhandled++;
|
||||
@@ -63,6 +142,15 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret)
|
||||
report_bad_irq(irq, desc, action_ret);
|
||||
}
|
||||
|
||||
if (unlikely(irqfixup)) {
|
||||
/* Don't punish working computers */
|
||||
if ((irqfixup == 2 && irq == 0) || action_ret == IRQ_NONE) {
|
||||
int ok = misrouted_irq(irq, regs);
|
||||
if (action_ret == IRQ_NONE)
|
||||
desc->irqs_unhandled -= ok;
|
||||
}
|
||||
}
|
||||
|
||||
desc->irq_count++;
|
||||
if (desc->irq_count < 100000)
|
||||
return;
|
||||
@@ -94,3 +182,24 @@ int __init noirqdebug_setup(char *str)
|
||||
|
||||
__setup("noirqdebug", noirqdebug_setup);
|
||||
|
||||
static int __init irqfixup_setup(char *str)
|
||||
{
|
||||
irqfixup = 1;
|
||||
printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
|
||||
printk(KERN_WARNING "This may impact system performance.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("irqfixup", irqfixup_setup);
|
||||
|
||||
static int __init irqpoll_setup(char *str)
|
||||
{
|
||||
irqfixup = 2;
|
||||
printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
|
||||
"enabled\n");
|
||||
printk(KERN_WARNING "This may significantly impact system "
|
||||
"performance\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("irqpoll", irqpoll_setup);
|
||||
|
@@ -153,11 +153,15 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
|
||||
|
||||
switch (which) {
|
||||
case ITIMER_REAL:
|
||||
again:
|
||||
spin_lock_irq(&tsk->sighand->siglock);
|
||||
interval = tsk->signal->it_real_incr;
|
||||
val = it_real_value(tsk->signal);
|
||||
if (val)
|
||||
del_timer_sync(&tsk->signal->real_timer);
|
||||
/* We are sharing ->siglock with it_real_fn() */
|
||||
if (try_to_del_timer_sync(&tsk->signal->real_timer) < 0) {
|
||||
spin_unlock_irq(&tsk->sighand->siglock);
|
||||
goto again;
|
||||
}
|
||||
tsk->signal->it_real_incr =
|
||||
timeval_to_jiffies(&value->it_interval);
|
||||
it_real_arm(tsk, timeval_to_jiffies(&value->it_value));
|
||||
|
kernel/kexec.c (new file, 1063 lines; diff omitted here because of its size)
@@ -120,6 +120,7 @@ struct subprocess_info {
|
||||
char *path;
|
||||
char **argv;
|
||||
char **envp;
|
||||
struct key *ring;
|
||||
int wait;
|
||||
int retval;
|
||||
};
|
||||
@@ -130,16 +131,21 @@ struct subprocess_info {
|
||||
static int ____call_usermodehelper(void *data)
|
||||
{
|
||||
struct subprocess_info *sub_info = data;
|
||||
struct key *old_session;
|
||||
int retval;
|
||||
|
||||
/* Unblock all signals. */
|
||||
/* Unblock all signals and set the session keyring. */
|
||||
key_get(sub_info->ring);
|
||||
flush_signals(current);
|
||||
spin_lock_irq(¤t->sighand->siglock);
|
||||
old_session = __install_session_keyring(current, sub_info->ring);
|
||||
flush_signal_handlers(current, 1);
|
||||
sigemptyset(¤t->blocked);
|
||||
recalc_sigpending();
|
||||
spin_unlock_irq(¤t->sighand->siglock);
|
||||
|
||||
key_put(old_session);
|
||||
|
||||
/* We can run anywhere, unlike our parent keventd(). */
|
||||
set_cpus_allowed(current, CPU_MASK_ALL);
|
||||
|
||||
@@ -211,10 +217,11 @@ static void __call_usermodehelper(void *data)
|
||||
}
|
||||
|
||||
/**
|
||||
* call_usermodehelper - start a usermode application
|
||||
* call_usermodehelper_keys - start a usermode application
|
||||
* @path: pathname for the application
|
||||
* @argv: null-terminated argument list
|
||||
* @envp: null-terminated environment list
|
||||
* @session_keyring: session keyring for process (NULL for an empty keyring)
|
||||
* @wait: wait for the application to finish and return status.
|
||||
*
|
||||
* Runs a user-space application. The application is started
|
||||
@@ -224,7 +231,8 @@ static void __call_usermodehelper(void *data)
|
||||
* Must be called from process context. Returns a negative error code
|
||||
* if program was not execed successfully, or 0.
|
||||
*/
|
||||
int call_usermodehelper(char *path, char **argv, char **envp, int wait)
|
||||
int call_usermodehelper_keys(char *path, char **argv, char **envp,
|
||||
struct key *session_keyring, int wait)
|
||||
{
|
||||
DECLARE_COMPLETION(done);
|
||||
struct subprocess_info sub_info = {
|
||||
@@ -232,6 +240,7 @@ int call_usermodehelper(char *path, char **argv, char **envp, int wait)
|
||||
.path = path,
|
||||
.argv = argv,
|
||||
.envp = envp,
|
||||
.ring = session_keyring,
|
||||
.wait = wait,
|
||||
.retval = 0,
|
||||
};
|
||||
@@ -247,7 +256,7 @@ int call_usermodehelper(char *path, char **argv, char **envp, int wait)
|
||||
wait_for_completion(&done);
|
||||
return sub_info.retval;
|
||||
}
|
||||
EXPORT_SYMBOL(call_usermodehelper);
|
||||
EXPORT_SYMBOL(call_usermodehelper_keys);
|
||||
|
||||
void __init usermodehelper_init(void)
|
||||
{
|
||||
|
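The kmod.c hunks above add call_usermodehelper_keys() with the signature shown (path, argv, envp, session keyring, wait flag). A hedged usage sketch follows; this throwaway module is invented for illustration and is not part of the merge. It passes a NULL keyring (documented above as meaning an empty keyring) and waits for the helper to finish.

/* Hedged usage sketch for call_usermodehelper_keys(). */
#include <linux/module.h>
#include <linux/kmod.h>

static int __init umh_keys_demo_init(void)
{
        char *argv[] = { "/bin/true", NULL };
        char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/sbin:/usr/bin", NULL };

        /* NULL session keyring -> empty keyring; wait=1 blocks until exit. */
        return call_usermodehelper_keys("/bin/true", argv, envp, NULL, 1);
}

static void __exit umh_keys_demo_exit(void)
{
}

module_init(umh_keys_demo_init);
module_exit(umh_keys_demo_exit);
MODULE_LICENSE("GPL");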
kernel/kprobes.c
@@ -27,12 +27,16 @@
|
||||
* interface to access function arguments.
|
||||
* 2004-Sep Prasanna S Panchamukhi <prasanna@in.ibm.com> Changed Kprobes
|
||||
* exceptions notifier to be first on the priority list.
|
||||
* 2005-May Hien Nguyen <hien@us.ibm.com>, Jim Keniston
|
||||
* <jkenisto@us.ibm.com> and Prasanna S Panchamukhi
|
||||
* <prasanna@in.ibm.com> added function-return probes.
|
||||
*/
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/moduleloader.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/errno.h>
|
||||
#include <asm/kdebug.h>
|
||||
@@ -41,11 +45,112 @@
|
||||
#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
|
||||
|
||||
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
|
||||
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
|
||||
|
||||
unsigned int kprobe_cpu = NR_CPUS;
|
||||
static DEFINE_SPINLOCK(kprobe_lock);
|
||||
static struct kprobe *curr_kprobe;
|
||||
|
||||
/*
|
||||
* kprobe->ainsn.insn points to the copy of the instruction to be
|
||||
* single-stepped. x86_64, POWER4 and above have no-exec support and
|
||||
* stepping on the instruction on a vmalloced/kmalloced/data page
|
||||
* is a recipe for disaster
|
||||
*/
|
||||
#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
|
||||
|
||||
struct kprobe_insn_page {
|
||||
struct hlist_node hlist;
|
||||
kprobe_opcode_t *insns; /* Page of instruction slots */
|
||||
char slot_used[INSNS_PER_PAGE];
|
||||
int nused;
|
||||
};
|
||||
|
||||
static struct hlist_head kprobe_insn_pages;
|
||||
|
||||
/**
|
||||
* get_insn_slot() - Find a slot on an executable page for an instruction.
|
||||
* We allocate an executable page if there's no room on existing ones.
|
||||
*/
|
||||
kprobe_opcode_t *get_insn_slot(void)
|
||||
{
|
||||
struct kprobe_insn_page *kip;
|
||||
struct hlist_node *pos;
|
||||
|
||||
hlist_for_each(pos, &kprobe_insn_pages) {
|
||||
kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
|
||||
if (kip->nused < INSNS_PER_PAGE) {
|
||||
int i;
|
||||
for (i = 0; i < INSNS_PER_PAGE; i++) {
|
||||
if (!kip->slot_used[i]) {
|
||||
kip->slot_used[i] = 1;
|
||||
kip->nused++;
|
||||
return kip->insns + (i * MAX_INSN_SIZE);
|
||||
}
|
||||
}
|
||||
/* Surprise! No unused slots. Fix kip->nused. */
|
||||
kip->nused = INSNS_PER_PAGE;
|
||||
}
|
||||
}
|
||||
|
||||
/* All out of space. Need to allocate a new page. Use slot 0.*/
|
||||
kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
|
||||
if (!kip) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use module_alloc so this page is within +/- 2GB of where the
|
||||
* kernel image and loaded module images reside. This is required
|
||||
* so x86_64 can correctly handle the %rip-relative fixups.
|
||||
*/
|
||||
kip->insns = module_alloc(PAGE_SIZE);
|
||||
if (!kip->insns) {
|
||||
kfree(kip);
|
||||
return NULL;
|
||||
}
|
||||
INIT_HLIST_NODE(&kip->hlist);
|
||||
hlist_add_head(&kip->hlist, &kprobe_insn_pages);
|
||||
memset(kip->slot_used, 0, INSNS_PER_PAGE);
|
||||
kip->slot_used[0] = 1;
|
||||
kip->nused = 1;
|
||||
return kip->insns;
|
||||
}
|
||||
|
||||
void free_insn_slot(kprobe_opcode_t *slot)
|
||||
{
|
||||
struct kprobe_insn_page *kip;
|
||||
struct hlist_node *pos;
|
||||
|
||||
hlist_for_each(pos, &kprobe_insn_pages) {
|
||||
kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
|
||||
if (kip->insns <= slot &&
|
||||
slot < kip->insns + (INSNS_PER_PAGE * MAX_INSN_SIZE)) {
|
||||
int i = (slot - kip->insns) / MAX_INSN_SIZE;
|
||||
kip->slot_used[i] = 0;
|
||||
kip->nused--;
|
||||
if (kip->nused == 0) {
|
||||
/*
|
||||
* Page is no longer in use. Free it unless
|
||||
* it's the last one. We keep the last one
|
||||
* so as not to have to set it up again the
|
||||
* next time somebody inserts a probe.
|
||||
*/
|
||||
hlist_del(&kip->hlist);
|
||||
if (hlist_empty(&kprobe_insn_pages)) {
|
||||
INIT_HLIST_NODE(&kip->hlist);
|
||||
hlist_add_head(&kip->hlist,
|
||||
&kprobe_insn_pages);
|
||||
} else {
|
||||
module_free(NULL, kip->insns);
|
||||
kfree(kip);
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Locks kprobe: irqs must be disabled */
|
||||
void lock_kprobes(void)
|
||||
{
|
||||
@@ -78,22 +183,23 @@ struct kprobe *get_kprobe(void *addr)
|
||||
* Aggregate handlers for multiple kprobes support - these handlers
|
||||
* take care of invoking the individual kprobe handlers on p->list
|
||||
*/
|
||||
int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
list_for_each_entry(kp, &p->list, list) {
|
||||
if (kp->pre_handler) {
|
||||
curr_kprobe = kp;
|
||||
kp->pre_handler(kp, regs);
|
||||
curr_kprobe = NULL;
|
||||
if (kp->pre_handler(kp, regs))
|
||||
return 1;
|
||||
}
|
||||
curr_kprobe = NULL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
unsigned long flags)
|
||||
static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
@@ -107,7 +213,8 @@ void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
return;
|
||||
}
|
||||
|
||||
int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr)
|
||||
static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
int trapnr)
|
||||
{
|
||||
/*
|
||||
* if we faulted "during" the execution of a user specified
|
||||
@@ -120,19 +227,159 @@ int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
struct kprobe *kp = curr_kprobe;
|
||||
if (curr_kprobe && kp->break_handler) {
|
||||
if (kp->break_handler(kp, regs)) {
|
||||
curr_kprobe = NULL;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
curr_kprobe = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp)
|
||||
{
|
||||
struct hlist_node *node;
|
||||
struct kretprobe_instance *ri;
|
||||
hlist_for_each_entry(ri, node, &rp->free_instances, uflist)
|
||||
return ri;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp)
|
||||
{
|
||||
struct hlist_node *node;
|
||||
struct kretprobe_instance *ri;
|
||||
hlist_for_each_entry(ri, node, &rp->used_instances, uflist)
|
||||
return ri;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void add_rp_inst(struct kretprobe_instance *ri)
|
||||
{
|
||||
/*
|
||||
* Remove rp inst off the free list -
|
||||
* Add it back when probed function returns
|
||||
*/
|
||||
hlist_del(&ri->uflist);
|
||||
|
||||
/* Add rp inst onto table */
|
||||
INIT_HLIST_NODE(&ri->hlist);
|
||||
hlist_add_head(&ri->hlist,
|
||||
&kretprobe_inst_table[hash_ptr(ri->task, KPROBE_HASH_BITS)]);
|
||||
|
||||
/* Also add this rp inst to the used list. */
|
||||
INIT_HLIST_NODE(&ri->uflist);
|
||||
hlist_add_head(&ri->uflist, &ri->rp->used_instances);
|
||||
}
|
||||
|
||||
void recycle_rp_inst(struct kretprobe_instance *ri)
|
||||
{
|
||||
/* remove rp inst off the rprobe_inst_table */
|
||||
hlist_del(&ri->hlist);
|
||||
if (ri->rp) {
|
||||
/* remove rp inst off the used list */
|
||||
hlist_del(&ri->uflist);
|
||||
/* put rp inst back onto the free list */
|
||||
INIT_HLIST_NODE(&ri->uflist);
|
||||
hlist_add_head(&ri->uflist, &ri->rp->free_instances);
|
||||
} else
|
||||
/* Unregistering */
|
||||
kfree(ri);
|
||||
}
|
||||
|
||||
struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk)
|
||||
{
|
||||
return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called from exit_thread or flush_thread when task tk's
|
||||
* stack is being recycled so that we can recycle any function-return probe
|
||||
* instances associated with this task. These left over instances represent
|
||||
* probed functions that have been called but will never return.
|
||||
*/
|
||||
void kprobe_flush_task(struct task_struct *tk)
|
||||
{
|
||||
struct kretprobe_instance *ri;
|
||||
struct hlist_head *head;
|
||||
struct hlist_node *node, *tmp;
|
||||
unsigned long flags = 0;
|
||||
|
||||
spin_lock_irqsave(&kprobe_lock, flags);
|
||||
head = kretprobe_inst_table_head(current);
|
||||
hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
|
||||
if (ri->task == tk)
|
||||
recycle_rp_inst(ri);
|
||||
}
|
||||
spin_unlock_irqrestore(&kprobe_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* This kprobe pre_handler is registered with every kretprobe. When probe
|
||||
* hits it will set up the return probe.
|
||||
*/
|
||||
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
struct kretprobe *rp = container_of(p, struct kretprobe, kp);
|
||||
|
||||
/*TODO: consider to only swap the RA after the last pre_handler fired */
|
||||
arch_prepare_kretprobe(rp, regs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void free_rp_inst(struct kretprobe *rp)
|
||||
{
|
||||
struct kretprobe_instance *ri;
|
||||
while ((ri = get_free_rp_inst(rp)) != NULL) {
|
||||
hlist_del(&ri->uflist);
|
||||
kfree(ri);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Keep all fields in the kprobe consistent
|
||||
*/
|
||||
static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
|
||||
{
|
||||
memcpy(&p->opcode, &old_p->opcode, sizeof(kprobe_opcode_t));
|
||||
memcpy(&p->ainsn, &old_p->ainsn, sizeof(struct arch_specific_insn));
|
||||
}
|
||||
|
||||
/*
|
||||
* Add the new probe to old_p->list. Fail if this is the
|
||||
* second jprobe at the address - two jprobes can't coexist
|
||||
*/
|
||||
static int add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
if (p->break_handler) {
|
||||
list_for_each_entry(kp, &old_p->list, list) {
|
||||
if (kp->break_handler)
|
||||
return -EEXIST;
|
||||
}
|
||||
list_add_tail(&p->list, &old_p->list);
|
||||
} else
|
||||
list_add(&p->list, &old_p->list);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fill in the required fields of the "manager kprobe". Replace the
|
||||
* earlier kprobe in the hlist with the manager kprobe
|
||||
*/
|
||||
static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
|
||||
{
|
||||
copy_kprobe(p, ap);
|
||||
ap->addr = p->addr;
|
||||
ap->opcode = p->opcode;
|
||||
memcpy(&ap->ainsn, &p->ainsn, sizeof(struct arch_specific_insn));
|
||||
|
||||
ap->pre_handler = aggr_pre_handler;
|
||||
ap->post_handler = aggr_post_handler;
|
||||
ap->fault_handler = aggr_fault_handler;
|
||||
ap->break_handler = aggr_break_handler;
|
||||
|
||||
INIT_LIST_HEAD(&ap->list);
|
||||
list_add(&p->list, &ap->list);
|
||||
@@ -153,16 +400,16 @@ static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p)
|
||||
int ret = 0;
|
||||
struct kprobe *ap;
|
||||
|
||||
if (old_p->break_handler || p->break_handler) {
|
||||
ret = -EEXIST; /* kprobe and jprobe can't (yet) coexist */
|
||||
} else if (old_p->pre_handler == aggr_pre_handler) {
|
||||
list_add(&p->list, &old_p->list);
|
||||
if (old_p->pre_handler == aggr_pre_handler) {
|
||||
copy_kprobe(old_p, p);
|
||||
ret = add_new_kprobe(old_p, p);
|
||||
} else {
|
||||
ap = kcalloc(1, sizeof(struct kprobe), GFP_ATOMIC);
|
||||
if (!ap)
|
||||
return -ENOMEM;
|
||||
add_aggr_kprobe(ap, old_p);
|
||||
list_add(&p->list, &ap->list);
|
||||
copy_kprobe(ap, p);
|
||||
ret = add_new_kprobe(ap, p);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@@ -170,10 +417,8 @@ static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p)
|
||||
/* kprobe removal house-keeping routines */
|
||||
static inline void cleanup_kprobe(struct kprobe *p, unsigned long flags)
|
||||
{
|
||||
*p->addr = p->opcode;
|
||||
arch_disarm_kprobe(p);
|
||||
hlist_del(&p->hlist);
|
||||
flush_icache_range((unsigned long) p->addr,
|
||||
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
|
||||
spin_unlock_irqrestore(&kprobe_lock, flags);
|
||||
arch_remove_kprobe(p);
|
||||
}
|
||||
@@ -200,6 +445,7 @@ int register_kprobe(struct kprobe *p)
|
||||
}
|
||||
spin_lock_irqsave(&kprobe_lock, flags);
|
||||
old_p = get_kprobe(p->addr);
|
||||
p->nmissed = 0;
|
||||
if (old_p) {
|
||||
ret = register_aggr_kprobe(old_p, p);
|
||||
goto out;
|
||||
@@ -210,10 +456,8 @@ int register_kprobe(struct kprobe *p)
|
||||
hlist_add_head(&p->hlist,
|
||||
&kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
|
||||
|
||||
p->opcode = *p->addr;
|
||||
*p->addr = BREAKPOINT_INSTRUCTION;
|
||||
flush_icache_range((unsigned long) p->addr,
|
||||
(unsigned long) p->addr + sizeof(kprobe_opcode_t));
|
||||
arch_arm_kprobe(p);
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&kprobe_lock, flags);
|
||||
rm_kprobe:
|
||||
@@ -257,16 +501,83 @@ void unregister_jprobe(struct jprobe *jp)
|
||||
unregister_kprobe(&jp->kp);
|
||||
}
|
||||
|
||||
#ifdef ARCH_SUPPORTS_KRETPROBES
|
||||
|
||||
int register_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
int ret = 0;
|
||||
struct kretprobe_instance *inst;
|
||||
int i;
|
||||
|
||||
rp->kp.pre_handler = pre_handler_kretprobe;
|
||||
|
||||
/* Pre-allocate memory for max kretprobe instances */
|
||||
if (rp->maxactive <= 0) {
|
||||
#ifdef CONFIG_PREEMPT
|
||||
rp->maxactive = max(10, 2 * NR_CPUS);
|
||||
#else
|
||||
rp->maxactive = NR_CPUS;
|
||||
#endif
|
||||
}
|
||||
INIT_HLIST_HEAD(&rp->used_instances);
|
||||
INIT_HLIST_HEAD(&rp->free_instances);
|
||||
for (i = 0; i < rp->maxactive; i++) {
|
||||
inst = kmalloc(sizeof(struct kretprobe_instance), GFP_KERNEL);
|
||||
if (inst == NULL) {
|
||||
free_rp_inst(rp);
|
||||
return -ENOMEM;
|
||||
}
|
||||
INIT_HLIST_NODE(&inst->uflist);
|
||||
hlist_add_head(&inst->uflist, &rp->free_instances);
|
||||
}
|
||||
|
||||
rp->nmissed = 0;
|
||||
/* Establish function entry probe point */
|
||||
if ((ret = register_kprobe(&rp->kp)) != 0)
|
||||
free_rp_inst(rp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#else /* ARCH_SUPPORTS_KRETPROBES */
|
||||
|
||||
int register_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
#endif /* ARCH_SUPPORTS_KRETPROBES */
|
||||
|
||||
void unregister_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct kretprobe_instance *ri;
|
||||
|
||||
unregister_kprobe(&rp->kp);
|
||||
/* No race here */
|
||||
spin_lock_irqsave(&kprobe_lock, flags);
|
||||
free_rp_inst(rp);
|
||||
while ((ri = get_used_rp_inst(rp)) != NULL) {
|
||||
ri->rp = NULL;
|
||||
hlist_del(&ri->uflist);
|
||||
}
|
||||
spin_unlock_irqrestore(&kprobe_lock, flags);
|
||||
}
|
||||
|
||||
static int __init init_kprobes(void)
|
||||
{
|
||||
int i, err = 0;
|
||||
|
||||
/* FIXME allocate the probe table, currently defined statically */
|
||||
/* initialize all list heads */
|
||||
for (i = 0; i < KPROBE_TABLE_SIZE; i++)
|
||||
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
|
||||
INIT_HLIST_HEAD(&kprobe_table[i]);
|
||||
INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
|
||||
}
|
||||
|
||||
err = arch_init();
|
||||
if (!err)
|
||||
err = register_die_notifier(&kprobe_exceptions_nb);
|
||||
|
||||
err = register_die_notifier(&kprobe_exceptions_nb);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -277,3 +588,6 @@ EXPORT_SYMBOL_GPL(unregister_kprobe);
|
||||
EXPORT_SYMBOL_GPL(register_jprobe);
|
||||
EXPORT_SYMBOL_GPL(unregister_jprobe);
|
||||
EXPORT_SYMBOL_GPL(jprobe_return);
|
||||
EXPORT_SYMBOL_GPL(register_kretprobe);
|
||||
EXPORT_SYMBOL_GPL(unregister_kretprobe);
|
||||
|
||||
|
@@ -30,12 +30,25 @@ static ssize_t hotplug_seqnum_show(struct subsystem *subsys, char *page)
|
||||
KERNEL_ATTR_RO(hotplug_seqnum);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KEXEC
|
||||
#include <asm/kexec.h>
|
||||
|
||||
static ssize_t crash_notes_show(struct subsystem *subsys, char *page)
|
||||
{
|
||||
return sprintf(page, "%p\n", (void *)crash_notes);
|
||||
}
|
||||
KERNEL_ATTR_RO(crash_notes);
|
||||
#endif
|
||||
|
||||
decl_subsys(kernel, NULL, NULL);
|
||||
EXPORT_SYMBOL_GPL(kernel_subsys);
|
||||
|
||||
static struct attribute * kernel_attrs[] = {
|
||||
#ifdef CONFIG_HOTPLUG
|
||||
&hotplug_seqnum_attr.attr,
|
||||
#endif
|
||||
#ifdef CONFIG_KEXEC
|
||||
&crash_notes_attr.attr,
|
||||
#endif
|
||||
NULL
|
||||
};
|
||||
|
@@ -35,6 +35,7 @@
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/stop_machine.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/string.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/semaphore.h>
|
||||
#include <asm/cacheflush.h>
|
||||
@@ -370,6 +371,43 @@ static inline void percpu_modcopy(void *pcpudst, const void *src,
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#ifdef CONFIG_MODULE_UNLOAD
|
||||
#define MODINFO_ATTR(field) \
|
||||
static void setup_modinfo_##field(struct module *mod, const char *s) \
|
||||
{ \
|
||||
mod->field = kstrdup(s, GFP_KERNEL); \
|
||||
} \
|
||||
static ssize_t show_modinfo_##field(struct module_attribute *mattr, \
|
||||
struct module *mod, char *buffer) \
|
||||
{ \
|
||||
return sprintf(buffer, "%s\n", mod->field); \
|
||||
} \
|
||||
static int modinfo_##field##_exists(struct module *mod) \
|
||||
{ \
|
||||
return mod->field != NULL; \
|
||||
} \
|
||||
static void free_modinfo_##field(struct module *mod) \
|
||||
{ \
|
||||
kfree(mod->field); \
|
||||
mod->field = NULL; \
|
||||
} \
|
||||
static struct module_attribute modinfo_##field = { \
|
||||
.attr = { .name = __stringify(field), .mode = 0444, \
|
||||
.owner = THIS_MODULE }, \
|
||||
.show = show_modinfo_##field, \
|
||||
.setup = setup_modinfo_##field, \
|
||||
.test = modinfo_##field##_exists, \
|
||||
.free = free_modinfo_##field, \
|
||||
};
|
||||
|
||||
MODINFO_ATTR(version);
|
||||
MODINFO_ATTR(srcversion);
|
||||
|
||||
static struct module_attribute *modinfo_attrs[] = {
|
||||
&modinfo_version,
|
||||
&modinfo_srcversion,
|
||||
NULL,
|
||||
};
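Since MODINFO_ATTR() is token-pasting heavy, here is a hand-written expansion of MODINFO_ATTR(version) for reference (editorial illustration only; whitespace differs from real preprocessor output, and the logic is copied straight from the macro above).

static void setup_modinfo_version(struct module *mod, const char *s)
{
        mod->version = kstrdup(s, GFP_KERNEL);
}
static ssize_t show_modinfo_version(struct module_attribute *mattr,
                                    struct module *mod, char *buffer)
{
        return sprintf(buffer, "%s\n", mod->version);
}
static int modinfo_version_exists(struct module *mod)
{
        return mod->version != NULL;
}
static void free_modinfo_version(struct module *mod)
{
        kfree(mod->version);
        mod->version = NULL;
}
static struct module_attribute modinfo_version = {
        .attr = { .name = "version", .mode = 0444, .owner = THIS_MODULE },
        .show = show_modinfo_version,
        .setup = setup_modinfo_version,
        .test = modinfo_version_exists,
        .free = free_modinfo_version,
};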
|
||||
|
||||
/* Init the unload section of the module. */
|
||||
static void module_unload_init(struct module *mod)
|
||||
{
|
||||
@@ -379,7 +417,7 @@ static void module_unload_init(struct module *mod)
|
||||
for (i = 0; i < NR_CPUS; i++)
|
||||
local_set(&mod->ref[i].count, 0);
|
||||
/* Hold reference count during initialization. */
|
||||
local_set(&mod->ref[_smp_processor_id()].count, 1);
|
||||
local_set(&mod->ref[raw_smp_processor_id()].count, 1);
|
||||
/* Backwards compatibility macros put refcount during init. */
|
||||
mod->waiter = current;
|
||||
}
|
||||
@@ -692,7 +730,7 @@ static int obsparm_copy_string(const char *val, struct kernel_param *kp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int set_obsolete(const char *val, struct kernel_param *kp)
|
||||
static int set_obsolete(const char *val, struct kernel_param *kp)
|
||||
{
|
||||
unsigned int min, max;
|
||||
unsigned int size, maxsize;
|
||||
@@ -1031,6 +1069,32 @@ static void module_remove_refcnt_attr(struct module *mod)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_MODULE_UNLOAD
|
||||
static int module_add_modinfo_attrs(struct module *mod)
|
||||
{
|
||||
struct module_attribute *attr;
|
||||
int error = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; (attr = modinfo_attrs[i]) && !error; i++) {
|
||||
if (!attr->test ||
|
||||
(attr->test && attr->test(mod)))
|
||||
error = sysfs_create_file(&mod->mkobj.kobj,&attr->attr);
|
||||
}
|
||||
return error;
|
||||
}
|
||||
|
||||
static void module_remove_modinfo_attrs(struct module *mod)
|
||||
{
|
||||
struct module_attribute *attr;
|
||||
int i;
|
||||
|
||||
for (i = 0; (attr = modinfo_attrs[i]); i++) {
|
||||
sysfs_remove_file(&mod->mkobj.kobj,&attr->attr);
|
||||
attr->free(mod);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static int mod_sysfs_setup(struct module *mod,
|
||||
struct kernel_param *kparam,
|
||||
@@ -1056,6 +1120,12 @@ static int mod_sysfs_setup(struct module *mod,
|
||||
if (err)
|
||||
goto out_unreg;
|
||||
|
||||
#ifdef CONFIG_MODULE_UNLOAD
|
||||
err = module_add_modinfo_attrs(mod);
|
||||
if (err)
|
||||
goto out_unreg;
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
||||
out_unreg:
|
||||
@@ -1066,6 +1136,9 @@ out:
|
||||
|
||||
static void mod_kobject_remove(struct module *mod)
|
||||
{
|
||||
#ifdef CONFIG_MODULE_UNLOAD
|
||||
module_remove_modinfo_attrs(mod);
|
||||
#endif
|
||||
module_remove_refcnt_attr(mod);
|
||||
module_param_sysfs_remove(mod);
|
||||
|
||||
@@ -1311,6 +1384,23 @@ static char *get_modinfo(Elf_Shdr *sechdrs,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MODULE_UNLOAD
|
||||
static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs,
|
||||
unsigned int infoindex)
|
||||
{
|
||||
struct module_attribute *attr;
|
||||
int i;
|
||||
|
||||
for (i = 0; (attr = modinfo_attrs[i]); i++) {
|
||||
if (attr->setup)
|
||||
attr->setup(mod,
|
||||
get_modinfo(sechdrs,
|
||||
infoindex,
|
||||
attr->attr.name));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_KALLSYMS
|
||||
int is_exported(const char *name, const struct module *mod)
|
||||
{
|
||||
@@ -1615,6 +1705,11 @@ static struct module *load_module(void __user *umod,
|
||||
/* Set up license info based on the info section */
|
||||
set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
|
||||
|
||||
#ifdef CONFIG_MODULE_UNLOAD
|
||||
/* Set up MODINFO_ATTR fields */
|
||||
setup_modinfo(mod, sechdrs, infoindex);
|
||||
#endif
|
||||
|
||||
/* Fix up syms, so that st_value is a pointer to location. */
|
||||
err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex,
|
||||
mod);
|
||||
|
@@ -18,6 +18,7 @@
|
||||
#include <linux/sysrq.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/nmi.h>
|
||||
#include <linux/kexec.h>
|
||||
|
||||
int panic_timeout;
|
||||
int panic_on_oops;
|
||||
@@ -63,6 +64,13 @@ NORET_TYPE void panic(const char * fmt, ...)
|
||||
unsigned long caller = (unsigned long) __builtin_return_address(0);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* It's possible to come here directly from a panic-assertion and not
|
||||
* have preempt disabled. Some functions called from here want
|
||||
* preempt to be disabled. No point enabling it later though...
|
||||
*/
|
||||
preempt_disable();
|
||||
|
||||
bust_spinlocks(1);
|
||||
va_start(args, fmt);
|
||||
vsnprintf(buf, sizeof(buf), fmt, args);
|
||||
@@ -70,7 +78,19 @@ NORET_TYPE void panic(const char * fmt, ...)
|
||||
printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
|
||||
bust_spinlocks(0);
|
||||
|
||||
/*
|
||||
* If we have crashed and we have a crash kernel loaded let it handle
|
||||
* everything else.
|
||||
* Do we want to call this before we try to display a message?
|
||||
*/
|
||||
crash_kexec(NULL);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Note smp_send_stop is the usual smp shutdown function, which
|
||||
* unfortunately means it may not be hardened to work in a panic
|
||||
* situation.
|
||||
*/
|
||||
smp_send_stop();
|
||||
#endif
|
||||
|
||||
@@ -79,8 +99,7 @@ NORET_TYPE void panic(const char * fmt, ...)
|
||||
if (!panic_blink)
|
||||
panic_blink = no_blink;
|
||||
|
||||
if (panic_timeout > 0)
|
||||
{
|
||||
if (panic_timeout > 0) {
|
||||
/*
|
||||
* Delay timeout seconds before rebooting the machine.
|
||||
* We can't use the "normal" timers since we just panicked..
|
||||
|
@@ -629,7 +629,7 @@ static ssize_t module_attr_show(struct kobject *kobj,
|
||||
mk = to_module_kobject(kobj);
|
||||
|
||||
if (!attribute->show)
|
||||
return -EPERM;
|
||||
return -EIO;
|
||||
|
||||
if (!try_module_get(mk->mod))
|
||||
return -ENODEV;
|
||||
@@ -653,7 +653,7 @@ static ssize_t module_attr_store(struct kobject *kobj,
|
||||
mk = to_module_kobject(kobj);
|
||||
|
||||
if (!attribute->store)
|
||||
return -EPERM;
|
||||
return -EIO;
|
||||
|
||||
if (!try_module_get(mk->mod))
|
||||
return -ENODEV;
|
||||
|
@@ -88,23 +88,6 @@ static kmem_cache_t *posix_timers_cache;
|
||||
static struct idr posix_timers_id;
|
||||
static DEFINE_SPINLOCK(idr_lock);
|
||||
|
||||
/*
|
||||
* Just because the timer is not in the timer list does NOT mean it is
|
||||
* inactive. It could be in the "fire" routine getting a new expire time.
|
||||
*/
|
||||
#define TIMER_INACTIVE 1
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
# define timer_active(tmr) \
|
||||
((tmr)->it.real.timer.entry.prev != (void *)TIMER_INACTIVE)
|
||||
# define set_timer_inactive(tmr) \
|
||||
do { \
|
||||
(tmr)->it.real.timer.entry.prev = (void *)TIMER_INACTIVE; \
|
||||
} while (0)
|
||||
#else
|
||||
# define timer_active(tmr) BARFY // error to use outside of SMP
|
||||
# define set_timer_inactive(tmr) do { } while (0)
|
||||
#endif
|
||||
/*
|
||||
* we assume that the new SIGEV_THREAD_ID shares no bits with the other
|
||||
* SIGEV values. Here we put out an error if this assumption fails.
|
||||
@@ -226,7 +209,6 @@ static inline int common_timer_create(struct k_itimer *new_timer)
|
||||
init_timer(&new_timer->it.real.timer);
|
||||
new_timer->it.real.timer.data = (unsigned long) new_timer;
|
||||
new_timer->it.real.timer.function = posix_timer_fn;
|
||||
set_timer_inactive(new_timer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -480,7 +462,6 @@ static void posix_timer_fn(unsigned long __data)
|
||||
int do_notify = 1;
|
||||
|
||||
spin_lock_irqsave(&timr->it_lock, flags);
|
||||
set_timer_inactive(timr);
|
||||
if (!list_empty(&timr->it.real.abs_timer_entry)) {
|
||||
spin_lock(&abs_list.lock);
|
||||
do {
|
||||
@@ -983,8 +964,8 @@ common_timer_set(struct k_itimer *timr, int flags,
|
||||
* careful here. If smp we could be in the "fire" routine which will
|
||||
* be spinning as we hold the lock. But this is ONLY an SMP issue.
|
||||
*/
|
||||
if (try_to_del_timer_sync(&timr->it.real.timer) < 0) {
|
||||
#ifdef CONFIG_SMP
|
||||
if (timer_active(timr) && !del_timer(&timr->it.real.timer))
|
||||
/*
|
||||
* It can only be active if on an other cpu. Since
|
||||
* we have cleared the interval stuff above, it should
|
||||
@@ -994,11 +975,9 @@ common_timer_set(struct k_itimer *timr, int flags,
|
||||
* a "retry" exit status.
|
||||
*/
|
||||
return TIMER_RETRY;
|
||||
|
||||
set_timer_inactive(timr);
|
||||
#else
|
||||
del_timer(&timr->it.real.timer);
|
||||
#endif
|
||||
}
|
||||
|
||||
remove_from_abslist(timr);
|
||||
|
||||
timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
|
||||
@@ -1083,8 +1062,9 @@ retry:
|
||||
static inline int common_timer_del(struct k_itimer *timer)
|
||||
{
|
||||
timer->it.real.incr = 0;
|
||||
|
||||
if (try_to_del_timer_sync(&timer->it.real.timer) < 0) {
|
||||
#ifdef CONFIG_SMP
|
||||
if (timer_active(timer) && !del_timer(&timer->it.real.timer))
|
||||
/*
|
||||
* It can only be active if on an other cpu. Since
|
||||
* we have cleared the interval stuff above, it should
|
||||
@@ -1094,9 +1074,9 @@ static inline int common_timer_del(struct k_itimer *timer)
|
||||
* a "retry" exit status.
|
||||
*/
|
||||
return TIMER_RETRY;
|
||||
#else
|
||||
del_timer(&timer->it.real.timer);
|
||||
#endif
|
||||
}
|
||||
|
||||
remove_from_abslist(timer);
|
||||
|
||||
return 0;
|
||||
|
@@ -27,8 +27,8 @@ config PM_DEBUG
|
||||
like suspend support.
|
||||
|
||||
config SOFTWARE_SUSPEND
|
||||
bool "Software Suspend (EXPERIMENTAL)"
|
||||
depends on EXPERIMENTAL && PM && SWAP
|
||||
bool "Software Suspend"
|
||||
depends on EXPERIMENTAL && PM && SWAP && ((X86 && SMP) || ((FVR || PPC32 || X86) && !SMP))
|
||||
---help---
|
||||
Enable the possibility of suspending the machine.
|
||||
It doesn't need APM.
|
||||
@@ -72,3 +72,7 @@ config PM_STD_PARTITION
|
||||
suspended image to. It will simply pick the first available swap
|
||||
device.
|
||||
|
||||
config SUSPEND_SMP
|
||||
bool
|
||||
depends on HOTPLUG_CPU && X86 && PM
|
||||
default y
|
||||
|
@@ -3,9 +3,9 @@ ifeq ($(CONFIG_PM_DEBUG),y)
|
||||
EXTRA_CFLAGS += -DDEBUG
|
||||
endif
|
||||
|
||||
swsusp-smp-$(CONFIG_SMP) += smp.o
|
||||
|
||||
obj-y := main.o process.o console.o pm.o
|
||||
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o $(swsusp-smp-y) disk.o
|
||||
obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o disk.o
|
||||
|
||||
obj-$(CONFIG_SUSPEND_SMP) += smp.o
|
||||
|
||||
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
|
||||
|
@@ -117,8 +117,8 @@ static void finish(void)
|
||||
{
|
||||
device_resume();
|
||||
platform_finish();
|
||||
enable_nonboot_cpus();
|
||||
thaw_processes();
|
||||
enable_nonboot_cpus();
|
||||
pm_restore_console();
|
||||
}
|
||||
|
||||
@@ -131,28 +131,35 @@ static int prepare_processes(void)
|
||||
|
||||
sys_sync();
|
||||
|
||||
disable_nonboot_cpus();
|
||||
|
||||
if (freeze_processes()) {
|
||||
error = -EBUSY;
|
||||
return error;
|
||||
goto thaw;
|
||||
}
|
||||
|
||||
if (pm_disk_mode == PM_DISK_PLATFORM) {
|
||||
if (pm_ops && pm_ops->prepare) {
|
||||
if ((error = pm_ops->prepare(PM_SUSPEND_DISK)))
|
||||
return error;
|
||||
goto thaw;
|
||||
}
|
||||
}
|
||||
|
||||
/* Free memory before shutting down devices. */
|
||||
free_some_memory();
|
||||
|
||||
return 0;
|
||||
thaw:
|
||||
thaw_processes();
|
||||
enable_nonboot_cpus();
|
||||
pm_restore_console();
|
||||
return error;
|
||||
}
|
||||
|
||||
static void unprepare_processes(void)
|
||||
{
|
||||
enable_nonboot_cpus();
|
||||
platform_finish();
|
||||
thaw_processes();
|
||||
enable_nonboot_cpus();
|
||||
pm_restore_console();
|
||||
}
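Editor's note: the reworked prepare_processes() hunk above converts the early "return error" exits into "goto thaw", so every failure path unwinds through the same thaw_processes() / enable_nonboot_cpus() / pm_restore_console() sequence. A minimal userspace sketch of that goto-based unwind idiom follows; the names and resources here are mine, purely for illustration, not code from this patch.

#include <stdio.h>
#include <stdlib.h>

/* Illustrative only: acquire two resources, undo both on any later failure,
 * with a single unwind path at the bottom (same shape as prepare_processes). */
static int do_work(void)
{
	int error = 0;
	char *buf_a, *buf_b = NULL;

	buf_a = malloc(64);
	if (!buf_a)
		return -1;		/* nothing acquired yet, plain return is fine */

	buf_b = malloc(64);
	if (!buf_b) {
		error = -1;
		goto free_a;		/* unwind in reverse order of setup */
	}

	if (snprintf(buf_b, 64, "payload") < 0) {
		error = -1;
		goto free_b;
	}
	printf("work done\n");

free_b:
	free(buf_b);
free_a:
	free(buf_a);
	return error;
}

int main(void)
{
	return do_work() ? EXIT_FAILURE : EXIT_SUCCESS;
}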
|
||||
|
||||
@@ -160,15 +167,9 @@ static int prepare_devices(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
disable_nonboot_cpus();
|
||||
if ((error = device_suspend(PMSG_FREEZE))) {
|
||||
if ((error = device_suspend(PMSG_FREEZE)))
|
||||
printk("Some devices failed to suspend\n");
|
||||
platform_finish();
|
||||
enable_nonboot_cpus();
|
||||
return error;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return error;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -185,9 +186,9 @@ int pm_suspend_disk(void)
|
||||
int error;
|
||||
|
||||
error = prepare_processes();
|
||||
if (!error) {
|
||||
error = prepare_devices();
|
||||
}
|
||||
if (error)
|
||||
return error;
|
||||
error = prepare_devices();
|
||||
|
||||
if (error) {
|
||||
unprepare_processes();
|
||||
@@ -250,7 +251,7 @@ static int software_resume(void)
|
||||
|
||||
if ((error = prepare_processes())) {
|
||||
swsusp_close();
|
||||
goto Cleanup;
|
||||
goto Done;
|
||||
}
|
||||
|
||||
pr_debug("PM: Reading swsusp image.\n");
|
||||
|
@@ -55,6 +55,13 @@ static int suspend_prepare(suspend_state_t state)
|
||||
|
||||
pm_prepare_console();
|
||||
|
||||
disable_nonboot_cpus();
|
||||
|
||||
if (num_online_cpus() != 1) {
|
||||
error = -EPERM;
|
||||
goto Enable_cpu;
|
||||
}
|
||||
|
||||
if (freeze_processes()) {
|
||||
error = -EAGAIN;
|
||||
goto Thaw;
|
||||
@@ -75,6 +82,8 @@ static int suspend_prepare(suspend_state_t state)
|
||||
pm_ops->finish(state);
|
||||
Thaw:
|
||||
thaw_processes();
|
||||
Enable_cpu:
|
||||
enable_nonboot_cpus();
|
||||
pm_restore_console();
|
||||
return error;
|
||||
}
|
||||
@@ -113,6 +122,7 @@ static void suspend_finish(suspend_state_t state)
|
||||
if (pm_ops && pm_ops->finish)
|
||||
pm_ops->finish(state);
|
||||
thaw_processes();
|
||||
enable_nonboot_cpus();
|
||||
pm_restore_console();
|
||||
}
|
||||
|
||||
@@ -150,12 +160,6 @@ static int enter_state(suspend_state_t state)
|
||||
goto Unlock;
|
||||
}
|
||||
|
||||
/* Suspend is hard to get right on SMP. */
|
||||
if (num_online_cpus() != 1) {
|
||||
error = -EPERM;
|
||||
goto Unlock;
|
||||
}
|
||||
|
||||
pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
|
||||
if ((error = suspend_prepare(state)))
|
||||
goto Unlock;
|
||||
|
@@ -32,7 +32,7 @@ static inline int freezeable(struct task_struct * p)
|
||||
}
|
||||
|
||||
/* Refrigerator is place where frozen processes are stored :-). */
|
||||
void refrigerator(unsigned long flag)
|
||||
void refrigerator(void)
|
||||
{
|
||||
/* Hmm, should we be allowed to suspend when there are realtime
|
||||
processes around? */
|
||||
@@ -41,14 +41,13 @@ void refrigerator(unsigned long flag)
|
||||
current->state = TASK_UNINTERRUPTIBLE;
|
||||
pr_debug("%s entered refrigerator\n", current->comm);
|
||||
printk("=");
|
||||
current->flags &= ~PF_FREEZE;
|
||||
|
||||
frozen_process(current);
|
||||
spin_lock_irq(¤t->sighand->siglock);
|
||||
recalc_sigpending(); /* We sent fake signal, clean it up */
|
||||
spin_unlock_irq(¤t->sighand->siglock);
|
||||
|
||||
current->flags |= PF_FROZEN;
|
||||
while (current->flags & PF_FROZEN)
|
||||
while (frozen(current))
|
||||
schedule();
|
||||
pr_debug("%s left refrigerator\n", current->comm);
|
||||
current->state = save;
|
||||
@@ -57,10 +56,10 @@ void refrigerator(unsigned long flag)
|
||||
/* 0 = success, else # of processes that we failed to stop */
|
||||
int freeze_processes(void)
|
||||
{
|
||||
int todo;
|
||||
unsigned long start_time;
|
||||
int todo;
|
||||
unsigned long start_time;
|
||||
struct task_struct *g, *p;
|
||||
|
||||
|
||||
printk( "Stopping tasks: " );
|
||||
start_time = jiffies;
|
||||
do {
|
||||
@@ -70,14 +69,12 @@ int freeze_processes(void)
|
||||
unsigned long flags;
|
||||
if (!freezeable(p))
|
||||
continue;
|
||||
if ((p->flags & PF_FROZEN) ||
|
||||
if ((frozen(p)) ||
|
||||
(p->state == TASK_TRACED) ||
|
||||
(p->state == TASK_STOPPED))
|
||||
continue;
|
||||
|
||||
/* FIXME: smp problem here: we may not access other process' flags
|
||||
without locking */
|
||||
p->flags |= PF_FREEZE;
|
||||
freeze(p);
|
||||
spin_lock_irqsave(&p->sighand->siglock, flags);
|
||||
signal_wake_up(p, 0);
|
||||
spin_unlock_irqrestore(&p->sighand->siglock, flags);
|
||||
@@ -91,7 +88,7 @@ int freeze_processes(void)
|
||||
return todo;
|
||||
}
|
||||
} while(todo);
|
||||
|
||||
|
||||
printk( "|\n" );
|
||||
BUG_ON(in_atomic());
|
||||
return 0;
|
||||
@@ -106,10 +103,7 @@ void thaw_processes(void)
|
||||
do_each_thread(g, p) {
|
||||
if (!freezeable(p))
|
||||
continue;
|
||||
if (p->flags & PF_FROZEN) {
|
||||
p->flags &= ~PF_FROZEN;
|
||||
wake_up_process(p);
|
||||
} else
|
||||
if (!thaw_process(p))
|
||||
printk(KERN_INFO " Strange, %s not stopped\n", p->comm );
|
||||
} while_each_thread(g, p);
|
||||
|
||||
|
@@ -13,73 +13,52 @@
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
static atomic_t cpu_counter, freeze;
|
||||
|
||||
|
||||
static void smp_pause(void * data)
|
||||
{
|
||||
struct saved_context ctxt;
|
||||
__save_processor_state(&ctxt);
|
||||
printk("Sleeping in:\n");
|
||||
dump_stack();
|
||||
atomic_inc(&cpu_counter);
|
||||
while (atomic_read(&freeze)) {
|
||||
/* FIXME: restore takes place at random piece inside this.
|
||||
This should probably be written in assembly, and
|
||||
preserve general-purpose registers, too
|
||||
|
||||
What about stack? We may need to move to new stack here.
|
||||
|
||||
This should better be ran with interrupts disabled.
|
||||
*/
|
||||
cpu_relax();
|
||||
barrier();
|
||||
}
|
||||
atomic_dec(&cpu_counter);
|
||||
__restore_processor_state(&ctxt);
|
||||
}
|
||||
|
||||
static cpumask_t oldmask;
|
||||
/* This is protected by pm_sem semaphore */
|
||||
static cpumask_t frozen_cpus;
|
||||
|
||||
void disable_nonboot_cpus(void)
|
||||
{
|
||||
oldmask = current->cpus_allowed;
|
||||
set_cpus_allowed(current, cpumask_of_cpu(0));
|
||||
printk("Freezing CPUs (at %d)", _smp_processor_id());
|
||||
current->state = TASK_INTERRUPTIBLE;
|
||||
schedule_timeout(HZ);
|
||||
printk("...");
|
||||
BUG_ON(_smp_processor_id() != 0);
|
||||
int cpu, error;
|
||||
|
||||
/* FIXME: for this to work, all the CPUs must be running
|
||||
* "idle" thread (or we deadlock). Is that guaranteed? */
|
||||
|
||||
atomic_set(&cpu_counter, 0);
|
||||
atomic_set(&freeze, 1);
|
||||
smp_call_function(smp_pause, NULL, 0, 0);
|
||||
while (atomic_read(&cpu_counter) < (num_online_cpus() - 1)) {
|
||||
cpu_relax();
|
||||
barrier();
|
||||
error = 0;
|
||||
cpus_clear(frozen_cpus);
|
||||
printk("Freezing cpus ...\n");
|
||||
for_each_online_cpu(cpu) {
|
||||
if (cpu == 0)
|
||||
continue;
|
||||
error = cpu_down(cpu);
|
||||
if (!error) {
|
||||
cpu_set(cpu, frozen_cpus);
|
||||
printk("CPU%d is down\n", cpu);
|
||||
continue;
|
||||
}
|
||||
printk("Error taking cpu %d down: %d\n", cpu, error);
|
||||
}
|
||||
printk("ok\n");
|
||||
BUG_ON(smp_processor_id() != 0);
|
||||
if (error)
|
||||
panic("cpus not sleeping");
|
||||
}
|
||||
|
||||
void enable_nonboot_cpus(void)
|
||||
{
|
||||
printk("Restarting CPUs");
|
||||
atomic_set(&freeze, 0);
|
||||
while (atomic_read(&cpu_counter)) {
|
||||
cpu_relax();
|
||||
barrier();
|
||||
}
|
||||
printk("...");
|
||||
set_cpus_allowed(current, oldmask);
|
||||
schedule();
|
||||
printk("ok\n");
|
||||
int cpu, error;
|
||||
|
||||
printk("Thawing cpus ...\n");
|
||||
for_each_cpu_mask(cpu, frozen_cpus) {
|
||||
error = smp_prepare_cpu(cpu);
|
||||
if (!error)
|
||||
error = cpu_up(cpu);
|
||||
if (!error) {
|
||||
printk("CPU%d is up\n", cpu);
|
||||
continue;
|
||||
}
|
||||
printk("Error taking cpu %d up: %d\n", cpu, error);
|
||||
panic("Not enough cpus");
|
||||
}
|
||||
cpus_clear(frozen_cpus);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -10,12 +10,12 @@
|
||||
* This file is released under the GPLv2.
|
||||
*
|
||||
* I'd like to thank the following people for their work:
|
||||
*
|
||||
*
|
||||
* Pavel Machek <pavel@ucw.cz>:
|
||||
* Modifications, defectiveness pointing, being with me at the very beginning,
|
||||
* suspend to swap space, stop all tasks. Port to 2.4.18-ac and 2.5.17.
|
||||
*
|
||||
* Steve Doddi <dirk@loth.demon.co.uk>:
|
||||
* Steve Doddi <dirk@loth.demon.co.uk>:
|
||||
* Support the possibility of hardware state restoring.
|
||||
*
|
||||
* Raph <grey.havens@earthling.net>:
|
||||
@@ -81,14 +81,14 @@ static int nr_copy_pages_check;
|
||||
extern char resume_file[];
|
||||
|
||||
/* Local variables that should not be affected by save */
|
||||
unsigned int nr_copy_pages __nosavedata = 0;
|
||||
static unsigned int nr_copy_pages __nosavedata = 0;
|
||||
|
||||
/* Suspend pagedir is allocated before final copy, therefore it
|
||||
must be freed after resume
|
||||
must be freed after resume
|
||||
|
||||
Warning: this is evil. There are actually two pagedirs at time of
|
||||
resume. One is "pagedir_save", which is empty frame allocated at
|
||||
time of suspend, that must be freed. Second is "pagedir_nosave",
|
||||
time of suspend, that must be freed. Second is "pagedir_nosave",
|
||||
allocated at time of resume, that travels through memory not to
|
||||
collide with anything.
|
||||
|
||||
@@ -132,7 +132,7 @@ static int mark_swapfiles(swp_entry_t prev)
|
||||
{
|
||||
int error;
|
||||
|
||||
rw_swap_page_sync(READ,
|
||||
rw_swap_page_sync(READ,
|
||||
swp_entry(root_swap, 0),
|
||||
virt_to_page((unsigned long)&swsusp_header));
|
||||
if (!memcmp("SWAP-SPACE",swsusp_header.sig, 10) ||
|
||||
@@ -140,7 +140,7 @@ static int mark_swapfiles(swp_entry_t prev)
|
||||
memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
|
||||
memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
|
||||
swsusp_header.swsusp_info = prev;
|
||||
error = rw_swap_page_sync(WRITE,
|
||||
error = rw_swap_page_sync(WRITE,
|
||||
swp_entry(root_swap, 0),
|
||||
virt_to_page((unsigned long)
|
||||
&swsusp_header));
|
||||
@@ -174,22 +174,22 @@ static int is_resume_device(const struct swap_info_struct *swap_info)
|
||||
static int swsusp_swap_check(void) /* This is called before saving image */
|
||||
{
|
||||
int i, len;
|
||||
|
||||
|
||||
len=strlen(resume_file);
|
||||
root_swap = 0xFFFF;
|
||||
|
||||
|
||||
swap_list_lock();
|
||||
for(i=0; i<MAX_SWAPFILES; i++) {
|
||||
for (i=0; i<MAX_SWAPFILES; i++) {
|
||||
if (swap_info[i].flags == 0) {
|
||||
swapfile_used[i]=SWAPFILE_UNUSED;
|
||||
} else {
|
||||
if(!len) {
|
||||
if (!len) {
|
||||
printk(KERN_WARNING "resume= option should be used to set suspend device" );
|
||||
if(root_swap == 0xFFFF) {
|
||||
if (root_swap == 0xFFFF) {
|
||||
swapfile_used[i] = SWAPFILE_SUSPEND;
|
||||
root_swap = i;
|
||||
} else
|
||||
swapfile_used[i] = SWAPFILE_IGNORED;
|
||||
swapfile_used[i] = SWAPFILE_IGNORED;
|
||||
} else {
|
||||
/* we ignore all swap devices that are not the resume_file */
|
||||
if (is_resume_device(&swap_info[i])) {
|
||||
@@ -209,15 +209,15 @@ static int swsusp_swap_check(void) /* This is called before saving image */
|
||||
* This is called after saving image so modification
|
||||
* will be lost after resume... and that's what we want.
|
||||
* we make the device unusable. A new call to
|
||||
* lock_swapdevices can unlock the devices.
|
||||
* lock_swapdevices can unlock the devices.
|
||||
*/
|
||||
static void lock_swapdevices(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
swap_list_lock();
|
||||
for(i = 0; i< MAX_SWAPFILES; i++)
|
||||
if(swapfile_used[i] == SWAPFILE_IGNORED) {
|
||||
for (i = 0; i< MAX_SWAPFILES; i++)
|
||||
if (swapfile_used[i] == SWAPFILE_IGNORED) {
|
||||
swap_info[i].flags ^= 0xFF;
|
||||
}
|
||||
swap_list_unlock();
|
||||
@@ -229,7 +229,7 @@ static void lock_swapdevices(void)
|
||||
* @loc: Place to store the entry we used.
|
||||
*
|
||||
* Allocate a new swap entry and 'sync' it. Note we discard -EIO
|
||||
* errors. That is an artifact left over from swsusp. It did not
|
||||
* errors. That is an artifact left over from swsusp. It did not
|
||||
* check the return of rw_swap_page_sync() at all, since most pages
|
||||
* written back to swap would return -EIO.
|
||||
* This is a partial improvement, since we will at least return other
|
||||
@@ -241,7 +241,7 @@ static int write_page(unsigned long addr, swp_entry_t * loc)
|
||||
int error = 0;
|
||||
|
||||
entry = get_swap_page();
|
||||
if (swp_offset(entry) &&
|
||||
if (swp_offset(entry) &&
|
||||
swapfile_used[swp_type(entry)] == SWAPFILE_SUSPEND) {
|
||||
error = rw_swap_page_sync(WRITE, entry,
|
||||
virt_to_page(addr));
|
||||
@@ -257,7 +257,7 @@ static int write_page(unsigned long addr, swp_entry_t * loc)
|
||||
/**
|
||||
* data_free - Free the swap entries used by the saved image.
|
||||
*
|
||||
* Walk the list of used swap entries and free each one.
|
||||
* Walk the list of used swap entries and free each one.
|
||||
* This is only used for cleanup when suspend fails.
|
||||
*/
|
||||
static void data_free(void)
|
||||
@@ -290,7 +290,7 @@ static int data_write(void)
|
||||
mod = 1;
|
||||
|
||||
printk( "Writing data to swap (%d pages)... ", nr_copy_pages );
|
||||
for_each_pbe(p, pagedir_nosave) {
|
||||
for_each_pbe (p, pagedir_nosave) {
|
||||
if (!(i%mod))
|
||||
printk( "\b\b\b\b%3d%%", i / mod );
|
||||
if ((error = write_page(p->address, &(p->swap_address))))
|
||||
@@ -335,7 +335,7 @@ static int close_swap(void)
|
||||
|
||||
dump_info();
|
||||
error = write_page((unsigned long)&swsusp_info, &entry);
|
||||
if (!error) {
|
||||
if (!error) {
|
||||
printk( "S" );
|
||||
error = mark_swapfiles(entry);
|
||||
printk( "|\n" );
|
||||
@@ -370,7 +370,7 @@ static int write_pagedir(void)
|
||||
struct pbe * pbe;
|
||||
|
||||
printk( "Writing pagedir...");
|
||||
for_each_pb_page(pbe, pagedir_nosave) {
|
||||
for_each_pb_page (pbe, pagedir_nosave) {
|
||||
if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++])))
|
||||
return error;
|
||||
}
|
||||
@@ -472,7 +472,7 @@ static int save_highmem(void)
|
||||
int res = 0;
|
||||
|
||||
pr_debug("swsusp: Saving Highmem\n");
|
||||
for_each_zone(zone) {
|
||||
for_each_zone (zone) {
|
||||
if (is_highmem(zone))
|
||||
res = save_highmem_zone(zone);
|
||||
if (res)
|
||||
@@ -547,7 +547,7 @@ static void count_data_pages(void)
|
||||
|
||||
nr_copy_pages = 0;
|
||||
|
||||
for_each_zone(zone) {
|
||||
for_each_zone (zone) {
|
||||
if (is_highmem(zone))
|
||||
continue;
|
||||
mark_free_pages(zone);
|
||||
@@ -562,9 +562,9 @@ static void copy_data_pages(void)
|
||||
struct zone *zone;
|
||||
unsigned long zone_pfn;
|
||||
struct pbe * pbe = pagedir_nosave;
|
||||
|
||||
|
||||
pr_debug("copy_data_pages(): pages to copy: %d\n", nr_copy_pages);
|
||||
for_each_zone(zone) {
|
||||
for_each_zone (zone) {
|
||||
if (is_highmem(zone))
|
||||
continue;
|
||||
mark_free_pages(zone);
|
||||
@@ -702,7 +702,7 @@ static void free_image_pages(void)
|
||||
{
|
||||
struct pbe * p;
|
||||
|
||||
for_each_pbe(p, pagedir_save) {
|
||||
for_each_pbe (p, pagedir_save) {
|
||||
if (p->address) {
|
||||
ClearPageNosave(virt_to_page(p->address));
|
||||
free_page(p->address);
|
||||
@@ -719,7 +719,7 @@ static int alloc_image_pages(void)
|
||||
{
|
||||
struct pbe * p;
|
||||
|
||||
for_each_pbe(p, pagedir_save) {
|
||||
for_each_pbe (p, pagedir_save) {
|
||||
p->address = get_zeroed_page(GFP_ATOMIC | __GFP_COLD);
|
||||
if (!p->address)
|
||||
return -ENOMEM;
|
||||
@@ -740,7 +740,7 @@ void swsusp_free(void)
|
||||
/**
|
||||
* enough_free_mem - Make sure we enough free memory to snapshot.
|
||||
*
|
||||
* Returns TRUE or FALSE after checking the number of available
|
||||
* Returns TRUE or FALSE after checking the number of available
|
||||
* free pages.
|
||||
*/
|
||||
|
||||
@@ -758,11 +758,11 @@ static int enough_free_mem(void)
|
||||
/**
|
||||
* enough_swap - Make sure we have enough swap to save the image.
|
||||
*
|
||||
* Returns TRUE or FALSE after checking the total amount of swap
|
||||
* Returns TRUE or FALSE after checking the total amount of swap
|
||||
* space avaiable.
|
||||
*
|
||||
* FIXME: si_swapinfo(&i) returns all swap devices information.
|
||||
* We should only consider resume_device.
|
||||
* We should only consider resume_device.
|
||||
*/
|
||||
|
||||
static int enough_swap(void)
|
||||
@@ -781,18 +781,18 @@ static int swsusp_alloc(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
pagedir_nosave = NULL;
|
||||
nr_copy_pages = calc_nr(nr_copy_pages);
|
||||
|
||||
pr_debug("suspend: (pages needed: %d + %d free: %d)\n",
|
||||
nr_copy_pages, PAGES_FOR_IO, nr_free_pages());
|
||||
|
||||
pagedir_nosave = NULL;
|
||||
if (!enough_free_mem())
|
||||
return -ENOMEM;
|
||||
|
||||
if (!enough_swap())
|
||||
return -ENOSPC;
|
||||
|
||||
nr_copy_pages = calc_nr(nr_copy_pages);
|
||||
|
||||
if (!(pagedir_save = alloc_pagedir(nr_copy_pages))) {
|
||||
printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
|
||||
return -ENOMEM;
|
||||
@@ -827,8 +827,8 @@ static int suspend_prepare_image(void)
|
||||
error = swsusp_alloc();
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
/* During allocating of suspend pagedir, new cold pages may appear.
|
||||
|
||||
/* During allocating of suspend pagedir, new cold pages may appear.
|
||||
* Kill them.
|
||||
*/
|
||||
drain_local_pages();
|
||||
@@ -929,21 +929,6 @@ int swsusp_resume(void)
|
||||
return error;
|
||||
}
|
||||
|
||||
/* More restore stuff */
|
||||
|
||||
/*
|
||||
* Returns true if given address/order collides with any orig_address
|
||||
*/
|
||||
static int does_collide_order(unsigned long addr, int order)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i=0; i < (1<<order); i++)
|
||||
if (!PageNosaveFree(virt_to_page(addr + i * PAGE_SIZE)))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* On resume, for storing the PBE list and the image,
|
||||
* we can only use memory pages that do not conflict with the pages
|
||||
@@ -973,7 +958,7 @@ static unsigned long get_usable_page(unsigned gfp_mask)
|
||||
unsigned long m;
|
||||
|
||||
m = get_zeroed_page(gfp_mask);
|
||||
while (does_collide_order(m, 0)) {
|
||||
while (!PageNosaveFree(virt_to_page(m))) {
|
||||
eat_page((void *)m);
|
||||
m = get_zeroed_page(gfp_mask);
|
||||
if (!m)
|
||||
@@ -1045,7 +1030,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist)
|
||||
|
||||
/* Set page flags */
|
||||
|
||||
for_each_zone(zone) {
|
||||
for_each_zone (zone) {
|
||||
for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn)
|
||||
SetPageNosaveFree(pfn_to_page(zone_pfn +
|
||||
zone->zone_start_pfn));
|
||||
@@ -1061,7 +1046,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist)
|
||||
/* Relocate colliding pages */
|
||||
|
||||
for_each_pb_page (pbpage, pblist) {
|
||||
if (does_collide_order((unsigned long)pbpage, 0)) {
|
||||
if (!PageNosaveFree(virt_to_page((unsigned long)pbpage))) {
|
||||
m = (void *)get_usable_page(GFP_ATOMIC | __GFP_COLD);
|
||||
if (!m) {
|
||||
error = -ENOMEM;
|
||||
@@ -1193,8 +1178,10 @@ static const char * sanity_check(void)
|
||||
return "version";
|
||||
if (strcmp(swsusp_info.uts.machine,system_utsname.machine))
|
||||
return "machine";
|
||||
#if 0
|
||||
if(swsusp_info.cpus != num_online_cpus())
|
||||
return "number of cpus";
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@@ -588,8 +588,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
log_level_unknown = 1;
|
||||
}
|
||||
|
||||
if (!cpu_online(smp_processor_id()) &&
|
||||
system_state != SYSTEM_RUNNING) {
|
||||
if (!cpu_online(smp_processor_id())) {
|
||||
/*
|
||||
* Some console drivers may assume that per-cpu resources have
|
||||
* been allocated. So don't allow them to be called by this
|
||||
@@ -876,8 +875,10 @@ void register_console(struct console * console)
|
||||
break;
|
||||
console->flags |= CON_ENABLED;
|
||||
console->index = console_cmdline[i].index;
|
||||
if (i == preferred_console)
|
||||
if (i == selected_console) {
|
||||
console->flags |= CON_CONSDEV;
|
||||
preferred_console = selected_console;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -897,6 +898,8 @@ void register_console(struct console * console)
|
||||
if ((console->flags & CON_CONSDEV) || console_drivers == NULL) {
|
||||
console->next = console_drivers;
|
||||
console_drivers = console;
|
||||
if (console->next)
|
||||
console->next->flags &= ~CON_CONSDEV;
|
||||
} else {
|
||||
console->next = console_drivers->next;
|
||||
console_drivers->next = console;
|
||||
@@ -937,10 +940,14 @@ int unregister_console(struct console * console)
|
||||
/* If last console is removed, we re-enable picking the first
|
||||
* one that gets registered. Without that, pmac early boot console
|
||||
* would prevent fbcon from taking over.
|
||||
*
|
||||
* If this isn't the last console and it has CON_CONSDEV set, we
|
||||
* need to set it on the next preferred console.
|
||||
*/
|
||||
if (console_drivers == NULL)
|
||||
preferred_console = selected_console;
|
||||
|
||||
else if (console->flags & CON_CONSDEV)
|
||||
console_drivers->flags |= CON_CONSDEV;
|
||||
|
||||
release_console_sem();
|
||||
return res;
|
||||
|
@@ -263,7 +263,7 @@ static int find_resource(struct resource *root, struct resource *new,
|
||||
new->start = min;
|
||||
if (new->end > max)
|
||||
new->end = max;
|
||||
new->start = (new->start + align - 1) & ~(align - 1);
|
||||
new->start = ALIGN(new->start, align);
|
||||
if (alignf)
|
||||
alignf(alignf_data, new, size, align);
|
||||
if (new->start < new->end && new->end - new->start >= size - 1) {
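Editor's note: the resource.c hunk above replaces the open-coded "(start + align - 1) & ~(align - 1)" rounding with the ALIGN() macro; both forms round a value up to the next multiple of a power-of-two alignment. A standalone demo of that arithmetic (my own macro name, not kernel code):

#include <stdio.h>

/* Same arithmetic as the kernel's ALIGN(): round x up to a multiple of a,
 * where a must be a power of two. */
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long starts[] = { 0x1000, 0x1001, 0x10ff, 0x1100 };
	unsigned long align = 0x100;
	int i;

	for (i = 0; i < 4; i++)
		printf("0x%lx -> 0x%lx\n", starts[i], ALIGN_UP(starts[i], align));
	/* prints 0x1000, 0x1100, 0x1100, 0x1100 */
	return 0;
}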
|
||||
|
kernel/sched.c: 1077 changed lines (file diff suppressed because it is too large)
@@ -213,6 +213,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked)
|
||||
fastcall void recalc_sigpending_tsk(struct task_struct *t)
|
||||
{
|
||||
if (t->signal->group_stop_count > 0 ||
|
||||
(freezing(t)) ||
|
||||
PENDING(&t->pending, &t->blocked) ||
|
||||
PENDING(&t->signal->shared_pending, &t->blocked))
|
||||
set_tsk_thread_flag(t, TIF_SIGPENDING);
|
||||
@@ -2230,8 +2231,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese,
|
||||
current->state = TASK_INTERRUPTIBLE;
|
||||
timeout = schedule_timeout(timeout);
|
||||
|
||||
if (current->flags & PF_FREEZE)
|
||||
refrigerator(PF_FREEZE);
|
||||
try_to_freeze();
|
||||
spin_lock_irq(¤t->sighand->siglock);
|
||||
sig = dequeue_signal(current, &these, &info);
|
||||
current->blocked = current->real_blocked;
|
||||
|
@@ -100,7 +100,7 @@ static int stop_machine(void)
|
||||
stopmachine_state = STOPMACHINE_WAIT;
|
||||
|
||||
for_each_online_cpu(i) {
|
||||
if (i == _smp_processor_id())
|
||||
if (i == raw_smp_processor_id())
|
||||
continue;
|
||||
ret = kernel_thread(stopmachine, (void *)(long)i,CLONE_KERNEL);
|
||||
if (ret < 0)
|
||||
@@ -182,7 +182,7 @@ struct task_struct *__stop_machine_run(int (*fn)(void *), void *data,
|
||||
|
||||
/* If they don't care which CPU fn runs on, bind to any online one. */
|
||||
if (cpu == NR_CPUS)
|
||||
cpu = _smp_processor_id();
|
||||
cpu = raw_smp_processor_id();
|
||||
|
||||
p = kthread_create(do_stop, &smdata, "kstopmachine");
|
||||
if (!IS_ERR(p)) {
|
||||
|
kernel/sys.c: 131 changed lines
@@ -16,6 +16,8 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/highuid.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/key.h>
|
||||
@@ -405,6 +407,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
|
||||
case LINUX_REBOOT_CMD_HALT:
|
||||
notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
|
||||
system_state = SYSTEM_HALT;
|
||||
device_suspend(PMSG_SUSPEND);
|
||||
device_shutdown();
|
||||
printk(KERN_EMERG "System halted.\n");
|
||||
machine_halt();
|
||||
@@ -415,6 +418,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
|
||||
case LINUX_REBOOT_CMD_POWER_OFF:
|
||||
notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
|
||||
system_state = SYSTEM_POWER_OFF;
|
||||
device_suspend(PMSG_SUSPEND);
|
||||
device_shutdown();
|
||||
printk(KERN_EMERG "Power down.\n");
|
||||
machine_power_off();
|
||||
@@ -431,11 +435,30 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user
|
||||
|
||||
notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
|
||||
system_state = SYSTEM_RESTART;
|
||||
device_suspend(PMSG_FREEZE);
|
||||
device_shutdown();
|
||||
printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
|
||||
machine_restart(buffer);
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_KEXEC
|
||||
case LINUX_REBOOT_CMD_KEXEC:
|
||||
{
|
||||
struct kimage *image;
|
||||
image = xchg(&kexec_image, 0);
|
||||
if (!image) {
|
||||
unlock_kernel();
|
||||
return -EINVAL;
|
||||
}
|
||||
notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
|
||||
system_state = SYSTEM_RESTART;
|
||||
device_shutdown();
|
||||
printk(KERN_EMERG "Starting new kernel\n");
|
||||
machine_shutdown();
|
||||
machine_kexec(image);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
#ifdef CONFIG_SOFTWARE_SUSPEND
|
||||
case LINUX_REBOOT_CMD_SW_SUSPEND:
|
||||
{
|
||||
@@ -525,7 +548,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
|
||||
}
|
||||
if (new_egid != old_egid)
|
||||
{
|
||||
current->mm->dumpable = 0;
|
||||
current->mm->dumpable = suid_dumpable;
|
||||
smp_wmb();
|
||||
}
|
||||
if (rgid != (gid_t) -1 ||
|
||||
@@ -556,7 +579,7 @@ asmlinkage long sys_setgid(gid_t gid)
|
||||
{
|
||||
if(old_egid != gid)
|
||||
{
|
||||
current->mm->dumpable=0;
|
||||
current->mm->dumpable = suid_dumpable;
|
||||
smp_wmb();
|
||||
}
|
||||
current->gid = current->egid = current->sgid = current->fsgid = gid;
|
||||
@@ -565,7 +588,7 @@ asmlinkage long sys_setgid(gid_t gid)
|
||||
{
|
||||
if(old_egid != gid)
|
||||
{
|
||||
current->mm->dumpable=0;
|
||||
current->mm->dumpable = suid_dumpable;
|
||||
smp_wmb();
|
||||
}
|
||||
current->egid = current->fsgid = gid;
|
||||
@@ -596,7 +619,7 @@ static int set_user(uid_t new_ruid, int dumpclear)
|
||||
|
||||
if(dumpclear)
|
||||
{
|
||||
current->mm->dumpable = 0;
|
||||
current->mm->dumpable = suid_dumpable;
|
||||
smp_wmb();
|
||||
}
|
||||
current->uid = new_ruid;
|
||||
@@ -653,7 +676,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
|
||||
|
||||
if (new_euid != old_euid)
|
||||
{
|
||||
current->mm->dumpable=0;
|
||||
current->mm->dumpable = suid_dumpable;
|
||||
smp_wmb();
|
||||
}
|
||||
current->fsuid = current->euid = new_euid;
|
||||
@@ -703,7 +726,7 @@ asmlinkage long sys_setuid(uid_t uid)
|
||||
|
||||
if (old_euid != uid)
|
||||
{
|
||||
current->mm->dumpable = 0;
|
||||
current->mm->dumpable = suid_dumpable;
|
||||
smp_wmb();
|
||||
}
|
||||
current->fsuid = current->euid = uid;
|
||||
@@ -748,7 +771,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
|
||||
if (euid != (uid_t) -1) {
|
||||
if (euid != current->euid)
|
||||
{
|
||||
current->mm->dumpable = 0;
|
||||
current->mm->dumpable = suid_dumpable;
|
||||
smp_wmb();
|
||||
}
|
||||
current->euid = euid;
|
||||
@@ -798,7 +821,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
|
||||
if (egid != (gid_t) -1) {
|
||||
if (egid != current->egid)
|
||||
{
|
||||
current->mm->dumpable = 0;
|
||||
current->mm->dumpable = suid_dumpable;
|
||||
smp_wmb();
|
||||
}
|
||||
current->egid = egid;
|
||||
@@ -845,7 +868,7 @@ asmlinkage long sys_setfsuid(uid_t uid)
|
||||
{
|
||||
if (uid != old_fsuid)
|
||||
{
|
||||
current->mm->dumpable = 0;
|
||||
current->mm->dumpable = suid_dumpable;
|
||||
smp_wmb();
|
||||
}
|
||||
current->fsuid = uid;
|
||||
@@ -875,7 +898,7 @@ asmlinkage long sys_setfsgid(gid_t gid)
|
||||
{
|
||||
if (gid != old_fsgid)
|
||||
{
|
||||
current->mm->dumpable = 0;
|
||||
current->mm->dumpable = suid_dumpable;
|
||||
smp_wmb();
|
||||
}
|
||||
current->fsgid = gid;
|
||||
@@ -894,35 +917,69 @@ asmlinkage long sys_times(struct tms __user * tbuf)
|
||||
*/
|
||||
if (tbuf) {
|
||||
struct tms tmp;
|
||||
struct task_struct *tsk = current;
|
||||
struct task_struct *t;
|
||||
cputime_t utime, stime, cutime, cstime;
|
||||
|
||||
read_lock(&tasklist_lock);
|
||||
utime = tsk->signal->utime;
|
||||
stime = tsk->signal->stime;
|
||||
t = tsk;
|
||||
do {
|
||||
utime = cputime_add(utime, t->utime);
|
||||
stime = cputime_add(stime, t->stime);
|
||||
t = next_thread(t);
|
||||
} while (t != tsk);
|
||||
#ifdef CONFIG_SMP
|
||||
if (thread_group_empty(current)) {
|
||||
/*
|
||||
* Single thread case without the use of any locks.
|
||||
*
|
||||
* We may race with release_task if two threads are
|
||||
* executing. However, release task first adds up the
|
||||
* counters (__exit_signal) before removing the task
|
||||
* from the process tasklist (__unhash_process).
|
||||
* __exit_signal also acquires and releases the
|
||||
* siglock which results in the proper memory ordering
|
||||
* so that the list modifications are always visible
|
||||
* after the counters have been updated.
|
||||
*
|
||||
* If the counters have been updated by the second thread
|
||||
* but the thread has not yet been removed from the list
|
||||
* then the other branch will be executing which will
|
||||
* block on tasklist_lock until the exit handling of the
|
||||
* other task is finished.
|
||||
*
|
||||
* This also implies that the sighand->siglock cannot
|
||||
* be held by another processor. So we can also
|
||||
* skip acquiring that lock.
|
||||
*/
|
||||
utime = cputime_add(current->signal->utime, current->utime);
|
||||
stime = cputime_add(current->signal->utime, current->stime);
|
||||
cutime = current->signal->cutime;
|
||||
cstime = current->signal->cstime;
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
|
||||
/*
|
||||
* While we have tasklist_lock read-locked, no dying thread
|
||||
* can be updating current->signal->[us]time. Instead,
|
||||
* we got their counts included in the live thread loop.
|
||||
* However, another thread can come in right now and
|
||||
* do a wait call that updates current->signal->c[us]time.
|
||||
* To make sure we always see that pair updated atomically,
|
||||
* we take the siglock around fetching them.
|
||||
*/
|
||||
spin_lock_irq(&tsk->sighand->siglock);
|
||||
cutime = tsk->signal->cutime;
|
||||
cstime = tsk->signal->cstime;
|
||||
spin_unlock_irq(&tsk->sighand->siglock);
|
||||
read_unlock(&tasklist_lock);
|
||||
/* Process with multiple threads */
|
||||
struct task_struct *tsk = current;
|
||||
struct task_struct *t;
|
||||
|
||||
read_lock(&tasklist_lock);
|
||||
utime = tsk->signal->utime;
|
||||
stime = tsk->signal->stime;
|
||||
t = tsk;
|
||||
do {
|
||||
utime = cputime_add(utime, t->utime);
|
||||
stime = cputime_add(stime, t->stime);
|
||||
t = next_thread(t);
|
||||
} while (t != tsk);
|
||||
|
||||
/*
|
||||
* While we have tasklist_lock read-locked, no dying thread
|
||||
* can be updating current->signal->[us]time. Instead,
|
||||
* we got their counts included in the live thread loop.
|
||||
* However, another thread can come in right now and
|
||||
* do a wait call that updates current->signal->c[us]time.
|
||||
* To make sure we always see that pair updated atomically,
|
||||
* we take the siglock around fetching them.
|
||||
*/
|
||||
spin_lock_irq(&tsk->sighand->siglock);
|
||||
cutime = tsk->signal->cutime;
|
||||
cstime = tsk->signal->cstime;
|
||||
spin_unlock_irq(&tsk->sighand->siglock);
|
||||
read_unlock(&tasklist_lock);
|
||||
}
|
||||
tmp.tms_utime = cputime_to_clock_t(utime);
|
||||
tmp.tms_stime = cputime_to_clock_t(stime);
|
||||
tmp.tms_cutime = cputime_to_clock_t(cutime);
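Editor's note: the sys_times() rework above sums utime/stime over every live thread in the group (plus the totals already accumulated in signal_struct), so the values handed back to userspace cover the whole process. The userspace interface is unchanged: it is still the ordinary times(2) call. A small illustrative program, not part of the patch:

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

int main(void)
{
	struct tms t;
	long ticks_per_sec = sysconf(_SC_CLK_TCK);
	volatile unsigned long x = 0;
	unsigned long i;

	/* burn a little CPU so tms_utime is non-zero */
	for (i = 0; i < 50000000UL; i++)
		x += i;

	if (times(&t) == (clock_t)-1) {
		perror("times");
		return 1;
	}
	printf("utime=%ld stime=%ld cutime=%ld cstime=%ld (ticks, %ld/sec)\n",
	       (long)t.tms_utime, (long)t.tms_stime,
	       (long)t.tms_cutime, (long)t.tms_cstime, ticks_per_sec);
	return 0;
}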
|
||||
@@ -1225,7 +1282,7 @@ static void groups_sort(struct group_info *group_info)
|
||||
}
|
||||
|
||||
/* a simple bsearch */
|
||||
static int groups_search(struct group_info *group_info, gid_t grp)
|
||||
int groups_search(struct group_info *group_info, gid_t grp)
|
||||
{
|
||||
int left, right;
|
||||
|
||||
@@ -1652,7 +1709,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
|
||||
error = 1;
|
||||
break;
|
||||
case PR_SET_DUMPABLE:
|
||||
if (arg2 != 0 && arg2 != 1) {
|
||||
if (arg2 < 0 || arg2 > 2) {
|
||||
error = -EINVAL;
|
||||
break;
|
||||
}
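Editor's note: the sys_prctl() hunk above widens the accepted PR_SET_DUMPABLE range from {0,1} to 0-2, matching the new suid_dumpable modes. A hedged userspace sketch of exercising it (value 2 is only meaningful on a kernel carrying this change):

#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	/* 0 = not dumpable, 1 = dumpable, 2 = the new restricted mode
	 * introduced together with the suid_dumpable sysctl */
	if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) != 0)
		perror("PR_SET_DUMPABLE");

	printf("dumpable is now %d\n", prctl(PR_GET_DUMPABLE, 0, 0, 0, 0));
	return 0;
}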
|
||||
|
@@ -18,6 +18,8 @@ cond_syscall(sys_acct);
|
||||
cond_syscall(sys_lookup_dcookie);
|
||||
cond_syscall(sys_swapon);
|
||||
cond_syscall(sys_swapoff);
|
||||
cond_syscall(sys_kexec_load);
|
||||
cond_syscall(compat_sys_kexec_load);
|
||||
cond_syscall(sys_init_module);
|
||||
cond_syscall(sys_delete_module);
|
||||
cond_syscall(sys_socketpair);
|
||||
@@ -77,6 +79,7 @@ cond_syscall(sys_request_key);
|
||||
cond_syscall(sys_keyctl);
|
||||
cond_syscall(compat_sys_keyctl);
|
||||
cond_syscall(compat_sys_socketcall);
|
||||
cond_syscall(sys_set_zone_reclaim);
|
||||
|
||||
/* arch-specific weak syscall entries */
|
||||
cond_syscall(sys_pciconfig_read);
|
||||
|
@@ -58,6 +58,7 @@ extern int sysctl_overcommit_ratio;
|
||||
extern int max_threads;
|
||||
extern int sysrq_enabled;
|
||||
extern int core_uses_pid;
|
||||
extern int suid_dumpable;
|
||||
extern char core_pattern[];
|
||||
extern int cad_pid;
|
||||
extern int pid_max;
|
||||
@@ -950,6 +951,14 @@ static ctl_table fs_table[] = {
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
#endif
|
||||
{
|
||||
.ctl_name = KERN_SETUID_DUMPABLE,
|
||||
.procname = "suid_dumpable",
|
||||
.data = &suid_dumpable,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = &proc_dointvec,
|
||||
},
|
||||
{ .ctl_name = 0 }
|
||||
};
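Editor's note: the new fs_table entry above exposes the flag as /proc/sys/fs/suid_dumpable with mode 0644 and a plain proc_dointvec handler, so it reads and writes like any other integer sysctl. A minimal sketch of setting it from C (requires root and a kernel with this patch; the meaningful values are 0, 1 and 2):

#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/fs/suid_dumpable";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	fprintf(f, "2\n");	/* proc_dointvec parses a plain integer */
	fclose(f);
	return 0;
}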
|
||||
|
||||
@@ -991,8 +1000,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol
|
||||
int error = parse_table(name, nlen, oldval, oldlenp,
|
||||
newval, newlen, head->ctl_table,
|
||||
&context);
|
||||
if (context)
|
||||
kfree(context);
|
||||
kfree(context);
|
||||
if (error != -ENOTDIR)
|
||||
return error;
|
||||
tmp = tmp->next;
|
||||
|
kernel/timer.c: 367 changed lines
@@ -57,6 +57,11 @@ static void time_interpolator_update(long delta_nsec);
|
||||
#define TVN_MASK (TVN_SIZE - 1)
|
||||
#define TVR_MASK (TVR_SIZE - 1)
|
||||
|
||||
struct timer_base_s {
|
||||
spinlock_t lock;
|
||||
struct timer_list *running_timer;
|
||||
};
|
||||
|
||||
typedef struct tvec_s {
|
||||
struct list_head vec[TVN_SIZE];
|
||||
} tvec_t;
|
||||
@@ -66,9 +71,8 @@ typedef struct tvec_root_s {
|
||||
} tvec_root_t;
|
||||
|
||||
struct tvec_t_base_s {
|
||||
spinlock_t lock;
|
||||
struct timer_base_s t_base;
|
||||
unsigned long timer_jiffies;
|
||||
struct timer_list *running_timer;
|
||||
tvec_root_t tv1;
|
||||
tvec_t tv2;
|
||||
tvec_t tv3;
|
||||
@@ -77,18 +81,16 @@ struct tvec_t_base_s {
|
||||
} ____cacheline_aligned_in_smp;
|
||||
|
||||
typedef struct tvec_t_base_s tvec_base_t;
|
||||
static DEFINE_PER_CPU(tvec_base_t, tvec_bases);
|
||||
|
||||
static inline void set_running_timer(tvec_base_t *base,
|
||||
struct timer_list *timer)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
base->running_timer = timer;
|
||||
base->t_base.running_timer = timer;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Fake initialization */
|
||||
static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED };
|
||||
|
||||
static void check_timer_failed(struct timer_list *timer)
|
||||
{
|
||||
static int whine_count;
|
||||
@@ -103,7 +105,6 @@ static void check_timer_failed(struct timer_list *timer)
|
||||
/*
|
||||
* Now fix it up
|
||||
*/
|
||||
spin_lock_init(&timer->lock);
|
||||
timer->magic = TIMER_MAGIC;
|
||||
}
|
||||
|
||||
@@ -156,65 +157,113 @@ static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
|
||||
list_add_tail(&timer->entry, vec);
|
||||
}
|
||||
|
||||
typedef struct timer_base_s timer_base_t;
|
||||
/*
|
||||
* Used by TIMER_INITIALIZER, we can't use per_cpu(tvec_bases)
|
||||
* at compile time, and we need timer->base to lock the timer.
|
||||
*/
|
||||
timer_base_t __init_timer_base
|
||||
____cacheline_aligned_in_smp = { .lock = SPIN_LOCK_UNLOCKED };
|
||||
EXPORT_SYMBOL(__init_timer_base);
|
||||
|
||||
/***
|
||||
* init_timer - initialize a timer.
|
||||
* @timer: the timer to be initialized
|
||||
*
|
||||
* init_timer() must be done to a timer prior calling *any* of the
|
||||
* other timer functions.
|
||||
*/
|
||||
void fastcall init_timer(struct timer_list *timer)
|
||||
{
|
||||
timer->entry.next = NULL;
|
||||
timer->base = &per_cpu(tvec_bases, raw_smp_processor_id()).t_base;
|
||||
timer->magic = TIMER_MAGIC;
|
||||
}
|
||||
EXPORT_SYMBOL(init_timer);
|
||||
|
||||
static inline void detach_timer(struct timer_list *timer,
|
||||
int clear_pending)
|
||||
{
|
||||
struct list_head *entry = &timer->entry;
|
||||
|
||||
__list_del(entry->prev, entry->next);
|
||||
if (clear_pending)
|
||||
entry->next = NULL;
|
||||
entry->prev = LIST_POISON2;
|
||||
}
|
||||
|
||||
/*
|
||||
* We are using hashed locking: holding per_cpu(tvec_bases).t_base.lock
|
||||
* means that all timers which are tied to this base via timer->base are
|
||||
* locked, and the base itself is locked too.
|
||||
*
|
||||
* So __run_timers/migrate_timers can safely modify all timers which could
|
||||
* be found on ->tvX lists.
|
||||
*
|
||||
* When the timer's base is locked, and the timer removed from list, it is
|
||||
* possible to set timer->base = NULL and drop the lock: the timer remains
|
||||
* locked.
|
||||
*/
|
||||
static timer_base_t *lock_timer_base(struct timer_list *timer,
|
||||
unsigned long *flags)
|
||||
{
|
||||
timer_base_t *base;
|
||||
|
||||
for (;;) {
|
||||
base = timer->base;
|
||||
if (likely(base != NULL)) {
|
||||
spin_lock_irqsave(&base->lock, *flags);
|
||||
if (likely(base == timer->base))
|
||||
return base;
|
||||
/* The timer has migrated to another CPU */
|
||||
spin_unlock_irqrestore(&base->lock, *flags);
|
||||
}
|
||||
cpu_relax();
|
||||
}
|
||||
}
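Editor's note: the comment block above describes the new locking protocol: a timer is locked by locking whichever base it currently points to, and timer->base may be set to NULL (while that base's lock is held) to keep the timer effectively locked across a migration. The shape of that lock-then-recheck loop, transplanted into a plain pthreads setting purely for illustration (types and names are mine, not the kernel's; a real concurrent version would also need an atomic/acquire load of it->base, where the kernel relies on its own ordering rules):

#include <pthread.h>
#include <stddef.h>
#include <stdio.h>

struct base { pthread_mutex_t lock; };
struct item { struct base *base; };	/* only changed by whoever holds the base lock */

static struct base *lock_item_base(struct item *it)
{
	for (;;) {
		struct base *b = it->base;
		if (b != NULL) {
			pthread_mutex_lock(&b->lock);
			if (b == it->base)
				return b;	/* still attached: the item is now locked */
			pthread_mutex_unlock(&b->lock);	/* migrated under us: retry */
		}
		/* base == NULL: item is mid-migration, spin until it settles */
	}
}

int main(void)
{
	struct base b = { PTHREAD_MUTEX_INITIALIZER };
	struct item it = { &b };
	struct base *locked = lock_item_base(&it);

	printf("locked base %p\n", (void *)locked);
	pthread_mutex_unlock(&locked->lock);
	return 0;
}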
|
||||
|
||||
int __mod_timer(struct timer_list *timer, unsigned long expires)
|
||||
{
|
||||
tvec_base_t *old_base, *new_base;
|
||||
timer_base_t *base;
|
||||
tvec_base_t *new_base;
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
|
||||
BUG_ON(!timer->function);
|
||||
|
||||
check_timer(timer);
|
||||
|
||||
spin_lock_irqsave(&timer->lock, flags);
|
||||
new_base = &__get_cpu_var(tvec_bases);
|
||||
repeat:
|
||||
old_base = timer->base;
|
||||
base = lock_timer_base(timer, &flags);
|
||||
|
||||
/*
|
||||
* Prevent deadlocks via ordering by old_base < new_base.
|
||||
*/
|
||||
if (old_base && (new_base != old_base)) {
|
||||
if (old_base < new_base) {
|
||||
spin_lock(&new_base->lock);
|
||||
spin_lock(&old_base->lock);
|
||||
} else {
|
||||
spin_lock(&old_base->lock);
|
||||
spin_lock(&new_base->lock);
|
||||
}
|
||||
/*
|
||||
* The timer base might have been cancelled while we were
|
||||
* trying to take the lock(s):
|
||||
*/
|
||||
if (timer->base != old_base) {
|
||||
spin_unlock(&new_base->lock);
|
||||
spin_unlock(&old_base->lock);
|
||||
goto repeat;
|
||||
}
|
||||
} else {
|
||||
spin_lock(&new_base->lock);
|
||||
if (timer->base != old_base) {
|
||||
spin_unlock(&new_base->lock);
|
||||
goto repeat;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Delete the previous timeout (if there was any), and install
|
||||
* the new one:
|
||||
*/
|
||||
if (old_base) {
|
||||
list_del(&timer->entry);
|
||||
if (timer_pending(timer)) {
|
||||
detach_timer(timer, 0);
|
||||
ret = 1;
|
||||
}
|
||||
|
||||
new_base = &__get_cpu_var(tvec_bases);
|
||||
|
||||
if (base != &new_base->t_base) {
|
||||
/*
|
||||
* We are trying to schedule the timer on the local CPU.
|
||||
* However we can't change timer's base while it is running,
|
||||
* otherwise del_timer_sync() can't detect that the timer's
|
||||
* handler yet has not finished. This also guarantees that
|
||||
* the timer is serialized wrt itself.
|
||||
*/
|
||||
if (unlikely(base->running_timer == timer)) {
|
||||
/* The timer remains on a former base */
|
||||
new_base = container_of(base, tvec_base_t, t_base);
|
||||
} else {
|
||||
/* See the comment in lock_timer_base() */
|
||||
timer->base = NULL;
|
||||
spin_unlock(&base->lock);
|
||||
spin_lock(&new_base->t_base.lock);
|
||||
timer->base = &new_base->t_base;
|
||||
}
|
||||
}
|
||||
|
||||
timer->expires = expires;
|
||||
internal_add_timer(new_base, timer);
|
||||
timer->base = new_base;
|
||||
|
||||
if (old_base && (new_base != old_base))
|
||||
spin_unlock(&old_base->lock);
|
||||
spin_unlock(&new_base->lock);
|
||||
spin_unlock_irqrestore(&timer->lock, flags);
|
||||
spin_unlock_irqrestore(&new_base->t_base.lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -232,15 +281,15 @@ void add_timer_on(struct timer_list *timer, int cpu)
|
||||
{
|
||||
tvec_base_t *base = &per_cpu(tvec_bases, cpu);
|
||||
unsigned long flags;
|
||||
|
||||
|
||||
BUG_ON(timer_pending(timer) || !timer->function);
|
||||
|
||||
check_timer(timer);
|
||||
|
||||
spin_lock_irqsave(&base->lock, flags);
|
||||
spin_lock_irqsave(&base->t_base.lock, flags);
|
||||
timer->base = &base->t_base;
|
||||
internal_add_timer(base, timer);
|
||||
timer->base = base;
|
||||
spin_unlock_irqrestore(&base->lock, flags);
|
||||
spin_unlock_irqrestore(&base->t_base.lock, flags);
|
||||
}
|
||||
|
||||
|
||||
@@ -295,32 +344,55 @@ EXPORT_SYMBOL(mod_timer);
|
||||
*/
|
||||
int del_timer(struct timer_list *timer)
|
||||
{
|
||||
timer_base_t *base;
|
||||
unsigned long flags;
|
||||
tvec_base_t *base;
|
||||
int ret = 0;
|
||||
|
||||
check_timer(timer);
|
||||
|
||||
repeat:
|
||||
base = timer->base;
|
||||
if (!base)
|
||||
return 0;
|
||||
spin_lock_irqsave(&base->lock, flags);
|
||||
if (base != timer->base) {
|
||||
if (timer_pending(timer)) {
|
||||
base = lock_timer_base(timer, &flags);
|
||||
if (timer_pending(timer)) {
|
||||
detach_timer(timer, 1);
|
||||
ret = 1;
|
||||
}
|
||||
spin_unlock_irqrestore(&base->lock, flags);
|
||||
goto repeat;
|
||||
}
|
||||
list_del(&timer->entry);
|
||||
/* Need to make sure that anybody who sees a NULL base also sees the list ops */
|
||||
smp_wmb();
|
||||
timer->base = NULL;
|
||||
spin_unlock_irqrestore(&base->lock, flags);
|
||||
|
||||
return 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(del_timer);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* This function tries to deactivate a timer. Upon successful (ret >= 0)
|
||||
* exit the timer is not queued and the handler is not running on any CPU.
|
||||
*
|
||||
* It must not be called from interrupt contexts.
|
||||
*/
|
||||
int try_to_del_timer_sync(struct timer_list *timer)
|
||||
{
|
||||
timer_base_t *base;
|
||||
unsigned long flags;
|
||||
int ret = -1;
|
||||
|
||||
base = lock_timer_base(timer, &flags);
|
||||
|
||||
if (base->running_timer == timer)
|
||||
goto out;
|
||||
|
||||
ret = 0;
|
||||
if (timer_pending(timer)) {
|
||||
detach_timer(timer, 1);
|
||||
ret = 1;
|
||||
}
|
||||
out:
|
||||
spin_unlock_irqrestore(&base->lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/***
|
||||
* del_timer_sync - deactivate a timer and wait for the handler to finish.
|
||||
* @timer: the timer to be deactivated
|
||||
@@ -332,72 +404,24 @@ EXPORT_SYMBOL(del_timer);
|
||||
* Synchronization rules: callers must prevent restarting of the timer,
|
||||
* otherwise this function is meaningless. It must not be called from
|
||||
* interrupt contexts. The caller must not hold locks which would prevent
|
||||
* completion of the timer's handler. Upon exit the timer is not queued and
|
||||
* the handler is not running on any CPU.
|
||||
* completion of the timer's handler. The timer's handler must not call
|
||||
* add_timer_on(). Upon exit the timer is not queued and the handler is
|
||||
* not running on any CPU.
|
||||
*
|
||||
* The function returns whether it has deactivated a pending timer or not.
|
||||
*
|
||||
* del_timer_sync() is slow and complicated because it copes with timer
|
||||
* handlers which re-arm the timer (periodic timers). If the timer handler
|
||||
* is known to not do this (a single shot timer) then use
|
||||
* del_singleshot_timer_sync() instead.
|
||||
*/
|
||||
int del_timer_sync(struct timer_list *timer)
|
||||
{
|
||||
tvec_base_t *base;
|
||||
int i, ret = 0;
|
||||
|
||||
check_timer(timer);
|
||||
|
||||
del_again:
|
||||
ret += del_timer(timer);
|
||||
|
||||
for_each_online_cpu(i) {
|
||||
base = &per_cpu(tvec_bases, i);
|
||||
if (base->running_timer == timer) {
|
||||
while (base->running_timer == timer) {
|
||||
cpu_relax();
|
||||
preempt_check_resched();
|
||||
}
|
||||
break;
|
||||
}
|
||||
for (;;) {
|
||||
int ret = try_to_del_timer_sync(timer);
|
||||
if (ret >= 0)
|
||||
return ret;
|
||||
}
|
||||
smp_rmb();
|
||||
if (timer_pending(timer))
|
||||
goto del_again;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(del_timer_sync);
|
||||
|
||||
/***
|
||||
* del_singleshot_timer_sync - deactivate a non-recursive timer
|
||||
* @timer: the timer to be deactivated
|
||||
*
|
||||
* This function is an optimization of del_timer_sync for the case where the
|
||||
* caller can guarantee the timer does not reschedule itself in its timer
|
||||
* function.
|
||||
*
|
||||
* Synchronization rules: callers must prevent restarting of the timer,
|
||||
* otherwise this function is meaningless. It must not be called from
|
||||
* interrupt contexts. The caller must not hold locks which wold prevent
|
||||
* completion of the timer's handler. Upon exit the timer is not queued and
|
||||
* the handler is not running on any CPU.
|
||||
*
|
||||
* The function returns whether it has deactivated a pending timer or not.
|
||||
*/
|
||||
int del_singleshot_timer_sync(struct timer_list *timer)
|
||||
{
|
||||
int ret = del_timer(timer);
|
||||
|
||||
if (!ret) {
|
||||
ret = del_timer_sync(timer);
|
||||
BUG_ON(ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(del_singleshot_timer_sync);
|
||||
#endif
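Editor's note: with the rewrite above, del_timer_sync() simply loops on try_to_del_timer_sync() until the handler is no longer running, so callers keep the same contract: after it returns, the timer is neither queued nor executing on any CPU. A minimal, hypothetical driver-style sketch of the usual arm/teardown pattern it supports (2.6-era timer API, not code from this patch):

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list my_timer;

static void my_timer_fn(unsigned long data)
{
	printk(KERN_DEBUG "my_timer fired with data %lu\n", data);
	/* a periodic handler could re-arm itself here with mod_timer() */
}

static int __init my_init(void)
{
	init_timer(&my_timer);
	my_timer.function = my_timer_fn;
	my_timer.data = 42;
	my_timer.expires = jiffies + HZ;	/* roughly one second from now */
	add_timer(&my_timer);
	return 0;
}

static void __exit my_exit(void)
{
	/* returns only once the handler has finished everywhere */
	del_timer_sync(&my_timer);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");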
|
||||
|
||||
static int cascade(tvec_base_t *base, tvec_t *tv, int index)
|
||||
@@ -415,7 +439,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
|
||||
struct timer_list *tmp;
|
||||
|
||||
tmp = list_entry(curr, struct timer_list, entry);
|
||||
BUG_ON(tmp->base != base);
|
||||
BUG_ON(tmp->base != &base->t_base);
|
||||
curr = curr->next;
|
||||
internal_add_timer(base, tmp);
|
||||
}
|
||||
@@ -437,7 +461,7 @@ static inline void __run_timers(tvec_base_t *base)
|
||||
{
|
||||
struct timer_list *timer;
|
||||
|
||||
spin_lock_irq(&base->lock);
|
||||
spin_lock_irq(&base->t_base.lock);
|
||||
while (time_after_eq(jiffies, base->timer_jiffies)) {
|
||||
struct list_head work_list = LIST_HEAD_INIT(work_list);
|
||||
struct list_head *head = &work_list;
|
||||
@@ -453,8 +477,7 @@ static inline void __run_timers(tvec_base_t *base)
|
||||
cascade(base, &base->tv5, INDEX(3));
|
||||
++base->timer_jiffies;
|
||||
list_splice_init(base->tv1.vec + index, &work_list);
|
||||
repeat:
|
||||
if (!list_empty(head)) {
|
||||
while (!list_empty(head)) {
|
||||
void (*fn)(unsigned long);
|
||||
unsigned long data;
|
||||
|
||||
@@ -462,25 +485,26 @@ repeat:
|
||||
fn = timer->function;
|
||||
data = timer->data;
|
||||
|
||||
list_del(&timer->entry);
|
||||
set_running_timer(base, timer);
|
||||
smp_wmb();
|
||||
timer->base = NULL;
|
||||
spin_unlock_irq(&base->lock);
|
||||
detach_timer(timer, 1);
|
||||
spin_unlock_irq(&base->t_base.lock);
|
||||
{
|
||||
u32 preempt_count = preempt_count();
|
||||
int preempt_count = preempt_count();
|
||||
fn(data);
|
||||
if (preempt_count != preempt_count()) {
|
||||
printk("huh, entered %p with %08x, exited with %08x?\n", fn, preempt_count, preempt_count());
|
||||
printk(KERN_WARNING "huh, entered %p "
|
||||
"with preempt_count %08x, exited"
|
||||
" with %08x?\n",
|
||||
fn, preempt_count,
|
||||
preempt_count());
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
spin_lock_irq(&base->lock);
|
||||
goto repeat;
|
||||
spin_lock_irq(&base->t_base.lock);
|
||||
}
|
||||
}
|
||||
set_running_timer(base, NULL);
|
||||
spin_unlock_irq(&base->lock);
|
||||
spin_unlock_irq(&base->t_base.lock);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_IDLE_HZ
|
||||
@@ -499,7 +523,7 @@ unsigned long next_timer_interrupt(void)
|
||||
int i, j;
|
||||
|
||||
base = &__get_cpu_var(tvec_bases);
|
||||
spin_lock(&base->lock);
|
||||
spin_lock(&base->t_base.lock);
|
||||
expires = base->timer_jiffies + (LONG_MAX >> 1);
|
||||
list = 0;
|
||||
|
||||
@@ -547,7 +571,7 @@ found:
|
||||
expires = nte->expires;
|
||||
}
|
||||
}
|
||||
spin_unlock(&base->lock);
|
||||
spin_unlock(&base->t_base.lock);
|
||||
return expires;
|
||||
}
|
||||
#endif
|
||||
@@ -1286,9 +1310,9 @@ static void __devinit init_timers_cpu(int cpu)
|
||||
{
|
||||
int j;
|
||||
tvec_base_t *base;
|
||||
|
||||
|
||||
base = &per_cpu(tvec_bases, cpu);
|
||||
spin_lock_init(&base->lock);
|
||||
spin_lock_init(&base->t_base.lock);
|
||||
for (j = 0; j < TVN_SIZE; j++) {
|
||||
INIT_LIST_HEAD(base->tv5.vec + j);
|
||||
INIT_LIST_HEAD(base->tv4.vec + j);
|
||||
@@ -1302,22 +1326,16 @@ static void __devinit init_timers_cpu(int cpu)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
static int migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
|
||||
static void migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
|
||||
{
|
||||
struct timer_list *timer;
|
||||
|
||||
while (!list_empty(head)) {
|
||||
timer = list_entry(head->next, struct timer_list, entry);
|
||||
/* We're locking backwards from __mod_timer order here,
|
||||
beware deadlock. */
|
||||
if (!spin_trylock(&timer->lock))
|
||||
return 0;
|
||||
list_del(&timer->entry);
|
||||
detach_timer(timer, 0);
|
||||
timer->base = &new_base->t_base;
|
||||
internal_add_timer(new_base, timer);
|
||||
timer->base = new_base;
|
||||
spin_unlock(&timer->lock);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void __devinit migrate_timers(int cpu)
|
||||
@@ -1331,39 +1349,24 @@ static void __devinit migrate_timers(int cpu)
|
||||
new_base = &get_cpu_var(tvec_bases);
|
||||
|
||||
local_irq_disable();
|
||||
again:
|
||||
/* Prevent deadlocks via ordering by old_base < new_base. */
|
||||
if (old_base < new_base) {
|
||||
spin_lock(&new_base->lock);
|
||||
spin_lock(&old_base->lock);
|
||||
} else {
|
||||
spin_lock(&old_base->lock);
|
||||
spin_lock(&new_base->lock);
|
||||
}
|
||||
spin_lock(&new_base->t_base.lock);
|
||||
spin_lock(&old_base->t_base.lock);
|
||||
|
||||
if (old_base->running_timer)
|
||||
if (old_base->t_base.running_timer)
|
||||
BUG();
|
||||
for (i = 0; i < TVR_SIZE; i++)
|
||||
if (!migrate_timer_list(new_base, old_base->tv1.vec + i))
|
||||
goto unlock_again;
|
||||
for (i = 0; i < TVN_SIZE; i++)
|
||||
if (!migrate_timer_list(new_base, old_base->tv2.vec + i)
|
||||
|| !migrate_timer_list(new_base, old_base->tv3.vec + i)
|
||||
|| !migrate_timer_list(new_base, old_base->tv4.vec + i)
|
||||
|| !migrate_timer_list(new_base, old_base->tv5.vec + i))
|
||||
goto unlock_again;
|
||||
spin_unlock(&old_base->lock);
|
||||
spin_unlock(&new_base->lock);
|
||||
migrate_timer_list(new_base, old_base->tv1.vec + i);
|
||||
for (i = 0; i < TVN_SIZE; i++) {
|
||||
migrate_timer_list(new_base, old_base->tv2.vec + i);
|
||||
migrate_timer_list(new_base, old_base->tv3.vec + i);
|
||||
migrate_timer_list(new_base, old_base->tv4.vec + i);
|
||||
migrate_timer_list(new_base, old_base->tv5.vec + i);
|
||||
}
|
||||
|
||||
spin_unlock(&old_base->t_base.lock);
|
||||
spin_unlock(&new_base->t_base.lock);
|
||||
local_irq_enable();
|
||||
put_cpu_var(tvec_bases);
|
||||
return;
|
||||
|
||||
unlock_again:
|
||||
/* Avoid deadlock with __mod_timer, by backing off. */
|
||||
spin_unlock(&old_base->lock);
|
||||
spin_unlock(&new_base->lock);
|
||||
cpu_relax();
|
||||
goto again;
|
||||
}
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
||||
|
||||
@@ -1594,7 +1597,7 @@ void msleep(unsigned int msecs)
|
||||
EXPORT_SYMBOL(msleep);
|
||||
|
||||
/**
|
||||
* msleep_interruptible - sleep waiting for waitqueue interruptions
|
||||
* msleep_interruptible - sleep waiting for signals
|
||||
* @msecs: Time in milliseconds to sleep for
|
||||
*/
|
||||
unsigned long msleep_interruptible(unsigned int msecs)