Merge linux-2.6 with linux-acpi-2.6
This commit is contained in:
@@ -27,6 +27,7 @@ obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_SYSFS) += ksysfs.o
obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_SECCOMP) += seccomp.o
@@ -220,7 +220,7 @@ asmlinkage long sys_acct(const char __user *name)
		return (PTR_ERR(tmp));
	}
	/* Difference from BSD - they don't do O_APPEND */
	file = filp_open(tmp, O_WRONLY|O_APPEND, 0);
	file = filp_open(tmp, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
	putname(tmp);
	if (IS_ERR(file)) {
		return (PTR_ERR(file));
127  kernel/cpuset.c
@@ -628,13 +628,6 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
 */

/*
 * Hack to avoid 2.6.13 partial node dynamic sched domain bug.
 * Disable letting 'cpu_exclusive' cpusets define dynamic sched
 * domains, until the sched domain can handle partial nodes.
 * Remove this #if hackery when sched domains fixed.
 */
#if 0
static void update_cpu_domains(struct cpuset *cur)
{
	struct cpuset *c, *par = cur->parent;
@@ -675,11 +668,6 @@ static void update_cpu_domains(struct cpuset *cur)
	partition_sched_domains(&pspan, &cspan);
	unlock_cpu_hotplug();
}
#else
static void update_cpu_domains(struct cpuset *cur)
{
}
#endif

static int update_cpumask(struct cpuset *cs, char *buf)
{
@@ -1611,17 +1599,114 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
	return 0;
}

/**
 * cpuset_zone_allowed - is zone z allowed in current->mems_allowed
 * @z: zone in question
 *
 * Is zone z allowed in current->mems_allowed, or is
 * the CPU in interrupt context? (zone is always allowed in this case)
/*
 * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive
 * ancestor to the specified cpuset. Call while holding cpuset_sem.
 * If no ancestor is mem_exclusive (an unusual configuration), then
 * returns the root cpuset.
 */
int cpuset_zone_allowed(struct zone *z)
static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
{
	return in_interrupt() ||
		node_isset(z->zone_pgdat->node_id, current->mems_allowed);
	while (!is_mem_exclusive(cs) && cs->parent)
		cs = cs->parent;
	return cs;
}

/**
 * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
 * @z: is this zone on an allowed node?
 * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
 *
 * If we're in interrupt, yes, we can always allocate. If zone
 * z's node is in our tasks mems_allowed, yes. If it's not a
 * __GFP_HARDWALL request and this zone's node is in the nearest
 * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
 * Otherwise, no.
 *
 * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
 * and do not allow allocations outside the current tasks cpuset.
 * GFP_KERNEL allocations are not so marked, so can escape to the
 * nearest mem_exclusive ancestor cpuset.
 *
 * Scanning up parent cpusets requires cpuset_sem. The __alloc_pages()
 * routine only calls here with __GFP_HARDWALL bit _not_ set if
 * it's a GFP_KERNEL allocation, and all nodes in the current tasks
 * mems_allowed came up empty on the first pass over the zonelist.
 * So only GFP_KERNEL allocations, if all nodes in the cpuset are
 * short of memory, might require taking the cpuset_sem semaphore.
 *
 * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages()
 * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing
 * hardwall cpusets - no allocation on a node outside the cpuset is
 * allowed (unless in interrupt, of course).
 *
 * The second loop doesn't even call here for GFP_ATOMIC requests
 * (if the __alloc_pages() local variable 'wait' is set). That check
 * and the checks below have the combined effect in the second loop of
 * the __alloc_pages() routine that:
 *	in_interrupt - any node ok (current task context irrelevant)
 *	GFP_ATOMIC   - any node ok
 *	GFP_KERNEL   - any node in enclosing mem_exclusive cpuset ok
 *	GFP_USER     - only nodes in current tasks mems allowed ok.
 **/

int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask)
{
	int node;			/* node that zone z is on */
	const struct cpuset *cs;	/* current cpuset ancestors */
	int allowed = 1;		/* is allocation in zone z allowed? */

	if (in_interrupt())
		return 1;
	node = z->zone_pgdat->node_id;
	if (node_isset(node, current->mems_allowed))
		return 1;
	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
		return 0;

	/* Not hardwall and node outside mems_allowed: scan up cpusets */
	down(&cpuset_sem);
	cs = current->cpuset;
	if (!cs)
		goto done;		/* current task exiting */
	cs = nearest_exclusive_ancestor(cs);
	allowed = node_isset(node, cs->mems_allowed);
done:
	up(&cpuset_sem);
	return allowed;
}

/**
 * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors?
 * @p: pointer to task_struct of some other task.
 *
 * Description: Return true if the nearest mem_exclusive ancestor
 * cpusets of tasks @p and current overlap. Used by oom killer to
 * determine if task @p's memory usage might impact the memory
 * available to the current task.
 *
 * Acquires cpuset_sem - not suitable for calling from a fast path.
 **/

int cpuset_excl_nodes_overlap(const struct task_struct *p)
{
	const struct cpuset *cs1, *cs2;	/* my and p's cpuset ancestors */
	int overlap = 0;		/* do cpusets overlap? */

	down(&cpuset_sem);
	cs1 = current->cpuset;
	if (!cs1)
		goto done;		/* current task exiting */
	cs2 = p->cpuset;
	if (!cs2)
		goto done;		/* task p is exiting */
	cs1 = nearest_exclusive_ancestor(cs1);
	cs2 = nearest_exclusive_ancestor(cs2);
	overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
done:
	up(&cpuset_sem);

	return overlap;
}

/*
@@ -994,6 +994,9 @@ static task_t *copy_process(unsigned long clone_flags,
	 * of CLONE_PTRACE.
	 */
	clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
#ifdef TIF_SYSCALL_EMU
	clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
#endif

	/* Our parent execution domain becomes current domain
	   These must match for thread signalling to apply */
137  kernel/futex.c
@@ -40,6 +40,7 @@
#include <linux/pagemap.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
#include <asm/futex.h>

#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)

@@ -326,6 +327,118 @@ out:
	return ret;
}

/*
 * Wake up all waiters hashed on the physical page that is mapped
 * to this virtual address:
 */
static int futex_wake_op(unsigned long uaddr1, unsigned long uaddr2, int nr_wake, int nr_wake2, int op)
{
	union futex_key key1, key2;
	struct futex_hash_bucket *bh1, *bh2;
	struct list_head *head;
	struct futex_q *this, *next;
	int ret, op_ret, attempt = 0;

retryfull:
	down_read(&current->mm->mmap_sem);

	ret = get_futex_key(uaddr1, &key1);
	if (unlikely(ret != 0))
		goto out;
	ret = get_futex_key(uaddr2, &key2);
	if (unlikely(ret != 0))
		goto out;

	bh1 = hash_futex(&key1);
	bh2 = hash_futex(&key2);

retry:
	if (bh1 < bh2)
		spin_lock(&bh1->lock);
	spin_lock(&bh2->lock);
	if (bh1 > bh2)
		spin_lock(&bh1->lock);

	op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2);
	if (unlikely(op_ret < 0)) {
		int dummy;

		spin_unlock(&bh1->lock);
		if (bh1 != bh2)
			spin_unlock(&bh2->lock);

		/* futex_atomic_op_inuser needs to both read and write
		 * *(int __user *)uaddr2, but we can't modify it
		 * non-atomically. Therefore, if get_user below is not
		 * enough, we need to handle the fault ourselves, while
		 * still holding the mmap_sem. */
		if (attempt++) {
			struct vm_area_struct * vma;
			struct mm_struct *mm = current->mm;

			ret = -EFAULT;
			if (attempt >= 2 ||
			    !(vma = find_vma(mm, uaddr2)) ||
			    vma->vm_start > uaddr2 ||
			    !(vma->vm_flags & VM_WRITE))
				goto out;

			switch (handle_mm_fault(mm, vma, uaddr2, 1)) {
			case VM_FAULT_MINOR:
				current->min_flt++;
				break;
			case VM_FAULT_MAJOR:
				current->maj_flt++;
				break;
			default:
				goto out;
			}
			goto retry;
		}

		/* If we would have faulted, release mmap_sem,
		 * fault it in and start all over again. */
		up_read(&current->mm->mmap_sem);

		ret = get_user(dummy, (int __user *)uaddr2);
		if (ret)
			return ret;

		goto retryfull;
	}

	head = &bh1->chain;

	list_for_each_entry_safe(this, next, head, list) {
		if (match_futex (&this->key, &key1)) {
			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}

	if (op_ret > 0) {
		head = &bh2->chain;

		op_ret = 0;
		list_for_each_entry_safe(this, next, head, list) {
			if (match_futex (&this->key, &key2)) {
				wake_futex(this);
				if (++op_ret >= nr_wake2)
					break;
			}
		}
		ret += op_ret;
	}

	spin_unlock(&bh1->lock);
	if (bh1 != bh2)
		spin_unlock(&bh2->lock);
out:
	up_read(&current->mm->mmap_sem);
	return ret;
}

/*
 * Requeue all waiters hashed on one physical page to another
 * physical page.
@@ -673,23 +786,17 @@ static int futex_fd(unsigned long uaddr, int signal)
	filp->f_mapping = filp->f_dentry->d_inode->i_mapping;

	if (signal) {
		int err;
		err = f_setown(filp, current->pid, 1);
		if (err < 0) {
			put_unused_fd(ret);
			put_filp(filp);
			ret = err;
			goto out;
			goto error;
		}
		filp->f_owner.signum = signal;
	}

	q = kmalloc(sizeof(*q), GFP_KERNEL);
	if (!q) {
		put_unused_fd(ret);
		put_filp(filp);
		ret = -ENOMEM;
		goto out;
		err = -ENOMEM;
		goto error;
	}

	down_read(&current->mm->mmap_sem);
@@ -697,10 +804,8 @@ static int futex_fd(unsigned long uaddr, int signal)

	if (unlikely(err != 0)) {
		up_read(&current->mm->mmap_sem);
		put_unused_fd(ret);
		put_filp(filp);
		kfree(q);
		return err;
		goto error;
	}

	/*
@@ -716,6 +821,11 @@ static int futex_fd(unsigned long uaddr, int signal)
	fd_install(ret, filp);
out:
	return ret;
error:
	put_unused_fd(ret);
	put_filp(filp);
	ret = err;
	goto out;
}

long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
@@ -740,6 +850,9 @@ long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout,
	case FUTEX_CMP_REQUEUE:
		ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
		break;
	case FUTEX_WAKE_OP:
		ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
		break;
	default:
		ret = -ENOSYS;
	}
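futex_wake_op() above must hold two hash-bucket spinlocks at once, so it acquires them in address order (lower-addressed bucket first) and releases the second lock only when the buckets differ; two callers working on the same pair of buckets can then never each hold one lock while waiting for the other. The following is only a minimal sketch of that ABBA-avoidance idiom, written with userspace pthreads and hypothetical helper names (lock_pair/unlock_pair); it is not taken from the patch.

#include <pthread.h>
#include <stdint.h>

/* Take two locks in a fixed (address) order so that no two threads can
 * each hold one lock while waiting for the other (ABBA deadlock). */
static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (a == b) {				/* both keys hashed to one bucket */
		pthread_mutex_lock(a);
		return;
	}
	if ((uintptr_t)a > (uintptr_t)b) {	/* lower address goes first */
		pthread_mutex_t *t = a;
		a = b;
		b = t;
	}
	pthread_mutex_lock(a);
	pthread_mutex_lock(b);
}

static void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	if (b != a)
		pthread_mutex_unlock(b);
}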
@@ -39,7 +39,7 @@ void inter_module_register(const char *im_name, struct module *owner, const void
	struct list_head *tmp;
	struct inter_module_entry *ime, *ime_new;

	if (!(ime_new = kmalloc(sizeof(*ime), GFP_KERNEL))) {
	if (!(ime_new = kzalloc(sizeof(*ime), GFP_KERNEL))) {
		/* Overloaded kernel, not fatal */
		printk(KERN_ERR
			"Aiee, inter_module_register: cannot kmalloc entry for '%s'\n",
@@ -47,7 +47,6 @@ void inter_module_register(const char *im_name, struct module *owner, const void
		kmalloc_failed = 1;
		return;
	}
	memset(ime_new, 0, sizeof(*ime_new));
	ime_new->im_name = im_name;
	ime_new->owner = owner;
	ime_new->userdata = userdata;
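This hunk, like later ones in kernel/params.c, kernel/power/pm.c and kernel/resource.c, replaces the kmalloc()-then-memset() pattern with kzalloc(), which hands back memory that is already zeroed. A minimal userspace sketch of the same allocate-and-zero helper, assuming only the C standard library (the name xzalloc is invented for illustration, not a kernel API):

#include <stdlib.h>
#include <string.h>

/* Allocate size bytes and clear them - what kzalloc() gives kernel
 * callers in a single call instead of kmalloc() followed by memset(). */
static void *xzalloc(size_t size)
{
	void *p = malloc(size);

	if (p)
		memset(p, 0, size);
	return p;
}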
@@ -111,7 +111,7 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs)
|
||||
unsigned int status;
|
||||
|
||||
kstat_this_cpu.irqs[irq]++;
|
||||
if (desc->status & IRQ_PER_CPU) {
|
||||
if (CHECK_IRQ_PER_CPU(desc->status)) {
|
||||
irqreturn_t action_ret;
|
||||
|
||||
/*
|
||||
|
@@ -18,6 +18,10 @@
|
||||
|
||||
cpumask_t irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL };
|
||||
|
||||
#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE)
|
||||
cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
|
||||
#endif
|
||||
|
||||
/**
|
||||
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
|
||||
*
|
||||
|
@@ -19,12 +19,22 @@ static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS];
|
||||
*/
|
||||
static struct proc_dir_entry *smp_affinity_entry[NR_IRQS];
|
||||
|
||||
void __attribute__((weak))
|
||||
proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
|
||||
#ifdef CONFIG_GENERIC_PENDING_IRQ
|
||||
void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
|
||||
{
|
||||
/*
|
||||
* Save these away for later use. Re-progam when the
|
||||
* interrupt is pending
|
||||
*/
|
||||
set_pending_irq(irq, mask_val);
|
||||
}
|
||||
#else
|
||||
void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
|
||||
{
|
||||
irq_affinity[irq] = mask_val;
|
||||
irq_desc[irq].handler->set_affinity(irq, mask_val);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int irq_affinity_read_proc(char *page, char **start, off_t off,
|
||||
int count, int *eof, void *data)
|
||||
|
@@ -37,6 +37,7 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/moduleloader.h>
|
||||
#include <asm-generic/sections.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/errno.h>
|
||||
#include <asm/kdebug.h>
|
||||
@@ -72,7 +73,7 @@ static struct hlist_head kprobe_insn_pages;
|
||||
* get_insn_slot() - Find a slot on an executable page for an instruction.
|
||||
* We allocate an executable page if there's no room on existing ones.
|
||||
*/
|
||||
kprobe_opcode_t *get_insn_slot(void)
|
||||
kprobe_opcode_t __kprobes *get_insn_slot(void)
|
||||
{
|
||||
struct kprobe_insn_page *kip;
|
||||
struct hlist_node *pos;
|
||||
@@ -117,7 +118,7 @@ kprobe_opcode_t *get_insn_slot(void)
|
||||
return kip->insns;
|
||||
}
|
||||
|
||||
void free_insn_slot(kprobe_opcode_t *slot)
|
||||
void __kprobes free_insn_slot(kprobe_opcode_t *slot)
|
||||
{
|
||||
struct kprobe_insn_page *kip;
|
||||
struct hlist_node *pos;
|
||||
@@ -152,20 +153,42 @@ void free_insn_slot(kprobe_opcode_t *slot)
|
||||
}
|
||||
|
||||
/* Locks kprobe: irqs must be disabled */
|
||||
void lock_kprobes(void)
|
||||
void __kprobes lock_kprobes(void)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
/* Avoiding local interrupts to happen right after we take the kprobe_lock
|
||||
* and before we get a chance to update kprobe_cpu, this to prevent
|
||||
* deadlock when we have a kprobe on ISR routine and a kprobe on task
|
||||
* routine
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
|
||||
spin_lock(&kprobe_lock);
|
||||
kprobe_cpu = smp_processor_id();
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
void unlock_kprobes(void)
|
||||
void __kprobes unlock_kprobes(void)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
/* Avoiding local interrupts to happen right after we update
|
||||
* kprobe_cpu and before we get a chance to release kprobe_lock,
|
||||
* this to prevent deadlock when we have a kprobe on ISR routine and
|
||||
* a kprobe on task routine
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
|
||||
kprobe_cpu = NR_CPUS;
|
||||
spin_unlock(&kprobe_lock);
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/* You have to be holding the kprobe_lock */
|
||||
struct kprobe *get_kprobe(void *addr)
|
||||
struct kprobe __kprobes *get_kprobe(void *addr)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct hlist_node *node;
|
||||
@@ -183,7 +206,7 @@ struct kprobe *get_kprobe(void *addr)
|
||||
* Aggregate handlers for multiple kprobes support - these handlers
|
||||
* take care of invoking the individual kprobe handlers on p->list
|
||||
*/
|
||||
static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
@@ -198,8 +221,8 @@ static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
unsigned long flags)
|
||||
static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
@@ -213,8 +236,8 @@ static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
return;
|
||||
}
|
||||
|
||||
static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
int trapnr)
|
||||
static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
int trapnr)
|
||||
{
|
||||
/*
|
||||
* if we faulted "during" the execution of a user specified
|
||||
@@ -227,7 +250,7 @@ static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
struct kprobe *kp = curr_kprobe;
|
||||
if (curr_kprobe && kp->break_handler) {
|
||||
@@ -240,7 +263,7 @@ static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp)
|
||||
struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp)
|
||||
{
|
||||
struct hlist_node *node;
|
||||
struct kretprobe_instance *ri;
|
||||
@@ -249,7 +272,8 @@ struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp)
|
||||
static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe
|
||||
*rp)
|
||||
{
|
||||
struct hlist_node *node;
|
||||
struct kretprobe_instance *ri;
|
||||
@@ -258,7 +282,7 @@ static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void add_rp_inst(struct kretprobe_instance *ri)
|
||||
void __kprobes add_rp_inst(struct kretprobe_instance *ri)
|
||||
{
|
||||
/*
|
||||
* Remove rp inst off the free list -
|
||||
@@ -276,7 +300,7 @@ void add_rp_inst(struct kretprobe_instance *ri)
|
||||
hlist_add_head(&ri->uflist, &ri->rp->used_instances);
|
||||
}
|
||||
|
||||
void recycle_rp_inst(struct kretprobe_instance *ri)
|
||||
void __kprobes recycle_rp_inst(struct kretprobe_instance *ri)
|
||||
{
|
||||
/* remove rp inst off the rprobe_inst_table */
|
||||
hlist_del(&ri->hlist);
|
||||
@@ -291,7 +315,7 @@ void recycle_rp_inst(struct kretprobe_instance *ri)
|
||||
kfree(ri);
|
||||
}
|
||||
|
||||
struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk)
|
||||
struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
|
||||
{
|
||||
return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)];
|
||||
}
|
||||
@@ -302,7 +326,7 @@ struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk)
|
||||
* instances associated with this task. These left over instances represent
|
||||
* probed functions that have been called but will never return.
|
||||
*/
|
||||
void kprobe_flush_task(struct task_struct *tk)
|
||||
void __kprobes kprobe_flush_task(struct task_struct *tk)
|
||||
{
|
||||
struct kretprobe_instance *ri;
|
||||
struct hlist_head *head;
|
||||
@@ -322,7 +346,8 @@ void kprobe_flush_task(struct task_struct *tk)
|
||||
* This kprobe pre_handler is registered with every kretprobe. When probe
|
||||
* hits it will set up the return probe.
|
||||
*/
|
||||
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
|
||||
static int __kprobes pre_handler_kretprobe(struct kprobe *p,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
struct kretprobe *rp = container_of(p, struct kretprobe, kp);
|
||||
|
||||
@@ -353,7 +378,7 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
|
||||
* Add the new probe to old_p->list. Fail if this is the
|
||||
* second jprobe at the address - two jprobes can't coexist
|
||||
*/
|
||||
static int add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
|
||||
static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
@@ -395,7 +420,8 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
|
||||
* the intricacies
|
||||
* TODO: Move kcalloc outside the spinlock
|
||||
*/
|
||||
static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p)
|
||||
static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
|
||||
struct kprobe *p)
|
||||
{
|
||||
int ret = 0;
|
||||
struct kprobe *ap;
|
||||
@@ -434,15 +460,25 @@ static inline void cleanup_aggr_kprobe(struct kprobe *old_p,
|
||||
spin_unlock_irqrestore(&kprobe_lock, flags);
|
||||
}
|
||||
|
||||
int register_kprobe(struct kprobe *p)
|
||||
static int __kprobes in_kprobes_functions(unsigned long addr)
|
||||
{
|
||||
if (addr >= (unsigned long)__kprobes_text_start
|
||||
&& addr < (unsigned long)__kprobes_text_end)
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __kprobes register_kprobe(struct kprobe *p)
|
||||
{
|
||||
int ret = 0;
|
||||
unsigned long flags = 0;
|
||||
struct kprobe *old_p;
|
||||
|
||||
if ((ret = arch_prepare_kprobe(p)) != 0) {
|
||||
if ((ret = in_kprobes_functions((unsigned long) p->addr)) != 0)
|
||||
return ret;
|
||||
if ((ret = arch_prepare_kprobe(p)) != 0)
|
||||
goto rm_kprobe;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&kprobe_lock, flags);
|
||||
old_p = get_kprobe(p->addr);
|
||||
p->nmissed = 0;
|
||||
@@ -466,7 +502,7 @@ rm_kprobe:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void unregister_kprobe(struct kprobe *p)
|
||||
void __kprobes unregister_kprobe(struct kprobe *p)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct kprobe *old_p;
|
||||
@@ -487,7 +523,7 @@ static struct notifier_block kprobe_exceptions_nb = {
|
||||
.priority = 0x7fffffff /* we need to be notified first */
|
||||
};
|
||||
|
||||
int register_jprobe(struct jprobe *jp)
|
||||
int __kprobes register_jprobe(struct jprobe *jp)
|
||||
{
|
||||
/* Todo: Verify probepoint is a function entry point */
|
||||
jp->kp.pre_handler = setjmp_pre_handler;
|
||||
@@ -496,14 +532,14 @@ int register_jprobe(struct jprobe *jp)
|
||||
return register_kprobe(&jp->kp);
|
||||
}
|
||||
|
||||
void unregister_jprobe(struct jprobe *jp)
|
||||
void __kprobes unregister_jprobe(struct jprobe *jp)
|
||||
{
|
||||
unregister_kprobe(&jp->kp);
|
||||
}
|
||||
|
||||
#ifdef ARCH_SUPPORTS_KRETPROBES
|
||||
|
||||
int register_kretprobe(struct kretprobe *rp)
|
||||
int __kprobes register_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
int ret = 0;
|
||||
struct kretprobe_instance *inst;
|
||||
@@ -540,14 +576,14 @@ int register_kretprobe(struct kretprobe *rp)
|
||||
|
||||
#else /* ARCH_SUPPORTS_KRETPROBES */
|
||||
|
||||
int register_kretprobe(struct kretprobe *rp)
|
||||
int __kprobes register_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
#endif /* ARCH_SUPPORTS_KRETPROBES */
|
||||
|
||||
void unregister_kretprobe(struct kretprobe *rp)
|
||||
void __kprobes unregister_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct kretprobe_instance *ri;
|
||||
|
@@ -1509,6 +1509,7 @@ static struct module *load_module(void __user *umod,
|
||||
long err = 0;
|
||||
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
|
||||
struct exception_table_entry *extable;
|
||||
mm_segment_t old_fs;
|
||||
|
||||
DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
|
||||
umod, len, uargs);
|
||||
@@ -1779,6 +1780,24 @@ static struct module *load_module(void __user *umod,
|
||||
if (err < 0)
|
||||
goto cleanup;
|
||||
|
||||
/* flush the icache in correct context */
|
||||
old_fs = get_fs();
|
||||
set_fs(KERNEL_DS);
|
||||
|
||||
/*
|
||||
* Flush the instruction cache, since we've played with text.
|
||||
* Do it before processing of module parameters, so the module
|
||||
* can provide parameter accessor functions of its own.
|
||||
*/
|
||||
if (mod->module_init)
|
||||
flush_icache_range((unsigned long)mod->module_init,
|
||||
(unsigned long)mod->module_init
|
||||
+ mod->init_size);
|
||||
flush_icache_range((unsigned long)mod->module_core,
|
||||
(unsigned long)mod->module_core + mod->core_size);
|
||||
|
||||
set_fs(old_fs);
|
||||
|
||||
mod->args = args;
|
||||
if (obsparmindex) {
|
||||
err = obsolete_params(mod->name, mod->args,
|
||||
@@ -1860,7 +1879,6 @@ sys_init_module(void __user *umod,
|
||||
const char __user *uargs)
|
||||
{
|
||||
struct module *mod;
|
||||
mm_segment_t old_fs = get_fs();
|
||||
int ret = 0;
|
||||
|
||||
/* Must have permission */
|
||||
@@ -1878,19 +1896,6 @@ sys_init_module(void __user *umod,
|
||||
return PTR_ERR(mod);
|
||||
}
|
||||
|
||||
/* flush the icache in correct context */
|
||||
set_fs(KERNEL_DS);
|
||||
|
||||
/* Flush the instruction cache, since we've played with text */
|
||||
if (mod->module_init)
|
||||
flush_icache_range((unsigned long)mod->module_init,
|
||||
(unsigned long)mod->module_init
|
||||
+ mod->init_size);
|
||||
flush_icache_range((unsigned long)mod->module_core,
|
||||
(unsigned long)mod->module_core + mod->core_size);
|
||||
|
||||
set_fs(old_fs);
|
||||
|
||||
/* Now sew it into the lists. They won't access us, since
|
||||
strong_try_module_get() will fail. */
|
||||
stop_machine_run(__link_module, mod, NR_CPUS);
|
||||
|
@@ -542,8 +542,8 @@ static void __init kernel_param_sysfs_setup(const char *name,
|
||||
{
|
||||
struct module_kobject *mk;
|
||||
|
||||
mk = kmalloc(sizeof(struct module_kobject), GFP_KERNEL);
|
||||
memset(mk, 0, sizeof(struct module_kobject));
|
||||
mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
|
||||
BUG_ON(!mk);
|
||||
|
||||
mk->mod = THIS_MODULE;
|
||||
kobj_set_kset_s(mk, module_subsys);
|
||||
|
@@ -427,21 +427,23 @@ int posix_timer_event(struct k_itimer *timr,int si_private)
|
||||
timr->sigq->info.si_code = SI_TIMER;
|
||||
timr->sigq->info.si_tid = timr->it_id;
|
||||
timr->sigq->info.si_value = timr->it_sigev_value;
|
||||
|
||||
if (timr->it_sigev_notify & SIGEV_THREAD_ID) {
|
||||
if (unlikely(timr->it_process->flags & PF_EXITING)) {
|
||||
timr->it_sigev_notify = SIGEV_SIGNAL;
|
||||
put_task_struct(timr->it_process);
|
||||
timr->it_process = timr->it_process->group_leader;
|
||||
goto group;
|
||||
}
|
||||
return send_sigqueue(timr->it_sigev_signo, timr->sigq,
|
||||
timr->it_process);
|
||||
}
|
||||
else {
|
||||
group:
|
||||
return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
|
||||
timr->it_process);
|
||||
struct task_struct *leader;
|
||||
int ret = send_sigqueue(timr->it_sigev_signo, timr->sigq,
|
||||
timr->it_process);
|
||||
|
||||
if (likely(ret >= 0))
|
||||
return ret;
|
||||
|
||||
timr->it_sigev_notify = SIGEV_SIGNAL;
|
||||
leader = timr->it_process->group_leader;
|
||||
put_task_struct(timr->it_process);
|
||||
timr->it_process = leader;
|
||||
}
|
||||
|
||||
return send_group_sigqueue(timr->it_sigev_signo, timr->sigq,
|
||||
timr->it_process);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(posix_timer_event);
|
||||
|
||||
|
@@ -29,7 +29,7 @@ config PM_DEBUG
|
||||
|
||||
config SOFTWARE_SUSPEND
|
||||
bool "Software Suspend"
|
||||
depends on EXPERIMENTAL && PM && SWAP && ((X86 && SMP) || ((FVR || PPC32 || X86) && !SMP))
|
||||
depends on PM && SWAP && (X86 || ((FVR || PPC32) && !SMP))
|
||||
---help---
|
||||
Enable the possibility of suspending the machine.
|
||||
It doesn't need APM.
|
||||
@@ -73,6 +73,18 @@ config PM_STD_PARTITION
|
||||
suspended image to. It will simply pick the first available swap
|
||||
device.
|
||||
|
||||
config SWSUSP_ENCRYPT
|
||||
bool "Encrypt suspend image"
|
||||
depends on SOFTWARE_SUSPEND && CRYPTO=y && (CRYPTO_AES=y || CRYPTO_AES_586=y || CRYPTO_AES_X86_64=y)
|
||||
default ""
|
||||
---help---
|
||||
To prevent data gathering from swap after resume you can encrypt
|
||||
the suspend image with a temporary key that is deleted on
|
||||
resume.
|
||||
|
||||
Note that the temporary key is stored unencrypted on disk while the
|
||||
system is suspended.
|
||||
|
||||
config SUSPEND_SMP
|
||||
bool
|
||||
depends on HOTPLUG_CPU && X86 && PM
|
||||
|
@@ -112,24 +112,12 @@ static inline void platform_finish(void)
|
||||
}
|
||||
}
|
||||
|
||||
static void finish(void)
|
||||
{
|
||||
device_resume();
|
||||
platform_finish();
|
||||
thaw_processes();
|
||||
enable_nonboot_cpus();
|
||||
pm_restore_console();
|
||||
}
|
||||
|
||||
|
||||
static int prepare_processes(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
pm_prepare_console();
|
||||
|
||||
sys_sync();
|
||||
|
||||
disable_nonboot_cpus();
|
||||
|
||||
if (freeze_processes()) {
|
||||
@@ -162,15 +150,6 @@ static void unprepare_processes(void)
|
||||
pm_restore_console();
|
||||
}
|
||||
|
||||
static int prepare_devices(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
if ((error = device_suspend(PMSG_FREEZE)))
|
||||
printk("Some devices failed to suspend\n");
|
||||
return error;
|
||||
}
|
||||
|
||||
/**
|
||||
* pm_suspend_disk - The granpappy of power management.
|
||||
*
|
||||
@@ -187,17 +166,14 @@ int pm_suspend_disk(void)
|
||||
error = prepare_processes();
|
||||
if (error)
|
||||
return error;
|
||||
error = prepare_devices();
|
||||
|
||||
error = device_suspend(PMSG_FREEZE);
|
||||
if (error) {
|
||||
printk("Some devices failed to suspend\n");
|
||||
unprepare_processes();
|
||||
return error;
|
||||
}
|
||||
|
||||
pr_debug("PM: Attempting to suspend to disk.\n");
|
||||
if (pm_disk_mode == PM_DISK_FIRMWARE)
|
||||
return pm_ops->enter(PM_SUSPEND_DISK);
|
||||
|
||||
pr_debug("PM: snapshotting memory.\n");
|
||||
in_suspend = 1;
|
||||
if ((error = swsusp_suspend()))
|
||||
@@ -208,11 +184,20 @@ int pm_suspend_disk(void)
|
||||
error = swsusp_write();
|
||||
if (!error)
|
||||
power_down(pm_disk_mode);
|
||||
else {
|
||||
/* swsusp_write can not fail in device_resume,
|
||||
no need to do second device_resume */
|
||||
swsusp_free();
|
||||
unprepare_processes();
|
||||
return error;
|
||||
}
|
||||
} else
|
||||
pr_debug("PM: Image restored successfully.\n");
|
||||
|
||||
swsusp_free();
|
||||
Done:
|
||||
finish();
|
||||
device_resume();
|
||||
unprepare_processes();
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -233,9 +218,12 @@ static int software_resume(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
down(&pm_sem);
|
||||
if (!swsusp_resume_device) {
|
||||
if (!strlen(resume_file))
|
||||
if (!strlen(resume_file)) {
|
||||
up(&pm_sem);
|
||||
return -ENOENT;
|
||||
}
|
||||
swsusp_resume_device = name_to_dev_t(resume_file);
|
||||
pr_debug("swsusp: Resume From Partition %s\n", resume_file);
|
||||
} else {
|
||||
@@ -248,6 +236,7 @@ static int software_resume(void)
|
||||
* FIXME: If noresume is specified, we need to find the partition
|
||||
* and reset it back to normal swap space.
|
||||
*/
|
||||
up(&pm_sem);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -270,20 +259,24 @@ static int software_resume(void)
|
||||
|
||||
pr_debug("PM: Preparing devices for restore.\n");
|
||||
|
||||
if ((error = prepare_devices()))
|
||||
if ((error = device_suspend(PMSG_FREEZE))) {
|
||||
printk("Some devices failed to suspend\n");
|
||||
goto Free;
|
||||
}
|
||||
|
||||
mb();
|
||||
|
||||
pr_debug("PM: Restoring saved image.\n");
|
||||
swsusp_resume();
|
||||
pr_debug("PM: Restore failed, recovering.n");
|
||||
finish();
|
||||
device_resume();
|
||||
Free:
|
||||
swsusp_free();
|
||||
Cleanup:
|
||||
unprepare_processes();
|
||||
Done:
|
||||
/* For success case, the suspend path will release the lock */
|
||||
up(&pm_sem);
|
||||
pr_debug("PM: Resume from disk failed.\n");
|
||||
return 0;
|
||||
}
|
||||
@@ -390,7 +383,9 @@ static ssize_t resume_store(struct subsystem * subsys, const char * buf, size_t
|
||||
if (sscanf(buf, "%u:%u", &maj, &min) == 2) {
|
||||
res = MKDEV(maj,min);
|
||||
if (maj == MAJOR(res) && min == MINOR(res)) {
|
||||
down(&pm_sem);
|
||||
swsusp_resume_device = res;
|
||||
up(&pm_sem);
|
||||
printk("Attempting manual resume\n");
|
||||
noresume = 0;
|
||||
software_resume();
|
||||
|
@@ -143,11 +143,12 @@ static void suspend_finish(suspend_state_t state)
|
||||
|
||||
|
||||
|
||||
static char * pm_states[] = {
|
||||
static char *pm_states[PM_SUSPEND_MAX] = {
|
||||
[PM_SUSPEND_STANDBY] = "standby",
|
||||
[PM_SUSPEND_MEM] = "mem",
|
||||
#ifdef CONFIG_SOFTWARE_SUSPEND
|
||||
[PM_SUSPEND_DISK] = "disk",
|
||||
NULL,
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
|
@@ -60,9 +60,8 @@ struct pm_dev *pm_register(pm_dev_t type,
|
||||
unsigned long id,
|
||||
pm_callback callback)
|
||||
{
|
||||
struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL);
|
||||
struct pm_dev *dev = kzalloc(sizeof(struct pm_dev), GFP_KERNEL);
|
||||
if (dev) {
|
||||
memset(dev, 0, sizeof(*dev));
|
||||
dev->type = type;
|
||||
dev->id = id;
|
||||
dev->callback = callback;
|
||||
|
@@ -38,7 +38,6 @@ void refrigerator(void)
|
||||
processes around? */
|
||||
long save;
|
||||
save = current->state;
|
||||
current->state = TASK_UNINTERRUPTIBLE;
|
||||
pr_debug("%s entered refrigerator\n", current->comm);
|
||||
printk("=");
|
||||
|
||||
@@ -47,8 +46,10 @@ void refrigerator(void)
|
||||
recalc_sigpending(); /* We sent fake signal, clean it up */
|
||||
spin_unlock_irq(&current->sighand->siglock);
|
||||
|
||||
while (frozen(current))
|
||||
while (frozen(current)) {
|
||||
current->state = TASK_UNINTERRUPTIBLE;
|
||||
schedule();
|
||||
}
|
||||
pr_debug("%s left refrigerator\n", current->comm);
|
||||
current->state = save;
|
||||
}
|
||||
@@ -80,13 +81,33 @@ int freeze_processes(void)
|
||||
} while_each_thread(g, p);
|
||||
read_unlock(&tasklist_lock);
|
||||
yield(); /* Yield is okay here */
|
||||
if (time_after(jiffies, start_time + TIMEOUT)) {
|
||||
if (todo && time_after(jiffies, start_time + TIMEOUT)) {
|
||||
printk( "\n" );
|
||||
printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo );
|
||||
return todo;
|
||||
break;
|
||||
}
|
||||
} while(todo);
|
||||
|
||||
/* This does not unfreeze processes that are already frozen
|
||||
* (we have slightly ugly calling convention in that respect,
|
||||
* and caller must call thaw_processes() if something fails),
|
||||
* but it cleans up leftover PF_FREEZE requests.
|
||||
*/
|
||||
if (todo) {
|
||||
read_lock(&tasklist_lock);
|
||||
do_each_thread(g, p)
|
||||
if (freezing(p)) {
|
||||
pr_debug(" clean up: %s\n", p->comm);
|
||||
p->flags &= ~PF_FREEZE;
|
||||
spin_lock_irqsave(&p->sighand->siglock, flags);
|
||||
recalc_sigpending_tsk(p);
|
||||
spin_unlock_irqrestore(&p->sighand->siglock, flags);
|
||||
}
|
||||
while_each_thread(g, p);
|
||||
read_unlock(&tasklist_lock);
|
||||
return todo;
|
||||
}
|
||||
|
||||
printk( "|\n" );
|
||||
BUG_ON(in_atomic());
|
||||
return 0;
|
||||
|
@@ -31,6 +31,9 @@
|
||||
* Alex Badea <vampire@go.ro>:
|
||||
* Fixed runaway init
|
||||
*
|
||||
* Andreas Steinmetz <ast@domdv.de>:
|
||||
* Added encrypted suspend option
|
||||
*
|
||||
* More state savers are welcome. Especially for the scsi layer...
|
||||
*
|
||||
* For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
|
||||
@@ -71,8 +74,16 @@
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/io.h>
|
||||
|
||||
#include <linux/random.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <asm/scatterlist.h>
|
||||
|
||||
#include "power.h"
|
||||
|
||||
#define CIPHER "aes"
|
||||
#define MAXKEY 32
|
||||
#define MAXIV 32
|
||||
|
||||
/* References to section boundaries */
|
||||
extern const void __nosave_begin, __nosave_end;
|
||||
|
||||
@@ -103,7 +114,8 @@ static suspend_pagedir_t *pagedir_save;
|
||||
#define SWSUSP_SIG "S1SUSPEND"
|
||||
|
||||
static struct swsusp_header {
|
||||
char reserved[PAGE_SIZE - 20 - sizeof(swp_entry_t)];
|
||||
char reserved[PAGE_SIZE - 20 - MAXKEY - MAXIV - sizeof(swp_entry_t)];
|
||||
u8 key_iv[MAXKEY+MAXIV];
|
||||
swp_entry_t swsusp_info;
|
||||
char orig_sig[10];
|
||||
char sig[10];
|
||||
@@ -129,6 +141,131 @@ static struct swsusp_info swsusp_info;
|
||||
static unsigned short swapfile_used[MAX_SWAPFILES];
|
||||
static unsigned short root_swap;
|
||||
|
||||
static int write_page(unsigned long addr, swp_entry_t * loc);
|
||||
static int bio_read_page(pgoff_t page_off, void * page);
|
||||
|
||||
static u8 key_iv[MAXKEY+MAXIV];
|
||||
|
||||
#ifdef CONFIG_SWSUSP_ENCRYPT
|
||||
|
||||
static int crypto_init(int mode, void **mem)
|
||||
{
|
||||
int error = 0;
|
||||
int len;
|
||||
char *modemsg;
|
||||
struct crypto_tfm *tfm;
|
||||
|
||||
modemsg = mode ? "suspend not possible" : "resume not possible";
|
||||
|
||||
tfm = crypto_alloc_tfm(CIPHER, CRYPTO_TFM_MODE_CBC);
|
||||
if(!tfm) {
|
||||
printk(KERN_ERR "swsusp: no tfm, %s\n", modemsg);
|
||||
error = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if(MAXKEY < crypto_tfm_alg_min_keysize(tfm)) {
|
||||
printk(KERN_ERR "swsusp: key buffer too small, %s\n", modemsg);
|
||||
error = -ENOKEY;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (mode)
|
||||
get_random_bytes(key_iv, MAXKEY+MAXIV);
|
||||
|
||||
len = crypto_tfm_alg_max_keysize(tfm);
|
||||
if (len > MAXKEY)
|
||||
len = MAXKEY;
|
||||
|
||||
if (crypto_cipher_setkey(tfm, key_iv, len)) {
|
||||
printk(KERN_ERR "swsusp: key setup failure, %s\n", modemsg);
|
||||
error = -EKEYREJECTED;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
len = crypto_tfm_alg_ivsize(tfm);
|
||||
|
||||
if (MAXIV < len) {
|
||||
printk(KERN_ERR "swsusp: iv buffer too small, %s\n", modemsg);
|
||||
error = -EOVERFLOW;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
crypto_cipher_set_iv(tfm, key_iv+MAXKEY, len);
|
||||
|
||||
*mem=(void *)tfm;
|
||||
|
||||
goto out;
|
||||
|
||||
fail: crypto_free_tfm(tfm);
|
||||
out: return error;
|
||||
}
|
||||
|
||||
static __inline__ void crypto_exit(void *mem)
|
||||
{
|
||||
crypto_free_tfm((struct crypto_tfm *)mem);
|
||||
}
|
||||
|
||||
static __inline__ int crypto_write(struct pbe *p, void *mem)
|
||||
{
|
||||
int error = 0;
|
||||
struct scatterlist src, dst;
|
||||
|
||||
src.page = virt_to_page(p->address);
|
||||
src.offset = 0;
|
||||
src.length = PAGE_SIZE;
|
||||
dst.page = virt_to_page((void *)&swsusp_header);
|
||||
dst.offset = 0;
|
||||
dst.length = PAGE_SIZE;
|
||||
|
||||
error = crypto_cipher_encrypt((struct crypto_tfm *)mem, &dst, &src,
|
||||
PAGE_SIZE);
|
||||
|
||||
if (!error)
|
||||
error = write_page((unsigned long)&swsusp_header,
|
||||
&(p->swap_address));
|
||||
return error;
|
||||
}
|
||||
|
||||
static __inline__ int crypto_read(struct pbe *p, void *mem)
|
||||
{
|
||||
int error = 0;
|
||||
struct scatterlist src, dst;
|
||||
|
||||
error = bio_read_page(swp_offset(p->swap_address), (void *)p->address);
|
||||
if (!error) {
|
||||
src.offset = 0;
|
||||
src.length = PAGE_SIZE;
|
||||
dst.offset = 0;
|
||||
dst.length = PAGE_SIZE;
|
||||
src.page = dst.page = virt_to_page((void *)p->address);
|
||||
|
||||
error = crypto_cipher_decrypt((struct crypto_tfm *)mem, &dst,
|
||||
&src, PAGE_SIZE);
|
||||
}
|
||||
return error;
|
||||
}
|
||||
#else
|
||||
static __inline__ int crypto_init(int mode, void *mem)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __inline__ void crypto_exit(void *mem)
|
||||
{
|
||||
}
|
||||
|
||||
static __inline__ int crypto_write(struct pbe *p, void *mem)
|
||||
{
|
||||
return write_page(p->address, &(p->swap_address));
|
||||
}
|
||||
|
||||
static __inline__ int crypto_read(struct pbe *p, void *mem)
|
||||
{
|
||||
return bio_read_page(swp_offset(p->swap_address), (void *)p->address);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int mark_swapfiles(swp_entry_t prev)
|
||||
{
|
||||
int error;
|
||||
@@ -140,6 +277,7 @@ static int mark_swapfiles(swp_entry_t prev)
|
||||
!memcmp("SWAPSPACE2",swsusp_header.sig, 10)) {
|
||||
memcpy(swsusp_header.orig_sig,swsusp_header.sig, 10);
|
||||
memcpy(swsusp_header.sig,SWSUSP_SIG, 10);
|
||||
memcpy(swsusp_header.key_iv, key_iv, MAXKEY+MAXIV);
|
||||
swsusp_header.swsusp_info = prev;
|
||||
error = rw_swap_page_sync(WRITE,
|
||||
swp_entry(root_swap, 0),
|
||||
@@ -179,9 +317,9 @@ static int swsusp_swap_check(void) /* This is called before saving image */
|
||||
len=strlen(resume_file);
|
||||
root_swap = 0xFFFF;
|
||||
|
||||
swap_list_lock();
|
||||
spin_lock(&swap_lock);
|
||||
for (i=0; i<MAX_SWAPFILES; i++) {
|
||||
if (swap_info[i].flags == 0) {
|
||||
if (!(swap_info[i].flags & SWP_WRITEOK)) {
|
||||
swapfile_used[i]=SWAPFILE_UNUSED;
|
||||
} else {
|
||||
if (!len) {
|
||||
@@ -202,7 +340,7 @@ static int swsusp_swap_check(void) /* This is called before saving image */
|
||||
}
|
||||
}
|
||||
}
|
||||
swap_list_unlock();
|
||||
spin_unlock(&swap_lock);
|
||||
return (root_swap != 0xffff) ? 0 : -ENODEV;
|
||||
}
|
||||
|
||||
@@ -216,12 +354,12 @@ static void lock_swapdevices(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
swap_list_lock();
|
||||
spin_lock(&swap_lock);
|
||||
for (i = 0; i< MAX_SWAPFILES; i++)
|
||||
if (swapfile_used[i] == SWAPFILE_IGNORED) {
|
||||
swap_info[i].flags ^= 0xFF;
|
||||
swap_info[i].flags ^= SWP_WRITEOK;
|
||||
}
|
||||
swap_list_unlock();
|
||||
spin_unlock(&swap_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -286,6 +424,10 @@ static int data_write(void)
|
||||
int error = 0, i = 0;
|
||||
unsigned int mod = nr_copy_pages / 100;
|
||||
struct pbe *p;
|
||||
void *tfm;
|
||||
|
||||
if ((error = crypto_init(1, &tfm)))
|
||||
return error;
|
||||
|
||||
if (!mod)
|
||||
mod = 1;
|
||||
@@ -294,11 +436,14 @@ static int data_write(void)
|
||||
for_each_pbe (p, pagedir_nosave) {
|
||||
if (!(i%mod))
|
||||
printk( "\b\b\b\b%3d%%", i / mod );
|
||||
if ((error = write_page(p->address, &(p->swap_address))))
|
||||
if ((error = crypto_write(p, tfm))) {
|
||||
crypto_exit(tfm);
|
||||
return error;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
printk("\b\b\b\bdone\n");
|
||||
crypto_exit(tfm);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -385,7 +530,6 @@ static int write_pagedir(void)
|
||||
* write_suspend_image - Write entire image and metadata.
|
||||
*
|
||||
*/
|
||||
|
||||
static int write_suspend_image(void)
|
||||
{
|
||||
int error;
|
||||
@@ -400,6 +544,7 @@ static int write_suspend_image(void)
|
||||
if ((error = close_swap()))
|
||||
goto FreePagedir;
|
||||
Done:
|
||||
memset(key_iv, 0, MAXKEY+MAXIV);
|
||||
return error;
|
||||
FreePagedir:
|
||||
free_pagedir_entries();
|
||||
@@ -591,18 +736,7 @@ static void copy_data_pages(void)
|
||||
|
||||
static int calc_nr(int nr_copy)
|
||||
{
|
||||
int extra = 0;
|
||||
int mod = !!(nr_copy % PBES_PER_PAGE);
|
||||
int diff = (nr_copy / PBES_PER_PAGE) + mod;
|
||||
|
||||
do {
|
||||
extra += diff;
|
||||
nr_copy += diff;
|
||||
mod = !!(nr_copy % PBES_PER_PAGE);
|
||||
diff = (nr_copy / PBES_PER_PAGE) + mod - extra;
|
||||
} while (diff > 0);
|
||||
|
||||
return nr_copy;
|
||||
return nr_copy + (nr_copy+PBES_PER_PAGE-2)/(PBES_PER_PAGE-1);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -886,20 +1020,21 @@ int swsusp_suspend(void)
|
||||
* at resume time, and evil weirdness ensues.
|
||||
*/
|
||||
if ((error = device_power_down(PMSG_FREEZE))) {
|
||||
printk(KERN_ERR "Some devices failed to power down, aborting suspend\n");
|
||||
local_irq_enable();
|
||||
return error;
|
||||
}
|
||||
|
||||
if ((error = swsusp_swap_check())) {
|
||||
printk(KERN_ERR "swsusp: FATAL: cannot find swap device, try "
|
||||
"swapon -a!\n");
|
||||
printk(KERN_ERR "swsusp: cannot find swap device, try swapon -a.\n");
|
||||
device_power_up();
|
||||
local_irq_enable();
|
||||
return error;
|
||||
}
|
||||
|
||||
save_processor_state();
|
||||
if ((error = swsusp_arch_suspend()))
|
||||
printk("Error %d suspending\n", error);
|
||||
printk(KERN_ERR "Error %d suspending\n", error);
|
||||
/* Restore control flow magically appears here */
|
||||
restore_processor_state();
|
||||
BUG_ON (nr_copy_pages_check != nr_copy_pages);
|
||||
@@ -924,6 +1059,7 @@ int swsusp_resume(void)
|
||||
BUG_ON(!error);
|
||||
restore_processor_state();
|
||||
restore_highmem();
|
||||
touch_softlockup_watchdog();
|
||||
device_power_up();
|
||||
local_irq_enable();
|
||||
return error;
|
||||
@@ -1179,7 +1315,8 @@ static const char * sanity_check(void)
|
||||
if (strcmp(swsusp_info.uts.machine,system_utsname.machine))
|
||||
return "machine";
|
||||
#if 0
|
||||
if(swsusp_info.cpus != num_online_cpus())
|
||||
/* We can't use number of online CPUs when we use hotplug to remove them ;-))) */
|
||||
if (swsusp_info.cpus != num_possible_cpus())
|
||||
return "number of cpus";
|
||||
#endif
|
||||
return NULL;
|
||||
@@ -1212,13 +1349,14 @@ static int check_sig(void)
|
||||
return error;
|
||||
if (!memcmp(SWSUSP_SIG, swsusp_header.sig, 10)) {
|
||||
memcpy(swsusp_header.sig, swsusp_header.orig_sig, 10);
|
||||
memcpy(key_iv, swsusp_header.key_iv, MAXKEY+MAXIV);
|
||||
memset(swsusp_header.key_iv, 0, MAXKEY+MAXIV);
|
||||
|
||||
/*
|
||||
* Reset swap signature now.
|
||||
*/
|
||||
error = bio_write_page(0, &swsusp_header);
|
||||
} else {
|
||||
printk(KERN_ERR "swsusp: Suspend partition has wrong signature?\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!error)
|
||||
@@ -1239,6 +1377,10 @@ static int data_read(struct pbe *pblist)
|
||||
int error = 0;
|
||||
int i = 0;
|
||||
int mod = swsusp_info.image_pages / 100;
|
||||
void *tfm;
|
||||
|
||||
if ((error = crypto_init(0, &tfm)))
|
||||
return error;
|
||||
|
||||
if (!mod)
|
||||
mod = 1;
|
||||
@@ -1250,14 +1392,15 @@ static int data_read(struct pbe *pblist)
|
||||
if (!(i % mod))
|
||||
printk("\b\b\b\b%3d%%", i / mod);
|
||||
|
||||
error = bio_read_page(swp_offset(p->swap_address),
|
||||
(void *)p->address);
|
||||
if (error)
|
||||
if ((error = crypto_read(p, tfm))) {
|
||||
crypto_exit(tfm);
|
||||
return error;
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
printk("\b\b\b\bdone\n");
|
||||
crypto_exit(tfm);
|
||||
return error;
|
||||
}
|
||||
|
||||
@@ -1385,6 +1528,7 @@ int swsusp_read(void)
|
||||
|
||||
error = read_suspend_image();
|
||||
blkdev_put(resume_bdev);
|
||||
memset(key_iv, 0, MAXKEY+MAXIV);
|
||||
|
||||
if (!error)
|
||||
pr_debug("swsusp: Reading resume file was successful\n");
|
||||
|
@@ -514,6 +514,9 @@ asmlinkage int printk(const char *fmt, ...)
|
||||
return r;
|
||||
}
|
||||
|
||||
/* cpu currently holding logbuf_lock */
|
||||
static volatile unsigned int printk_cpu = UINT_MAX;
|
||||
|
||||
asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
{
|
||||
unsigned long flags;
|
||||
@@ -522,11 +525,15 @@ asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
static char printk_buf[1024];
|
||||
static int log_level_unknown = 1;
|
||||
|
||||
if (unlikely(oops_in_progress))
|
||||
preempt_disable();
|
||||
if (unlikely(oops_in_progress) && printk_cpu == smp_processor_id())
|
||||
/* If a crash is occurring during printk() on this CPU,
|
||||
* make sure we can't deadlock */
|
||||
zap_locks();
|
||||
|
||||
/* This stops the holder of console_sem just where we want him */
|
||||
spin_lock_irqsave(&logbuf_lock, flags);
|
||||
printk_cpu = smp_processor_id();
|
||||
|
||||
/* Emit the output into the temporary buffer */
|
||||
printed_len = vscnprintf(printk_buf, sizeof(printk_buf), fmt, args);
|
||||
@@ -595,6 +602,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
* CPU until it is officially up. We shouldn't be calling into
|
||||
* random console drivers on a CPU which doesn't exist yet..
|
||||
*/
|
||||
printk_cpu = UINT_MAX;
|
||||
spin_unlock_irqrestore(&logbuf_lock, flags);
|
||||
goto out;
|
||||
}
|
||||
@@ -604,6 +612,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
* We own the drivers. We can drop the spinlock and let
|
||||
* release_console_sem() print the text
|
||||
*/
|
||||
printk_cpu = UINT_MAX;
|
||||
spin_unlock_irqrestore(&logbuf_lock, flags);
|
||||
console_may_schedule = 0;
|
||||
release_console_sem();
|
||||
@@ -613,9 +622,11 @@ asmlinkage int vprintk(const char *fmt, va_list args)
|
||||
* allows the semaphore holder to proceed and to call the
|
||||
* console drivers with the output which we just produced.
|
||||
*/
|
||||
printk_cpu = UINT_MAX;
|
||||
spin_unlock_irqrestore(&logbuf_lock, flags);
|
||||
}
|
||||
out:
|
||||
preempt_enable();
|
||||
return printed_len;
|
||||
}
|
||||
EXPORT_SYMBOL(printk);
|
||||
|
@@ -118,6 +118,33 @@ int ptrace_check_attach(struct task_struct *child, int kill)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int may_attach(struct task_struct *task)
|
||||
{
|
||||
if (!task->mm)
|
||||
return -EPERM;
|
||||
if (((current->uid != task->euid) ||
|
||||
(current->uid != task->suid) ||
|
||||
(current->uid != task->uid) ||
|
||||
(current->gid != task->egid) ||
|
||||
(current->gid != task->sgid) ||
|
||||
(current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
|
||||
return -EPERM;
|
||||
smp_rmb();
|
||||
if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
|
||||
return -EPERM;
|
||||
|
||||
return security_ptrace(current, task);
|
||||
}
|
||||
|
||||
int ptrace_may_attach(struct task_struct *task)
|
||||
{
|
||||
int err;
|
||||
task_lock(task);
|
||||
err = may_attach(task);
|
||||
task_unlock(task);
|
||||
return !err;
|
||||
}
|
||||
|
||||
int ptrace_attach(struct task_struct *task)
|
||||
{
|
||||
int retval;
|
||||
@@ -127,22 +154,10 @@ int ptrace_attach(struct task_struct *task)
|
||||
goto bad;
|
||||
if (task == current)
|
||||
goto bad;
|
||||
if (!task->mm)
|
||||
goto bad;
|
||||
if(((current->uid != task->euid) ||
|
||||
(current->uid != task->suid) ||
|
||||
(current->uid != task->uid) ||
|
||||
(current->gid != task->egid) ||
|
||||
(current->gid != task->sgid) ||
|
||||
(current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
|
||||
goto bad;
|
||||
smp_rmb();
|
||||
if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
|
||||
goto bad;
|
||||
/* the same process cannot be attached many times */
|
||||
if (task->ptrace & PT_PTRACED)
|
||||
goto bad;
|
||||
retval = security_ptrace(current, task);
|
||||
retval = may_attach(task);
|
||||
if (retval)
|
||||
goto bad;
|
||||
|
||||
|
@@ -430,10 +430,9 @@ EXPORT_SYMBOL(adjust_resource);
|
||||
*/
|
||||
struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name)
|
||||
{
|
||||
struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL);
|
||||
struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
|
||||
|
||||
if (res) {
|
||||
memset(res, 0, sizeof(*res));
|
||||
res->name = name;
|
||||
res->start = start;
|
||||
res->end = start + n - 1;
|
||||
|
340  kernel/sched.c
@@ -1478,6 +1478,7 @@ static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
|
||||
|
||||
/**
|
||||
* finish_task_switch - clean up after a task-switch
|
||||
* @rq: runqueue associated with task-switch
|
||||
* @prev: the thread we just switched away from.
|
||||
*
|
||||
* finish_task_switch must be called after the context switch, paired
|
||||
@@ -4779,7 +4780,7 @@ static int sd_parent_degenerate(struct sched_domain *sd,
|
||||
* Attach the domain 'sd' to 'cpu' as its base domain. Callers must
|
||||
* hold the hotplug lock.
|
||||
*/
|
||||
void cpu_attach_domain(struct sched_domain *sd, int cpu)
|
||||
static void cpu_attach_domain(struct sched_domain *sd, int cpu)
|
||||
{
|
||||
runqueue_t *rq = cpu_rq(cpu);
|
||||
struct sched_domain *tmp;
|
||||
@@ -4802,7 +4803,7 @@ void cpu_attach_domain(struct sched_domain *sd, int cpu)
|
||||
}
|
||||
|
||||
/* cpus with isolated domains */
|
||||
cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE;
|
||||
static cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE;
|
||||
|
||||
/* Setup the mask of cpus configured for isolated domains */
|
||||
static int __init isolated_cpu_setup(char *str)
|
||||
@@ -4830,8 +4831,8 @@ __setup ("isolcpus=", isolated_cpu_setup);
|
||||
* covered by the given span, and will set each group's ->cpumask correctly,
|
||||
* and ->cpu_power to 0.
|
||||
*/
|
||||
void init_sched_build_groups(struct sched_group groups[],
|
||||
cpumask_t span, int (*group_fn)(int cpu))
|
||||
static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
|
||||
int (*group_fn)(int cpu))
|
||||
{
|
||||
struct sched_group *first = NULL, *last = NULL;
|
||||
cpumask_t covered = CPU_MASK_NONE;
|
||||
@@ -4864,12 +4865,85 @@ void init_sched_build_groups(struct sched_group groups[],
|
||||
last->next = first;
|
||||
}
|
||||
|
||||
#define SD_NODES_PER_DOMAIN 16
|
||||
|
||||
#ifdef ARCH_HAS_SCHED_DOMAIN
|
||||
extern void build_sched_domains(const cpumask_t *cpu_map);
|
||||
extern void arch_init_sched_domains(const cpumask_t *cpu_map);
|
||||
extern void arch_destroy_sched_domains(const cpumask_t *cpu_map);
|
||||
#else
|
||||
#ifdef CONFIG_NUMA
|
||||
/**
|
||||
* find_next_best_node - find the next node to include in a sched_domain
|
||||
* @node: node whose sched_domain we're building
|
||||
* @used_nodes: nodes already in the sched_domain
|
||||
*
|
||||
* Find the next node to include in a given scheduling domain. Simply
|
||||
* finds the closest node not already in the @used_nodes map.
|
||||
*
|
||||
* Should use nodemask_t.
|
||||
*/
|
||||
static int find_next_best_node(int node, unsigned long *used_nodes)
|
||||
{
|
||||
int i, n, val, min_val, best_node = 0;
|
||||
|
||||
min_val = INT_MAX;
|
||||
|
||||
for (i = 0; i < MAX_NUMNODES; i++) {
|
||||
/* Start at @node */
|
||||
n = (node + i) % MAX_NUMNODES;
|
||||
|
||||
if (!nr_cpus_node(n))
|
||||
continue;
|
||||
|
||||
/* Skip already used nodes */
|
||||
if (test_bit(n, used_nodes))
|
||||
continue;
|
||||
|
||||
/* Simple min distance search */
|
||||
val = node_distance(node, n);
|
||||
|
||||
if (val < min_val) {
|
||||
min_val = val;
|
||||
best_node = n;
|
||||
}
|
||||
}
|
||||
|
||||
set_bit(best_node, used_nodes);
|
||||
return best_node;
|
||||
}
|
||||
|
||||
/**
|
||||
* sched_domain_node_span - get a cpumask for a node's sched_domain
|
||||
* @node: node whose cpumask we're constructing
|
||||
* @size: number of nodes to include in this span
|
||||
*
|
||||
* Given a node, construct a good cpumask for its sched_domain to span. It
|
||||
* should be one that prevents unnecessary balancing, but also spreads tasks
|
||||
* out optimally.
|
||||
*/
|
||||
static cpumask_t sched_domain_node_span(int node)
|
||||
{
|
||||
int i;
|
||||
cpumask_t span, nodemask;
|
||||
DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
|
||||
|
||||
cpus_clear(span);
|
||||
bitmap_zero(used_nodes, MAX_NUMNODES);
|
||||
|
||||
nodemask = node_to_cpumask(node);
|
||||
cpus_or(span, span, nodemask);
|
||||
set_bit(node, used_nodes);
|
||||
|
||||
for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
|
||||
int next_node = find_next_best_node(node, used_nodes);
|
||||
nodemask = node_to_cpumask(next_node);
|
||||
cpus_or(span, span, nodemask);
|
||||
}
|
||||
|
||||
return span;
|
||||
}
#endif

/*
 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
 * can switch it on easily if needed.
 */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
static struct sched_group sched_group_cpus[NR_CPUS];
@@ -4891,46 +4965,45 @@ static int cpu_to_phys_group(int cpu)
}

#ifdef CONFIG_NUMA

/*
 * The init_sched_build_groups can't handle what we want to do with node
 * groups, so roll our own. Now each node has its own list of groups which
 * gets dynamically allocated.
 */
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group sched_group_nodes[MAX_NUMNODES];
static int cpu_to_node_group(int cpu)
static struct sched_group **sched_group_nodes_bycpu[NR_CPUS];

static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS];

static int cpu_to_allnodes_group(int cpu)
{
return cpu_to_node(cpu);
}
#endif

#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA)
/*
 * The domains setup code relies on siblings not spanning
 * multiple nodes. Make sure the architecture has a proper
 * siblings map:
 */
static void check_sibling_maps(void)
{
int i, j;

for_each_online_cpu(i) {
for_each_cpu_mask(j, cpu_sibling_map[i]) {
if (cpu_to_node(i) != cpu_to_node(j)) {
printk(KERN_INFO "warning: CPU %d siblings map "
"to different node - isolating "
"them.\n", i);
cpu_sibling_map[i] = cpumask_of_cpu(i);
break;
}
}
}
}
#endif

/*
 * Build sched domains for a given set of cpus and attach the sched domains
 * to the individual cpus
 */
static void build_sched_domains(const cpumask_t *cpu_map)
void build_sched_domains(const cpumask_t *cpu_map)
{
int i;
#ifdef CONFIG_NUMA
struct sched_group **sched_group_nodes = NULL;
struct sched_group *sched_group_allnodes = NULL;

/*
 * Allocate the per-node list of sched groups
 */
sched_group_nodes = kmalloc(sizeof(struct sched_group*)*MAX_NUMNODES,
GFP_ATOMIC);
if (!sched_group_nodes) {
printk(KERN_WARNING "Can not alloc sched group node list\n");
return;
}
sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes;
#endif

/*
 * Set up domains for cpus specified by the cpu_map.
@@ -4943,11 +5016,35 @@ static void build_sched_domains(const cpumask_t *cpu_map)
cpus_and(nodemask, nodemask, *cpu_map);

#ifdef CONFIG_NUMA
if (cpus_weight(*cpu_map)
> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
if (!sched_group_allnodes) {
sched_group_allnodes
= kmalloc(sizeof(struct sched_group)
* MAX_NUMNODES,
GFP_KERNEL);
if (!sched_group_allnodes) {
printk(KERN_WARNING
"Can not alloc allnodes sched group\n");
break;
}
sched_group_allnodes_bycpu[i]
= sched_group_allnodes;
}
sd = &per_cpu(allnodes_domains, i);
*sd = SD_ALLNODES_INIT;
sd->span = *cpu_map;
group = cpu_to_allnodes_group(i);
sd->groups = &sched_group_allnodes[group];
p = sd;
} else
p = NULL;

sd = &per_cpu(node_domains, i);
group = cpu_to_node_group(i);
*sd = SD_NODE_INIT;
sd->span = *cpu_map;
sd->groups = &sched_group_nodes[group];
sd->span = sched_domain_node_span(cpu_to_node(i));
sd->parent = p;
cpus_and(sd->span, sd->span, *cpu_map);
#endif

p = sd;
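
A quick worked example of the threshold above (the numbers are illustrative, not taken from the patch): with SD_NODES_PER_DOMAIN at 16, a 128-CPU cpu_map whose nodes each contribute 4 CPUs gives cpus_weight(*cpu_map) = 128 against 16 * 4 = 64, so the extra allnodes level is set up; a 32-CPU machine with the same 4-CPU nodes stays at or below 64 and gets only the per-node domains.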
@@ -4972,7 +5069,7 @@ static void build_sched_domains(const cpumask_t *cpu_map)

#ifdef CONFIG_SCHED_SMT
/* Set up CPU (sibling) groups */
for_each_online_cpu(i) {
for_each_cpu_mask(i, *cpu_map) {
cpumask_t this_sibling_map = cpu_sibling_map[i];
cpus_and(this_sibling_map, this_sibling_map, *cpu_map);
if (i != first_cpu(this_sibling_map))
@@ -4997,8 +5094,77 @@ static void build_sched_domains(const cpumask_t *cpu_map)

#ifdef CONFIG_NUMA
/* Set up node groups */
init_sched_build_groups(sched_group_nodes, *cpu_map,
&cpu_to_node_group);
if (sched_group_allnodes)
init_sched_build_groups(sched_group_allnodes, *cpu_map,
&cpu_to_allnodes_group);

for (i = 0; i < MAX_NUMNODES; i++) {
/* Set up node groups */
struct sched_group *sg, *prev;
cpumask_t nodemask = node_to_cpumask(i);
cpumask_t domainspan;
cpumask_t covered = CPU_MASK_NONE;
int j;

cpus_and(nodemask, nodemask, *cpu_map);
if (cpus_empty(nodemask)) {
sched_group_nodes[i] = NULL;
continue;
}

domainspan = sched_domain_node_span(i);
cpus_and(domainspan, domainspan, *cpu_map);

sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
sched_group_nodes[i] = sg;
for_each_cpu_mask(j, nodemask) {
struct sched_domain *sd;
sd = &per_cpu(node_domains, j);
sd->groups = sg;
if (sd->groups == NULL) {
/* Turn off balancing if we have no groups */
sd->flags = 0;
}
}
if (!sg) {
printk(KERN_WARNING
"Can not alloc domain group for node %d\n", i);
continue;
}
sg->cpu_power = 0;
sg->cpumask = nodemask;
cpus_or(covered, covered, nodemask);
prev = sg;

for (j = 0; j < MAX_NUMNODES; j++) {
cpumask_t tmp, notcovered;
int n = (i + j) % MAX_NUMNODES;

cpus_complement(notcovered, covered);
cpus_and(tmp, notcovered, *cpu_map);
cpus_and(tmp, tmp, domainspan);
if (cpus_empty(tmp))
break;

nodemask = node_to_cpumask(n);
cpus_and(tmp, tmp, nodemask);
if (cpus_empty(tmp))
continue;

sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
if (!sg) {
printk(KERN_WARNING
"Can not alloc domain group for node %d\n", j);
break;
}
sg->cpu_power = 0;
sg->cpumask = tmp;
cpus_or(covered, covered, tmp);
prev->next = sg;
prev = sg;
}
prev->next = sched_group_nodes[i];
}
#endif

/* Calculate CPU power for physical packages and nodes */
@@ -5017,14 +5183,46 @@ static void build_sched_domains(const cpumask_t *cpu_map)
sd->groups->cpu_power = power;

#ifdef CONFIG_NUMA
if (i == first_cpu(sd->groups->cpumask)) {
/* Only add "power" once for each physical package. */
sd = &per_cpu(node_domains, i);
sd->groups->cpu_power += power;
sd = &per_cpu(allnodes_domains, i);
if (sd->groups) {
power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
(cpus_weight(sd->groups->cpumask)-1) / 10;
sd->groups->cpu_power = power;
}
#endif
}

#ifdef CONFIG_NUMA
for (i = 0; i < MAX_NUMNODES; i++) {
struct sched_group *sg = sched_group_nodes[i];
int j;

if (sg == NULL)
continue;
next_sg:
for_each_cpu_mask(j, sg->cpumask) {
struct sched_domain *sd;
int power;

sd = &per_cpu(phys_domains, j);
if (j != first_cpu(sd->groups->cpumask)) {
/*
 * Only add "power" once for each
 * physical package.
 */
continue;
}
power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
(cpus_weight(sd->groups->cpumask)-1) / 10;

sg->cpu_power += power;
}
sg = sg->next;
if (sg != sched_group_nodes[i])
goto next_sg;
}
#endif
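
To make the cpu_power arithmetic concrete (illustrative numbers only): SCHED_LOAD_SCALE is 128 in this kernel, so a group spanning 4 CPUs gets power = 128 + 128 * (4 - 1) / 10 = 128 + 38 = 166, while a single-CPU group stays at exactly SCHED_LOAD_SCALE. The divide-by-ten gives a group only about a 10% credit per additional CPU instead of scaling linearly with its size.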

/* Attach the domains */
for_each_cpu_mask(i, *cpu_map) {
struct sched_domain *sd;
@@ -5039,13 +5237,10 @@ static void build_sched_domains(const cpumask_t *cpu_map)
/*
 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
 */
static void arch_init_sched_domains(cpumask_t *cpu_map)
static void arch_init_sched_domains(const cpumask_t *cpu_map)
{
cpumask_t cpu_default_map;

#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA)
check_sibling_maps();
#endif
/*
 * Setup mask for cpus without special case scheduling requirements.
 * For now this just excludes isolated cpus, but could be used to
@@ -5058,10 +5253,47 @@ static void arch_init_sched_domains(cpumask_t *cpu_map)

static void arch_destroy_sched_domains(const cpumask_t *cpu_map)
{
/* Do nothing: everything is statically allocated. */
}
#ifdef CONFIG_NUMA
int i;
int cpu;

#endif /* ARCH_HAS_SCHED_DOMAIN */
for_each_cpu_mask(cpu, *cpu_map) {
struct sched_group *sched_group_allnodes
= sched_group_allnodes_bycpu[cpu];
struct sched_group **sched_group_nodes
= sched_group_nodes_bycpu[cpu];

if (sched_group_allnodes) {
kfree(sched_group_allnodes);
sched_group_allnodes_bycpu[cpu] = NULL;
}

if (!sched_group_nodes)
continue;

for (i = 0; i < MAX_NUMNODES; i++) {
cpumask_t nodemask = node_to_cpumask(i);
struct sched_group *oldsg, *sg = sched_group_nodes[i];

cpus_and(nodemask, nodemask, *cpu_map);
if (cpus_empty(nodemask))
continue;

if (sg == NULL)
continue;
sg = sg->next;
next_sg:
oldsg = sg;
sg = sg->next;
kfree(oldsg);
if (oldsg != sched_group_nodes[i])
goto next_sg;
}
kfree(sched_group_nodes);
sched_group_nodes_bycpu[cpu] = NULL;
}
#endif
}

/*
 * Detach sched domains from a group of cpus specified in cpu_map
@@ -678,7 +678,7 @@ static int check_kill_permission(int sig, struct siginfo *info,

/* forward decl */
static void do_notify_parent_cldstop(struct task_struct *tsk,
struct task_struct *parent,
int to_self,
int why);

/*
@@ -729,14 +729,7 @@ static void handle_stop_signal(int sig, struct task_struct *p)
p->signal->group_stop_count = 0;
p->signal->flags = SIGNAL_STOP_CONTINUED;
spin_unlock(&p->sighand->siglock);
if (p->ptrace & PT_PTRACED)
do_notify_parent_cldstop(p, p->parent,
CLD_STOPPED);
else
do_notify_parent_cldstop(
p->group_leader,
p->group_leader->real_parent,
CLD_STOPPED);
do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_STOPPED);
spin_lock(&p->sighand->siglock);
}
rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending);
@@ -777,14 +770,7 @@ static void handle_stop_signal(int sig, struct task_struct *p)
p->signal->flags = SIGNAL_STOP_CONTINUED;
p->signal->group_exit_code = 0;
spin_unlock(&p->sighand->siglock);
if (p->ptrace & PT_PTRACED)
do_notify_parent_cldstop(p, p->parent,
CLD_CONTINUED);
else
do_notify_parent_cldstop(
p->group_leader,
p->group_leader->real_parent,
CLD_CONTINUED);
do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_CONTINUED);
spin_lock(&p->sighand->siglock);
} else {
/*
@@ -1380,16 +1366,16 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
unsigned long flags;
int ret = 0;

/*
 * We need the tasklist lock even for the specific
 * thread case (when we don't need to follow the group
 * lists) in order to avoid races with "p->sighand"
 * going away or changing from under us.
 */
BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
read_lock(&tasklist_lock);
read_lock(&tasklist_lock);

if (unlikely(p->flags & PF_EXITING)) {
ret = -1;
goto out_err;
}

spin_lock_irqsave(&p->sighand->siglock, flags);

if (unlikely(!list_empty(&q->list))) {
/*
 * If an SI_TIMER entry is already queue just increment
@@ -1399,7 +1385,7 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
BUG();
q->info.si_overrun++;
goto out;
}
}
/* Short-circuit ignored signals. */
if (sig_ignored(p, sig)) {
ret = 1;
@@ -1414,8 +1400,10 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)

out:
spin_unlock_irqrestore(&p->sighand->siglock, flags);
out_err:
read_unlock(&tasklist_lock);
return(ret);

return ret;
}

int
@@ -1542,14 +1530,20 @@ void do_notify_parent(struct task_struct *tsk, int sig)
spin_unlock_irqrestore(&psig->siglock, flags);
}

static void
do_notify_parent_cldstop(struct task_struct *tsk, struct task_struct *parent,
int why)
static void do_notify_parent_cldstop(struct task_struct *tsk, int to_self, int why)
{
struct siginfo info;
unsigned long flags;
struct task_struct *parent;
struct sighand_struct *sighand;

if (to_self)
parent = tsk->parent;
else {
tsk = tsk->group_leader;
parent = tsk->real_parent;
}

info.si_signo = SIGCHLD;
info.si_errno = 0;
info.si_pid = tsk->pid;
@@ -1618,8 +1612,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
!(current->ptrace & PT_ATTACHED)) &&
(likely(current->parent->signal != current->signal) ||
!unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) {
do_notify_parent_cldstop(current, current->parent,
CLD_TRAPPED);
do_notify_parent_cldstop(current, 1, CLD_TRAPPED);
read_unlock(&tasklist_lock);
schedule();
} else {
@@ -1668,25 +1661,25 @@ void ptrace_notify(int exit_code)
static void
finish_stop(int stop_count)
{
int to_self;

/*
 * If there are no other threads in the group, or if there is
 * a group stop in progress and we are the last to stop,
 * report to the parent. When ptraced, every thread reports itself.
 */
if (stop_count < 0 || (current->ptrace & PT_PTRACED)) {
read_lock(&tasklist_lock);
do_notify_parent_cldstop(current, current->parent,
CLD_STOPPED);
read_unlock(&tasklist_lock);
}
else if (stop_count == 0) {
read_lock(&tasklist_lock);
do_notify_parent_cldstop(current->group_leader,
current->group_leader->real_parent,
CLD_STOPPED);
read_unlock(&tasklist_lock);
}
if (stop_count < 0 || (current->ptrace & PT_PTRACED))
to_self = 1;
else if (stop_count == 0)
to_self = 0;
else
goto out;

read_lock(&tasklist_lock);
do_notify_parent_cldstop(current, to_self, CLD_STOPPED);
read_unlock(&tasklist_lock);

out:
schedule();
/*
 * Now we don't run again until continued.
151
kernel/softlockup.c
Normal file
@@ -0,0 +1,151 @@
/*
 * Detect Soft Lockups
 *
 * started by Ingo Molnar, (C) 2005, Red Hat
 *
 * this code detects soft lockups: incidents where, on a CPU,
 * the kernel does not reschedule for 10 seconds or more.
 */

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/notifier.h>
#include <linux/module.h>

static DEFINE_SPINLOCK(print_lock);

static DEFINE_PER_CPU(unsigned long, timestamp) = 0;
static DEFINE_PER_CPU(unsigned long, print_timestamp) = 0;
static DEFINE_PER_CPU(struct task_struct *, watchdog_task);

static int did_panic = 0;
static int softlock_panic(struct notifier_block *this, unsigned long event,
void *ptr)
{
did_panic = 1;

return NOTIFY_DONE;
}

static struct notifier_block panic_block = {
.notifier_call = softlock_panic,
};

void touch_softlockup_watchdog(void)
{
per_cpu(timestamp, raw_smp_processor_id()) = jiffies;
}
EXPORT_SYMBOL(touch_softlockup_watchdog);

/*
 * This callback runs from the timer interrupt, and checks
 * whether the watchdog thread has hung or not:
 */
void softlockup_tick(struct pt_regs *regs)
{
int this_cpu = smp_processor_id();
unsigned long timestamp = per_cpu(timestamp, this_cpu);

if (per_cpu(print_timestamp, this_cpu) == timestamp)
return;

/* Do not cause a second panic when there already was one */
if (did_panic)
return;

if (time_after(jiffies, timestamp + 10*HZ)) {
per_cpu(print_timestamp, this_cpu) = timestamp;

spin_lock(&print_lock);
printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n",
this_cpu);
show_regs(regs);
spin_unlock(&print_lock);
}
}

/*
 * The watchdog thread - runs every second and touches the timestamp.
 */
static int watchdog(void * __bind_cpu)
{
struct sched_param param = { .sched_priority = 99 };
int this_cpu = (long) __bind_cpu;

printk("softlockup thread %d started up.\n", this_cpu);

sched_setscheduler(current, SCHED_FIFO, &param);
current->flags |= PF_NOFREEZE;

set_current_state(TASK_INTERRUPTIBLE);

/*
 * Run briefly once per second - if this gets delayed for
 * more than 10 seconds then the debug-printout triggers
 * in softlockup_tick():
 */
while (!kthread_should_stop()) {
msleep_interruptible(1000);
touch_softlockup_watchdog();
}
__set_current_state(TASK_RUNNING);

return 0;
}

/*
 * Create/destroy watchdog threads as CPUs come and go:
 */
static int __devinit
cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
int hotcpu = (unsigned long)hcpu;
struct task_struct *p;

switch (action) {
case CPU_UP_PREPARE:
BUG_ON(per_cpu(watchdog_task, hotcpu));
p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
if (IS_ERR(p)) {
printk("watchdog for %i failed\n", hotcpu);
return NOTIFY_BAD;
}
per_cpu(watchdog_task, hotcpu) = p;
kthread_bind(p, hotcpu);
break;
case CPU_ONLINE:

wake_up_process(per_cpu(watchdog_task, hotcpu));
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
/* Unbind so it can run. Fall thru. */
kthread_bind(per_cpu(watchdog_task, hotcpu), smp_processor_id());
case CPU_DEAD:
p = per_cpu(watchdog_task, hotcpu);
per_cpu(watchdog_task, hotcpu) = NULL;
kthread_stop(p);
break;
#endif /* CONFIG_HOTPLUG_CPU */
}
return NOTIFY_OK;
}

static struct notifier_block __devinitdata cpu_nfb = {
.notifier_call = cpu_callback
};

__init void spawn_softlockup_task(void)
{
void *cpu = (void *)(long)smp_processor_id();

cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
register_cpu_notifier(&cpu_nfb);

notifier_chain_register(&panic_notifier_list, &panic_block);
}
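
The mechanism is easy to prototype outside the kernel. Below is a rough userspace analogue (pthreads, time() and sleep() standing in for the per-CPU kthread, jiffies and the timer tick; it is a sketch, not the kernel code): one thread plays the watchdog and touches a timestamp every second, while the main loop plays softlockup_tick() and complains if the timestamp goes stale for more than 10 seconds. Suspending the watchdog thread for longer than that, the userspace stand-in for a CPU stuck in kernel code, makes the message appear.

    /* Build with: cc -pthread demo.c */
    #include <pthread.h>
    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>

    static volatile time_t timestamp;

    /* Stand-in for the per-CPU watchdog thread: touch the timestamp once a second. */
    static void *watchdog(void *arg)
    {
        (void)arg;
        for (;;) {
            timestamp = time(NULL);
            sleep(1);
        }
        return NULL;
    }

    /* Stand-in for softlockup_tick(): called periodically, checks for staleness. */
    static void tick(void)
    {
        if (time(NULL) > timestamp + 10)
            fprintf(stderr, "BUG: soft lockup detected!\n");
    }

    int main(void)
    {
        pthread_t tid;

        timestamp = time(NULL);
        pthread_create(&tid, NULL, watchdog, NULL);
        for (;;) {
            tick();
            usleep(100 * 1000);  /* pretend to be the timer interrupt */
        }
        return 0;
    }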
@@ -1711,7 +1711,6 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
unsigned long arg4, unsigned long arg5)
{
long error;
int sig;

error = security_task_prctl(option, arg2, arg3, arg4, arg5);
if (error)
@@ -1719,12 +1718,11 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,

switch (option) {
case PR_SET_PDEATHSIG:
sig = arg2;
if (!valid_signal(sig)) {
if (!valid_signal(arg2)) {
error = -EINVAL;
break;
}
current->pdeath_signal = sig;
current->pdeath_signal = arg2;
break;
case PR_GET_PDEATHSIG:
error = put_user(current->pdeath_signal, (int __user *)arg2);

@@ -950,6 +950,7 @@ void do_timer(struct pt_regs *regs)
{
jiffies_64++;
update_times();
softlockup_tick(regs);
}

#ifdef __ARCH_WANT_SYS_ALARM
@@ -1428,7 +1429,7 @@ static inline u64 time_interpolator_get_cycles(unsigned int src)
}
}

static inline u64 time_interpolator_get_counter(void)
static inline u64 time_interpolator_get_counter(int writelock)
{
unsigned int src = time_interpolator->source;

@@ -1442,6 +1443,15 @@ static inline u64 time_interpolator_get_counter(void)
now = time_interpolator_get_cycles(src);
if (lcycle && time_after(lcycle, now))
return lcycle;

/* When holding the xtime write lock, there's no need
 * to add the overhead of the cmpxchg. Readers are
 * forced to retry until the write lock is released.
 */
if (writelock) {
time_interpolator->last_cycle = now;
return now;
}
/* Keep track of the last timer value returned. The use of cmpxchg here
 * will cause contention in an SMP environment.
 */
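
For context, the cmpxchg pattern the two comments are weighing looks roughly like the sketch below, written with GCC's __sync builtins as a userspace stand-in for the kernel primitive (illustrative only). Every caller loops until it either publishes a newer value or observes that another CPU already has, and that retry loop is the per-read cost the writelock fast path above skips.

    #include <stdio.h>

    static unsigned long last_cycle;

    /* Publish now only if it is newer than the value already recorded. */
    static unsigned long publish_counter(unsigned long now)
    {
        unsigned long prev;

        do {
            prev = last_cycle;
            if (prev >= now)  /* someone else already stored a newer value */
                return prev;
        } while (!__sync_bool_compare_and_swap(&last_cycle, prev, now));

        return now;
    }

    int main(void)
    {
        printf("%lu\n", publish_counter(100));  /* 100 */
        printf("%lu\n", publish_counter(90));   /* still 100: stale update rejected */
        return 0;
    }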
@@ -1455,7 +1465,7 @@ static inline u64 time_interpolator_get_counter(void)
void time_interpolator_reset(void)
{
time_interpolator->offset = 0;
time_interpolator->last_counter = time_interpolator_get_counter();
time_interpolator->last_counter = time_interpolator_get_counter(1);
}

#define GET_TI_NSECS(count,i) (((((count) - i->last_counter) & (i)->mask) * (i)->nsec_per_cyc) >> (i)->shift)
@@ -1467,7 +1477,7 @@ unsigned long time_interpolator_get_offset(void)
return 0;

return time_interpolator->offset +
GET_TI_NSECS(time_interpolator_get_counter(), time_interpolator);
GET_TI_NSECS(time_interpolator_get_counter(0), time_interpolator);
}

#define INTERPOLATOR_ADJUST 65536
@@ -1490,7 +1500,7 @@ static void time_interpolator_update(long delta_nsec)
 * and the tuning logic insures that.
 */

counter = time_interpolator_get_counter();
counter = time_interpolator_get_counter(1);
offset = time_interpolator->offset + GET_TI_NSECS(counter, time_interpolator);

if (delta_nsec < 0 || (unsigned long) delta_nsec < offset)

@@ -308,10 +308,9 @@ struct workqueue_struct *__create_workqueue(const char *name,
struct workqueue_struct *wq;
struct task_struct *p;

wq = kmalloc(sizeof(*wq), GFP_KERNEL);
wq = kzalloc(sizeof(*wq), GFP_KERNEL);
if (!wq)
return NULL;
memset(wq, 0, sizeof(*wq));

wq->name = name;
/* We don't need the distraction of CPUs appearing and vanishing. */
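
The kzalloc() change above is a pure simplification: when the allocation succeeds, kzalloc(size, flags) behaves like kmalloc(size, flags) followed by memset(ptr, 0, size), which is why the separate memset() line disappears. A minimal userspace sketch of the same idea (illustrative, not the kernel implementation):

    #include <stdlib.h>
    #include <string.h>

    /* Allocate size bytes and zero them, mirroring what kzalloc() provides. */
    static void *zalloc(size_t size)
    {
        void *p = malloc(size);

        if (p)
            memset(p, 0, size);
        return p;
    }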
@@ -499,7 +498,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
case CPU_UP_PREPARE:
/* Create a new workqueue thread for it. */
list_for_each_entry(wq, &workqueues, list) {
if (create_workqueue_thread(wq, hotcpu) < 0) {
if (!create_workqueue_thread(wq, hotcpu)) {
printk("workqueue for %i failed\n", hotcpu);
return NOTIFY_BAD;
}