Merge commit 'origin/master' into next

Manual merge of:
	arch/powerpc/include/asm/elf.h
	drivers/i2c/busses/i2c-mpc.c
This commit is contained in:
Benjamin Herrenschmidt
2009-03-30 14:04:53 +11:00
4402 changed files with 317171 additions and 163158 deletions

View File

@@ -49,6 +49,7 @@ asynchronous and synchronous parts of the kernel.
*/
#include <linux/async.h>
#include <linux/bug.h>
#include <linux/module.h>
#include <linux/wait.h>
#include <linux/sched.h>
@@ -387,20 +388,11 @@ static int async_manager_thread(void *unused)
static int __init async_init(void)
{
if (async_enabled)
if (IS_ERR(kthread_run(async_manager_thread, NULL,
"async/mgr")))
async_enabled = 0;
async_enabled =
!IS_ERR(kthread_run(async_manager_thread, NULL, "async/mgr"));
WARN_ON(!async_enabled);
return 0;
}
static int __init setup_async(char *str)
{
async_enabled = 1;
return 1;
}
__setup("fastboot", setup_async);
core_initcall(async_init);

View File

@@ -1071,7 +1071,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
mutex_unlock(&cgroup_mutex);
}
return simple_set_mnt(mnt, sb);
simple_set_mnt(mnt, sb);
return 0;
free_cg_links:
free_cg_links(&tmp_cg_links);
@@ -1627,7 +1628,7 @@ static struct inode_operations cgroup_dir_inode_operations = {
static int cgroup_create_file(struct dentry *dentry, int mode,
struct super_block *sb)
{
static struct dentry_operations cgroup_dops = {
static const struct dentry_operations cgroup_dops = {
.d_iput = cgroup_diput,
};

View File

@@ -980,12 +980,9 @@ static void check_stack_usage(void)
{
static DEFINE_SPINLOCK(low_water_lock);
static int lowest_to_date = THREAD_SIZE;
unsigned long *n = end_of_stack(current);
unsigned long free;
while (*n == 0)
n++;
free = (unsigned long)n - (unsigned long)end_of_stack(current);
free = stack_not_used(current);
if (free >= lowest_to_date)
return;

View File

@@ -61,6 +61,7 @@
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
#include <trace/sched.h>
#include <linux/magic.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -212,6 +213,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
{
struct task_struct *tsk;
struct thread_info *ti;
unsigned long *stackend;
int err;
prepare_to_copy(orig);
@@ -237,6 +240,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
goto out;
setup_thread_stack(tsk, orig);
stackend = end_of_stack(tsk);
*stackend = STACK_END_MAGIC; /* for overflow detection */
#ifdef CONFIG_CC_STACKPROTECTOR
tsk->stack_canary = get_random_int();

View File

@@ -114,7 +114,9 @@ struct futex_q {
};
/*
* Split the global futex_lock into every hash list lock.
* Hash buckets are shared by all the futex_keys that hash to the same
* location. Each key may have multiple futex_q structures, one for each task
* waiting on a futex.
*/
struct futex_hash_bucket {
spinlock_t lock;
@@ -189,8 +191,7 @@ static void drop_futex_key_refs(union futex_key *key)
/**
* get_futex_key - Get parameters which are the keys for a futex.
* @uaddr: virtual address of the futex
* @shared: NULL for a PROCESS_PRIVATE futex,
* &current->mm->mmap_sem for a PROCESS_SHARED futex
* @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
* @key: address where result is stored.
*
* Returns a negative error code or 0
@@ -200,9 +201,7 @@ static void drop_futex_key_refs(union futex_key *key)
* offset_within_page). For private mappings, it's (uaddr, current->mm).
* We can usually work out the index without swapping in the page.
*
* fshared is NULL for PROCESS_PRIVATE futexes
* For other futexes, it points to &current->mm->mmap_sem and
* caller must have taken the reader lock. but NOT any spinlocks.
* lock_page() might sleep, the caller should not hold a spinlock.
*/
static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
{
@@ -299,41 +298,6 @@ static int get_futex_value_locked(u32 *dest, u32 __user *from)
return ret ? -EFAULT : 0;
}
/*
* Fault handling.
*/
static int futex_handle_fault(unsigned long address, int attempt)
{
struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
int ret = -EFAULT;
if (attempt > 2)
return ret;
down_read(&mm->mmap_sem);
vma = find_vma(mm, address);
if (vma && address >= vma->vm_start &&
(vma->vm_flags & VM_WRITE)) {
int fault;
fault = handle_mm_fault(mm, vma, address, 1);
if (unlikely((fault & VM_FAULT_ERROR))) {
#if 0
/* XXX: let's do this when we verify it is OK */
if (ret & VM_FAULT_OOM)
ret = -ENOMEM;
#endif
} else {
ret = 0;
if (fault & VM_FAULT_MAJOR)
current->maj_flt++;
else
current->min_flt++;
}
}
up_read(&mm->mmap_sem);
return ret;
}
/*
* PI code:
@@ -589,10 +553,9 @@ static void wake_futex(struct futex_q *q)
* The waiting task can free the futex_q as soon as this is written,
* without taking any locks. This must come last.
*
* A memory barrier is required here to prevent the following store
* to lock_ptr from getting ahead of the wakeup. Clearing the lock
* at the end of wake_up_all() does not prevent this store from
* moving.
* A memory barrier is required here to prevent the following store to
* lock_ptr from getting ahead of the wakeup. Clearing the lock at the
* end of wake_up() does not prevent this store from moving.
*/
smp_wmb();
q->lock_ptr = NULL;
@@ -692,9 +655,16 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
}
}
static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
spin_unlock(&hb1->lock);
if (hb1 != hb2)
spin_unlock(&hb2->lock);
}
/*
* Wake up all waiters hashed on the physical page that is mapped
* to this virtual address:
* Wake up waiters matching bitset queued on this futex (uaddr).
*/
static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
{
@@ -750,9 +720,9 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
struct futex_hash_bucket *hb1, *hb2;
struct plist_head *head;
struct futex_q *this, *next;
int ret, op_ret, attempt = 0;
int ret, op_ret;
retryfull:
retry:
ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0))
goto out;
@@ -763,16 +733,13 @@ retryfull:
hb1 = hash_futex(&key1);
hb2 = hash_futex(&key2);
retry:
double_lock_hb(hb1, hb2);
retry_private:
op_ret = futex_atomic_op_inuser(op, uaddr2);
if (unlikely(op_ret < 0)) {
u32 dummy;
spin_unlock(&hb1->lock);
if (hb1 != hb2)
spin_unlock(&hb2->lock);
double_unlock_hb(hb1, hb2);
#ifndef CONFIG_MMU
/*
@@ -788,26 +755,16 @@ retry:
goto out_put_keys;
}
/*
* futex_atomic_op_inuser needs to both read and write
* *(int __user *)uaddr2, but we can't modify it
* non-atomically. Therefore, if get_user below is not
* enough, we need to handle the fault ourselves, while
* still holding the mmap_sem.
*/
if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr2,
attempt);
if (ret)
goto out_put_keys;
goto retry;
}
ret = get_user(dummy, uaddr2);
if (ret)
return ret;
goto out_put_keys;
goto retryfull;
if (!fshared)
goto retry_private;
put_futex_key(fshared, &key2);
put_futex_key(fshared, &key1);
goto retry;
}
head = &hb1->chain;
@@ -834,9 +791,7 @@ retry:
ret += op_ret;
}
spin_unlock(&hb1->lock);
if (hb1 != hb2)
spin_unlock(&hb2->lock);
double_unlock_hb(hb1, hb2);
out_put_keys:
put_futex_key(fshared, &key2);
out_put_key1:
@@ -869,6 +824,7 @@ retry:
hb1 = hash_futex(&key1);
hb2 = hash_futex(&key2);
retry_private:
double_lock_hb(hb1, hb2);
if (likely(cmpval != NULL)) {
@@ -877,16 +833,18 @@ retry:
ret = get_futex_value_locked(&curval, uaddr1);
if (unlikely(ret)) {
spin_unlock(&hb1->lock);
if (hb1 != hb2)
spin_unlock(&hb2->lock);
double_unlock_hb(hb1, hb2);
ret = get_user(curval, uaddr1);
if (ret)
goto out_put_keys;
if (!ret)
goto retry;
if (!fshared)
goto retry_private;
goto out_put_keys;
put_futex_key(fshared, &key2);
put_futex_key(fshared, &key1);
goto retry;
}
if (curval != *cmpval) {
ret = -EAGAIN;
@@ -923,9 +881,7 @@ retry:
}
out_unlock:
spin_unlock(&hb1->lock);
if (hb1 != hb2)
spin_unlock(&hb2->lock);
double_unlock_hb(hb1, hb2);
/* drop_futex_key_refs() must be called outside the spinlocks. */
while (--drop_count >= 0)
@@ -1063,7 +1019,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
struct futex_pi_state *pi_state = q->pi_state;
struct task_struct *oldowner = pi_state->owner;
u32 uval, curval, newval;
int ret, attempt = 0;
int ret;
/* Owner died? */
if (!pi_state->owner)
@@ -1076,11 +1032,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
* in the user space variable. This must be atomic as we have
* to preserve the owner died bit here.
*
* Note: We write the user space value _before_ changing the
* pi_state because we can fault here. Imagine swapped out
* pages or a fork, which was running right before we acquired
* mmap_sem, that marked all the anonymous memory readonly for
* cow.
* Note: We write the user space value _before_ changing the pi_state
* because we can fault here. Imagine swapped out pages or a fork
* that marked all the anonymous memory readonly for cow.
*
* Modifying pi_state _before_ the user space value would
* leave the pi_state in an inconsistent state when we fault
@@ -1136,7 +1090,7 @@ retry:
handle_fault:
spin_unlock(q->lock_ptr);
ret = futex_handle_fault((unsigned long)uaddr, attempt++);
ret = get_user(uval, uaddr);
spin_lock(q->lock_ptr);
@@ -1185,10 +1139,11 @@ retry:
if (unlikely(ret != 0))
goto out;
retry_private:
hb = queue_lock(&q);
/*
* Access the page AFTER the futex is queued.
* Access the page AFTER the hash-bucket is locked.
* Order is important:
*
* Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
@@ -1204,20 +1159,23 @@ retry:
* a wakeup when *uaddr != val on entry to the syscall. This is
* rare, but normal.
*
* for shared futexes, we hold the mmap semaphore, so the mapping
* For shared futexes, we hold the mmap semaphore, so the mapping
* cannot have changed since we looked it up in get_futex_key.
*/
ret = get_futex_value_locked(&uval, uaddr);
if (unlikely(ret)) {
queue_unlock(&q, hb);
put_futex_key(fshared, &q.key);
ret = get_user(uval, uaddr);
if (ret)
goto out_put_key;
if (!ret)
goto retry;
goto out;
if (!fshared)
goto retry_private;
put_futex_key(fshared, &q.key);
goto retry;
}
ret = -EWOULDBLOCK;
if (unlikely(uval != val)) {
@@ -1248,16 +1206,13 @@ retry:
if (!abs_time)
schedule();
else {
unsigned long slack;
slack = current->timer_slack_ns;
if (rt_task(current))
slack = 0;
hrtimer_init_on_stack(&t.timer,
clockrt ? CLOCK_REALTIME :
CLOCK_MONOTONIC,
HRTIMER_MODE_ABS);
hrtimer_init_sleeper(&t, current);
hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
hrtimer_set_expires_range_ns(&t.timer, *abs_time,
current->timer_slack_ns);
hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
if (!hrtimer_active(&t.timer))
@@ -1354,7 +1309,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
struct futex_hash_bucket *hb;
u32 uval, newval, curval;
struct futex_q q;
int ret, lock_taken, ownerdied = 0, attempt = 0;
int ret, lock_taken, ownerdied = 0;
if (refill_pi_state_cache())
return -ENOMEM;
@@ -1374,7 +1329,7 @@ retry:
if (unlikely(ret != 0))
goto out;
retry_unlocked:
retry_private:
hb = queue_lock(&q);
retry_locked:
@@ -1458,6 +1413,7 @@ retry_locked:
* exit to complete.
*/
queue_unlock(&q, hb);
put_futex_key(fshared, &q.key);
cond_resched();
goto retry;
@@ -1564,6 +1520,13 @@ retry_locked:
}
}
/*
* If fixup_pi_state_owner() faulted and was unable to handle the
* fault, unlock it and return the fault to userspace.
*/
if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
rt_mutex_unlock(&q.pi_state->pi_mutex);
/* Unqueue and drop the lock */
unqueue_me_pi(&q);
@@ -1591,22 +1554,18 @@ uaddr_faulted:
*/
queue_unlock(&q, hb);
if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr, attempt);
if (ret)
goto out_put_key;
goto retry_unlocked;
}
ret = get_user(uval, uaddr);
if (!ret)
goto retry;
if (ret)
goto out_put_key;
if (to)
destroy_hrtimer_on_stack(&to->timer);
return ret;
if (!fshared)
goto retry_private;
put_futex_key(fshared, &q.key);
goto retry;
}
/*
* Userspace attempted a TID -> 0 atomic transition, and failed.
* This is the in-kernel slowpath: we look up the PI state (if any),
@@ -1619,7 +1578,7 @@ static int futex_unlock_pi(u32 __user *uaddr, int fshared)
u32 uval;
struct plist_head *head;
union futex_key key = FUTEX_KEY_INIT;
int ret, attempt = 0;
int ret;
retry:
if (get_user(uval, uaddr))
@@ -1635,7 +1594,6 @@ retry:
goto out;
hb = hash_futex(&key);
retry_unlocked:
spin_lock(&hb->lock);
/*
@@ -1700,14 +1658,7 @@ pi_faulted:
* we have to drop the mmap_sem in order to call get_user().
*/
spin_unlock(&hb->lock);
if (attempt++) {
ret = futex_handle_fault((unsigned long)uaddr, attempt);
if (ret)
goto out;
uval = 0;
goto retry_unlocked;
}
put_futex_key(fshared, &key);
ret = get_user(uval, uaddr);
if (!ret)

View File

@@ -46,7 +46,10 @@ void dynamic_irq_init(unsigned int irq)
desc->irq_count = 0;
desc->irqs_unhandled = 0;
#ifdef CONFIG_SMP
cpumask_setall(&desc->affinity);
cpumask_setall(desc->affinity);
#ifdef CONFIG_GENERIC_PENDING_IRQ
cpumask_clear(desc->pending_mask);
#endif
#endif
spin_unlock_irqrestore(&desc->lock, flags);
}
@@ -78,6 +81,7 @@ void dynamic_irq_cleanup(unsigned int irq)
desc->handle_irq = handle_bad_irq;
desc->chip = &no_irq_chip;
desc->name = NULL;
clear_kstat_irqs(desc);
spin_unlock_irqrestore(&desc->lock, flags);
}
@@ -290,7 +294,8 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq)
desc->chip->mask_ack(irq);
else {
desc->chip->mask(irq);
desc->chip->ack(irq);
if (desc->chip->ack)
desc->chip->ack(irq);
}
}
@@ -476,7 +481,8 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
kstat_incr_irqs_this_cpu(irq, desc);
/* Start handling the irq */
desc->chip->ack(irq);
if (desc->chip->ack)
desc->chip->ack(irq);
desc = irq_remap_to_desc(irq, desc);
/* Mark the IRQ currently in progress.*/

View File

@@ -17,6 +17,7 @@
#include <linux/kernel_stat.h>
#include <linux/rculist.h>
#include <linux/hash.h>
#include <linux/bootmem.h>
#include "internals.h"
@@ -69,6 +70,7 @@ int nr_irqs = NR_IRQS;
EXPORT_SYMBOL_GPL(nr_irqs);
#ifdef CONFIG_SPARSE_IRQ
static struct irq_desc irq_desc_init = {
.irq = -1,
.status = IRQ_DISABLED,
@@ -76,26 +78,25 @@ static struct irq_desc irq_desc_init = {
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
#ifdef CONFIG_SMP
.affinity = CPU_MASK_ALL
#endif
};
void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
{
unsigned long bytes;
char *ptr;
int node;
/* Compute how many bytes we need per irq and allocate them */
bytes = nr * sizeof(unsigned int);
void *ptr;
node = cpu_to_node(cpu);
ptr = kzalloc_node(bytes, GFP_ATOMIC, node);
printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n", cpu, node);
ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node);
if (ptr)
desc->kstat_irqs = (unsigned int *)ptr;
/*
* don't overwite if can not get new one
* init_copy_kstat_irqs() could still use old one
*/
if (ptr) {
printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n",
cpu, node);
desc->kstat_irqs = ptr;
}
}
static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
@@ -113,6 +114,10 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
printk(KERN_ERR "can not alloc kstat_irqs\n");
BUG_ON(1);
}
if (!init_alloc_desc_masks(desc, cpu, false)) {
printk(KERN_ERR "can not alloc irq_desc cpumasks\n");
BUG_ON(1);
}
arch_init_chip_data(desc, cpu);
}
@@ -121,7 +126,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
*/
DEFINE_SPINLOCK(sparse_irq_lock);
struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly;
struct irq_desc **irq_desc_ptrs __read_mostly;
static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = {
[0 ... NR_IRQS_LEGACY-1] = {
@@ -131,14 +136,10 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
#ifdef CONFIG_SMP
.affinity = CPU_MASK_ALL
#endif
}
};
/* FIXME: use bootmem alloc ...*/
static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS];
static unsigned int *kstat_irqs_legacy;
int __init early_irq_init(void)
{
@@ -148,18 +149,30 @@ int __init early_irq_init(void)
init_irq_default_affinity();
/* initialize nr_irqs based on nr_cpu_ids */
arch_probe_nr_irqs();
printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs);
desc = irq_desc_legacy;
legacy_count = ARRAY_SIZE(irq_desc_legacy);
/* allocate irq_desc_ptrs array based on nr_irqs */
irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *));
/* allocate based on nr_cpu_ids */
/* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */
kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids *
sizeof(int));
for (i = 0; i < legacy_count; i++) {
desc[i].irq = i;
desc[i].kstat_irqs = kstat_irqs_legacy[i];
desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
init_alloc_desc_masks(&desc[i], 0, true);
irq_desc_ptrs[i] = desc + i;
}
for (i = legacy_count; i < NR_IRQS; i++)
for (i = legacy_count; i < nr_irqs; i++)
irq_desc_ptrs[i] = NULL;
return arch_early_irq_init();
@@ -167,7 +180,10 @@ int __init early_irq_init(void)
struct irq_desc *irq_to_desc(unsigned int irq)
{
return (irq < NR_IRQS) ? irq_desc_ptrs[irq] : NULL;
if (irq_desc_ptrs && irq < nr_irqs)
return irq_desc_ptrs[irq];
return NULL;
}
struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
@@ -176,10 +192,9 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
unsigned long flags;
int node;
if (irq >= NR_IRQS) {
printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n",
irq, NR_IRQS);
WARN_ON(1);
if (irq >= nr_irqs) {
WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n",
irq, nr_irqs);
return NULL;
}
@@ -221,12 +236,10 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
#ifdef CONFIG_SMP
.affinity = CPU_MASK_ALL
#endif
}
};
static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS];
int __init early_irq_init(void)
{
struct irq_desc *desc;
@@ -235,12 +248,16 @@ int __init early_irq_init(void)
init_irq_default_affinity();
printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS);
desc = irq_desc;
count = ARRAY_SIZE(irq_desc);
for (i = 0; i < count; i++)
for (i = 0; i < count; i++) {
desc[i].irq = i;
init_alloc_desc_masks(&desc[i], 0, true);
desc[i].kstat_irqs = kstat_irqs_all[i];
}
return arch_early_irq_init();
}
@@ -255,6 +272,11 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
}
#endif /* !CONFIG_SPARSE_IRQ */
void clear_kstat_irqs(struct irq_desc *desc)
{
memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
}
/*
* What should we do if we get a hw irq event on an illegal vector?
* Each architecture has to answer this themself.
@@ -328,6 +350,8 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
irqreturn_t ret, retval = IRQ_NONE;
unsigned int status = 0;
WARN_ONCE(!in_irq(), "BUG: IRQ handler called from non-hardirq context!");
if (!(action->flags & IRQF_DISABLED))
local_irq_enable_in_hardirq();
@@ -347,6 +371,11 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
}
#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
#ifdef CONFIG_ENABLE_WARN_DEPRECATED
# warning __do_IRQ is deprecated. Please convert to proper flow handlers
#endif
/**
* __do_IRQ - original all in one highlevel IRQ handler
* @irq: the interrupt number
@@ -467,12 +496,10 @@ void early_init_irq_lock_class(void)
}
}
#ifdef CONFIG_SPARSE_IRQ
unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
{
struct irq_desc *desc = irq_to_desc(irq);
return desc ? desc->kstat_irqs[cpu] : 0;
}
#endif
EXPORT_SYMBOL(kstat_irqs_cpu);

View File

@@ -15,8 +15,16 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
extern struct lock_class_key irq_desc_lock_class;
extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
extern void clear_kstat_irqs(struct irq_desc *desc);
extern spinlock_t sparse_irq_lock;
#ifdef CONFIG_SPARSE_IRQ
/* irq_desc_ptrs allocated at boot time */
extern struct irq_desc **irq_desc_ptrs;
#else
/* irq_desc_ptrs is a fixed size array */
extern struct irq_desc *irq_desc_ptrs[NR_IRQS];
#endif
#ifdef CONFIG_PROC_FS
extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);

View File

@@ -90,14 +90,14 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
#ifdef CONFIG_GENERIC_PENDING_IRQ
if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
cpumask_copy(&desc->affinity, cpumask);
cpumask_copy(desc->affinity, cpumask);
desc->chip->set_affinity(irq, cpumask);
} else {
desc->status |= IRQ_MOVE_PENDING;
cpumask_copy(&desc->pending_mask, cpumask);
cpumask_copy(desc->pending_mask, cpumask);
}
#else
cpumask_copy(&desc->affinity, cpumask);
cpumask_copy(desc->affinity, cpumask);
desc->chip->set_affinity(irq, cpumask);
#endif
desc->status |= IRQ_AFFINITY_SET;
@@ -109,7 +109,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
/*
* Generic version of the affinity autoselector.
*/
int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
static int setup_affinity(unsigned int irq, struct irq_desc *desc)
{
if (!irq_can_set_affinity(irq))
return 0;
@@ -119,21 +119,21 @@ int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
* one of the targets is online.
*/
if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
if (cpumask_any_and(&desc->affinity, cpu_online_mask)
if (cpumask_any_and(desc->affinity, cpu_online_mask)
< nr_cpu_ids)
goto set_affinity;
else
desc->status &= ~IRQ_AFFINITY_SET;
}
cpumask_and(&desc->affinity, cpu_online_mask, irq_default_affinity);
cpumask_and(desc->affinity, cpu_online_mask, irq_default_affinity);
set_affinity:
desc->chip->set_affinity(irq, &desc->affinity);
desc->chip->set_affinity(irq, desc->affinity);
return 0;
}
#else
static inline int do_irq_select_affinity(unsigned int irq, struct irq_desc *d)
static inline int setup_affinity(unsigned int irq, struct irq_desc *d)
{
return irq_select_affinity(irq);
}
@@ -149,14 +149,14 @@ int irq_select_affinity_usr(unsigned int irq)
int ret;
spin_lock_irqsave(&desc->lock, flags);
ret = do_irq_select_affinity(irq, desc);
ret = setup_affinity(irq, desc);
spin_unlock_irqrestore(&desc->lock, flags);
return ret;
}
#else
static inline int do_irq_select_affinity(int irq, struct irq_desc *desc)
static inline int setup_affinity(unsigned int irq, struct irq_desc *desc)
{
return 0;
}
@@ -389,9 +389,9 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
* allocate special interrupts that are part of the architecture.
*/
static int
__setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
__setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
{
struct irqaction *old, **p;
struct irqaction *old, **old_ptr;
const char *old_name = NULL;
unsigned long flags;
int shared = 0;
@@ -423,8 +423,8 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
* The following block of code has to be executed atomically
*/
spin_lock_irqsave(&desc->lock, flags);
p = &desc->action;
old = *p;
old_ptr = &desc->action;
old = *old_ptr;
if (old) {
/*
* Can't share interrupts unless both agree to and are
@@ -447,8 +447,8 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
/* add new interrupt at end of irq queue */
do {
p = &old->next;
old = *p;
old_ptr = &old->next;
old = *old_ptr;
} while (old);
shared = 1;
}
@@ -488,7 +488,7 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
desc->status |= IRQ_NO_BALANCING;
/* Set default affinity mask once everything is setup */
do_irq_select_affinity(irq, desc);
setup_affinity(irq, desc);
} else if ((new->flags & IRQF_TRIGGER_MASK)
&& (new->flags & IRQF_TRIGGER_MASK)
@@ -499,7 +499,7 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
(int)(new->flags & IRQF_TRIGGER_MASK));
}
*p = new;
*old_ptr = new;
/* Reset broken irq detection when installing new handler */
desc->irq_count = 0;
@@ -549,9 +549,102 @@ int setup_irq(unsigned int irq, struct irqaction *act)
return __setup_irq(irq, desc, act);
}
EXPORT_SYMBOL_GPL(setup_irq);
/*
* Internal function to unregister an irqaction - used to free
* regular and special interrupts that are part of the architecture.
*/
static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction *action, **action_ptr;
unsigned long flags;
WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
if (!desc)
return NULL;
spin_lock_irqsave(&desc->lock, flags);
/*
* There can be multiple actions per IRQ descriptor, find the right
* one based on the dev_id:
*/
action_ptr = &desc->action;
for (;;) {
action = *action_ptr;
if (!action) {
WARN(1, "Trying to free already-free IRQ %d\n", irq);
spin_unlock_irqrestore(&desc->lock, flags);
return NULL;
}
if (action->dev_id == dev_id)
break;
action_ptr = &action->next;
}
/* Found it - now remove it from the list of entries: */
*action_ptr = action->next;
/* Currently used only by UML, might disappear one day: */
#ifdef CONFIG_IRQ_RELEASE_METHOD
if (desc->chip->release)
desc->chip->release(irq, dev_id);
#endif
/* If this was the last handler, shut down the IRQ line: */
if (!desc->action) {
desc->status |= IRQ_DISABLED;
if (desc->chip->shutdown)
desc->chip->shutdown(irq);
else
desc->chip->disable(irq);
}
spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
/* Make sure it's not being used on another CPU: */
synchronize_irq(irq);
#ifdef CONFIG_DEBUG_SHIRQ
/*
* It's a shared IRQ -- the driver ought to be prepared for an IRQ
* event to happen even now it's being freed, so let's make sure that
* is so by doing an extra call to the handler ....
*
* ( We do this after actually deregistering it, to make sure that a
* 'real' IRQ doesn't run in * parallel with our fake. )
*/
if (action->flags & IRQF_SHARED) {
local_irq_save(flags);
action->handler(irq, dev_id);
local_irq_restore(flags);
}
#endif
return action;
}
/**
* free_irq - free an interrupt
* remove_irq - free an interrupt
* @irq: Interrupt line to free
* @act: irqaction for the interrupt
*
* Used to remove interrupts statically setup by the early boot process.
*/
void remove_irq(unsigned int irq, struct irqaction *act)
{
__free_irq(irq, act->dev_id);
}
EXPORT_SYMBOL_GPL(remove_irq);
/**
* free_irq - free an interrupt allocated with request_irq
* @irq: Interrupt line to free
* @dev_id: Device identity to free
*
@@ -566,73 +659,7 @@ int setup_irq(unsigned int irq, struct irqaction *act)
*/
void free_irq(unsigned int irq, void *dev_id)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irqaction **p;
unsigned long flags;
WARN_ON(in_interrupt());
if (!desc)
return;
spin_lock_irqsave(&desc->lock, flags);
p = &desc->action;
for (;;) {
struct irqaction *action = *p;
if (action) {
struct irqaction **pp = p;
p = &action->next;
if (action->dev_id != dev_id)
continue;
/* Found it - now remove it from the list of entries */
*pp = action->next;
/* Currently used only by UML, might disappear one day.*/
#ifdef CONFIG_IRQ_RELEASE_METHOD
if (desc->chip->release)
desc->chip->release(irq, dev_id);
#endif
if (!desc->action) {
desc->status |= IRQ_DISABLED;
if (desc->chip->shutdown)
desc->chip->shutdown(irq);
else
desc->chip->disable(irq);
}
spin_unlock_irqrestore(&desc->lock, flags);
unregister_handler_proc(irq, action);
/* Make sure it's not being used on another CPU */
synchronize_irq(irq);
#ifdef CONFIG_DEBUG_SHIRQ
/*
* It's a shared IRQ -- the driver ought to be
* prepared for it to happen even now it's
* being freed, so let's make sure.... We do
* this after actually deregistering it, to
* make sure that a 'real' IRQ doesn't run in
* parallel with our fake
*/
if (action->flags & IRQF_SHARED) {
local_irq_save(flags);
action->handler(irq, dev_id);
local_irq_restore(flags);
}
#endif
kfree(action);
return;
}
printk(KERN_ERR "Trying to free already-free IRQ %d\n", irq);
#ifdef CONFIG_DEBUG_SHIRQ
dump_stack();
#endif
spin_unlock_irqrestore(&desc->lock, flags);
return;
}
kfree(__free_irq(irq, dev_id));
}
EXPORT_SYMBOL(free_irq);
@@ -679,11 +706,12 @@ int request_irq(unsigned int irq, irq_handler_t handler,
* the behavior is classified as "will not fix" so we need to
* start nudging drivers away from using that idiom.
*/
if ((irqflags & (IRQF_SHARED|IRQF_DISABLED))
== (IRQF_SHARED|IRQF_DISABLED))
pr_warning("IRQ %d/%s: IRQF_DISABLED is not "
"guaranteed on shared IRQs\n",
irq, devname);
if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) ==
(IRQF_SHARED|IRQF_DISABLED)) {
pr_warning(
"IRQ %d/%s: IRQF_DISABLED is not guaranteed on shared IRQs\n",
irq, devname);
}
#ifdef CONFIG_LOCKDEP
/*
@@ -709,15 +737,13 @@ int request_irq(unsigned int irq, irq_handler_t handler,
if (!handler)
return -EINVAL;
action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
if (!action)
return -ENOMEM;
action->handler = handler;
action->flags = irqflags;
cpus_clear(action->mask);
action->name = devname;
action->next = NULL;
action->dev_id = dev_id;
retval = __setup_irq(irq, desc, action);

View File

@@ -18,7 +18,7 @@ void move_masked_irq(int irq)
desc->status &= ~IRQ_MOVE_PENDING;
if (unlikely(cpumask_empty(&desc->pending_mask)))
if (unlikely(cpumask_empty(desc->pending_mask)))
return;
if (!desc->chip->set_affinity)
@@ -38,13 +38,13 @@ void move_masked_irq(int irq)
* For correct operation this depends on the caller
* masking the irqs.
*/
if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask)
if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
< nr_cpu_ids)) {
cpumask_and(&desc->affinity,
&desc->pending_mask, cpu_online_mask);
desc->chip->set_affinity(irq, &desc->affinity);
cpumask_and(desc->affinity,
desc->pending_mask, cpu_online_mask);
desc->chip->set_affinity(irq, desc->affinity);
}
cpumask_clear(&desc->pending_mask);
cpumask_clear(desc->pending_mask);
}
void move_native_irq(int irq)

View File

@@ -17,16 +17,11 @@ static void init_copy_kstat_irqs(struct irq_desc *old_desc,
struct irq_desc *desc,
int cpu, int nr)
{
unsigned long bytes;
init_kstat_irqs(desc, cpu, nr);
if (desc->kstat_irqs != old_desc->kstat_irqs) {
/* Compute how many bytes we need per irq and allocate them */
bytes = nr * sizeof(unsigned int);
memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes);
}
if (desc->kstat_irqs != old_desc->kstat_irqs)
memcpy(desc->kstat_irqs, old_desc->kstat_irqs,
nr * sizeof(*desc->kstat_irqs));
}
static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
@@ -38,15 +33,22 @@ static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
old_desc->kstat_irqs = NULL;
}
static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
struct irq_desc *desc, int cpu)
{
memcpy(desc, old_desc, sizeof(struct irq_desc));
if (!init_alloc_desc_masks(desc, cpu, false)) {
printk(KERN_ERR "irq %d: can not get new irq_desc cpumask "
"for migration.\n", irq);
return false;
}
spin_lock_init(&desc->lock);
desc->cpu = cpu;
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
init_copy_desc_masks(old_desc, desc);
arch_init_copy_chip_data(old_desc, desc, cpu);
return true;
}
static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
@@ -76,12 +78,18 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
node = cpu_to_node(cpu);
desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
if (!desc) {
printk(KERN_ERR "irq %d: can not get new irq_desc for migration.\n", irq);
printk(KERN_ERR "irq %d: can not get new irq_desc "
"for migration.\n", irq);
/* still use old one */
desc = old_desc;
goto out_unlock;
}
init_copy_one_irq_desc(irq, old_desc, desc, cpu);
if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) {
/* still use old one */
kfree(desc);
desc = old_desc;
goto out_unlock;
}
irq_desc_ptrs[irq] = desc;
spin_unlock_irqrestore(&sparse_irq_lock, flags);

View File

@@ -20,11 +20,11 @@ static struct proc_dir_entry *root_irq_dir;
static int irq_affinity_proc_show(struct seq_file *m, void *v)
{
struct irq_desc *desc = irq_to_desc((long)m->private);
const struct cpumask *mask = &desc->affinity;
const struct cpumask *mask = desc->affinity;
#ifdef CONFIG_GENERIC_PENDING_IRQ
if (desc->status & IRQ_MOVE_PENDING)
mask = &desc->pending_mask;
mask = desc->pending_mask;
#endif
seq_cpumask(m, mask);
seq_putc(m, '\n');

View File

@@ -104,7 +104,7 @@ static int misrouted_irq(int irq)
return ok;
}
static void poll_spurious_irqs(unsigned long dummy)
static void poll_all_shared_irqs(void)
{
struct irq_desc *desc;
int i;
@@ -123,11 +123,23 @@ static void poll_spurious_irqs(unsigned long dummy)
try_one_irq(i, desc);
}
}
static void poll_spurious_irqs(unsigned long dummy)
{
poll_all_shared_irqs();
mod_timer(&poll_spurious_irq_timer,
jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
}
#ifdef CONFIG_DEBUG_SHIRQ
void debug_poll_all_shared_irqs(void)
{
poll_all_shared_irqs();
}
#endif
/*
* If 99,900 of the previous 100,000 interrupts have not been handled
* then assume that the IRQ is stuck in some manner. Drop a diagnostic

View File

@@ -1130,7 +1130,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu)
return;
memset(&prstatus, 0, sizeof(prstatus));
prstatus.pr_pid = current->pid;
elf_core_copy_regs(&prstatus.pr_reg, regs);
elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
&prstatus, sizeof(prstatus));
final_note(buf);

View File

@@ -9,6 +9,44 @@
* as published by the Free Software Foundation; version 2
* of the License.
*/
/*
* CONFIG_LATENCYTOP enables a kernel latency tracking infrastructure that is
* used by the "latencytop" userspace tool. The latency that is tracked is not
* the 'traditional' interrupt latency (which is primarily caused by something
* else consuming CPU), but instead, it is the latency an application encounters
* because the kernel sleeps on its behalf for various reasons.
*
* This code tracks 2 levels of statistics:
* 1) System level latency
* 2) Per process latency
*
* The latency is stored in fixed sized data structures in an accumulated form;
* if the "same" latency cause is hit twice, this will be tracked as one entry
* in the data structure. Both the count, total accumulated latency and maximum
* latency are tracked in this data structure. When the fixed size structure is
* full, no new causes are tracked until the buffer is flushed by writing to
* the /proc file; the userspace tool does this on a regular basis.
*
* A latency cause is identified by a stringified backtrace at the point that
* the scheduler gets invoked. The userland tool will use this string to
* identify the cause of the latency in human readable form.
*
* The information is exported via /proc/latency_stats and /proc/<pid>/latency.
* These files look like this:
*
* Latency Top version : v0.1
* 70 59433 4897 i915_irq_wait drm_ioctl vfs_ioctl do_vfs_ioctl sys_ioctl
* | | | |
* | | | +----> the stringified backtrace
* | | +---------> The maximum latency for this entry in microseconds
* | +--------------> The accumulated latency for this entry (microseconds)
* +-------------------> The number of times this entry is hit
*
* (note: the average latency is the accumulated latency divided by the number
* of times)
*/
#include <linux/latencytop.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
@@ -72,7 +110,7 @@ account_global_scheduler_latency(struct task_struct *tsk, struct latency_record
firstnonnull = i;
continue;
}
for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
unsigned long record = lat->backtrace[q];
if (latency_record[i].backtrace[q] != record) {
@@ -101,31 +139,52 @@ account_global_scheduler_latency(struct task_struct *tsk, struct latency_record
memcpy(&latency_record[i], lat, sizeof(struct latency_record));
}
static inline void store_stacktrace(struct task_struct *tsk, struct latency_record *lat)
/*
* Iterator to store a backtrace into a latency record entry
*/
static inline void store_stacktrace(struct task_struct *tsk,
struct latency_record *lat)
{
struct stack_trace trace;
memset(&trace, 0, sizeof(trace));
trace.max_entries = LT_BACKTRACEDEPTH;
trace.entries = &lat->backtrace[0];
trace.skip = 0;
save_stack_trace_tsk(tsk, &trace);
}
/**
* __account_scheduler_latency - record an occured latency
* @tsk - the task struct of the task hitting the latency
* @usecs - the duration of the latency in microseconds
* @inter - 1 if the sleep was interruptible, 0 if uninterruptible
*
* This function is the main entry point for recording latency entries
* as called by the scheduler.
*
* This function has a few special cases to deal with normal 'non-latency'
* sleeps: specifically, interruptible sleep longer than 5 msec is skipped
* since this usually is caused by waiting for events via select() and co.
*
* Negative latencies (caused by time going backwards) are also explicitly
* skipped.
*/
void __sched
account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
__account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
{
unsigned long flags;
int i, q;
struct latency_record lat;
if (!latencytop_enabled)
return;
/* Long interruptible waits are generally user requested... */
if (inter && usecs > 5000)
return;
/* Negative sleeps are time going backwards */
/* Zero-time sleeps are non-interesting */
if (usecs <= 0)
return;
memset(&lat, 0, sizeof(lat));
lat.count = 1;
lat.time = usecs;
@@ -143,12 +202,12 @@ account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
if (tsk->latency_record_count >= LT_SAVECOUNT)
goto out_unlock;
for (i = 0; i < LT_SAVECOUNT ; i++) {
for (i = 0; i < LT_SAVECOUNT; i++) {
struct latency_record *mylat;
int same = 1;
mylat = &tsk->latency_record[i];
for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
unsigned long record = lat.backtrace[q];
if (mylat->backtrace[q] != record) {
@@ -186,7 +245,7 @@ static int lstats_show(struct seq_file *m, void *v)
for (i = 0; i < MAXLR; i++) {
if (latency_record[i].backtrace[0]) {
int q;
seq_printf(m, "%i %li %li ",
seq_printf(m, "%i %lu %lu ",
latency_record[i].count,
latency_record[i].time,
latency_record[i].max);
@@ -223,7 +282,7 @@ static int lstats_open(struct inode *inode, struct file *filp)
return single_open(filp, lstats_show, NULL);
}
static struct file_operations lstats_fops = {
static const struct file_operations lstats_fops = {
.open = lstats_open,
.read = seq_read,
.write = lstats_write,
@@ -236,4 +295,4 @@ static int __init init_lstats_procfs(void)
proc_create("latency_stats", 0644, NULL, &lstats_fops);
return 0;
}
__initcall(init_lstats_procfs);
device_initcall(init_lstats_procfs);

View File

@@ -51,6 +51,7 @@
#include <linux/tracepoint.h>
#include <linux/ftrace.h>
#include <linux/async.h>
#include <linux/percpu.h>
#if 0
#define DEBUGP printk
@@ -366,6 +367,34 @@ static struct module *find_module(const char *name)
}
#ifdef CONFIG_SMP
#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
static void *percpu_modalloc(unsigned long size, unsigned long align,
const char *name)
{
void *ptr;
if (align > PAGE_SIZE) {
printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
name, align, PAGE_SIZE);
align = PAGE_SIZE;
}
ptr = __alloc_reserved_percpu(size, align);
if (!ptr)
printk(KERN_WARNING
"Could not allocate %lu bytes percpu data\n", size);
return ptr;
}
static void percpu_modfree(void *freeme)
{
free_percpu(freeme);
}
#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
/* Number of blocks used and allocated. */
static unsigned int pcpu_num_used, pcpu_num_allocated;
/* Size of each block. -ve means used. */
@@ -480,21 +509,6 @@ static void percpu_modfree(void *freeme)
}
}
static unsigned int find_pcpusec(Elf_Ehdr *hdr,
Elf_Shdr *sechdrs,
const char *secstrings)
{
return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
}
static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size)
{
int cpu;
for_each_possible_cpu(cpu)
memcpy(pcpudest + per_cpu_offset(cpu), from, size);
}
static int percpu_modinit(void)
{
pcpu_num_used = 2;
@@ -513,7 +527,26 @@ static int percpu_modinit(void)
return 0;
}
__initcall(percpu_modinit);
#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
static unsigned int find_pcpusec(Elf_Ehdr *hdr,
Elf_Shdr *sechdrs,
const char *secstrings)
{
return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
}
static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size)
{
int cpu;
for_each_possible_cpu(cpu)
memcpy(pcpudest + per_cpu_offset(cpu), from, size);
}
#else /* ... !CONFIG_SMP */
static inline void *percpu_modalloc(unsigned long size, unsigned long align,
const char *name)
{
@@ -535,6 +568,7 @@ static inline void percpu_modcopy(void *pcpudst, const void *src,
/* pcpusec should be 0, and size of that section should be 0. */
BUG_ON(size != 0);
}
#endif /* CONFIG_SMP */
#define MODINFO_ATTR(field) \
@@ -822,7 +856,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
mutex_lock(&module_mutex);
/* Store the name of the last unloaded module for diagnostic purposes */
strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
unregister_dynamic_debug_module(mod->name);
ddebug_remove_module(mod->name);
free_module(mod);
out:
@@ -1827,19 +1861,13 @@ static inline void add_kallsyms(struct module *mod,
}
#endif /* CONFIG_KALLSYMS */
static void dynamic_printk_setup(struct mod_debug *debug, unsigned int num)
static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num)
{
#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
unsigned int i;
for (i = 0; i < num; i++) {
register_dynamic_debug_module(debug[i].modname,
debug[i].type,
debug[i].logical_modname,
debug[i].flag_names,
debug[i].hash, debug[i].hash2);
}
#endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */
#ifdef CONFIG_DYNAMIC_DEBUG
if (ddebug_add_module(debug, num, debug->modname))
printk(KERN_ERR "dynamic debug error adding module: %s\n",
debug->modname);
#endif
}
static void *module_alloc_update_bounds(unsigned long size)
@@ -2213,12 +2241,13 @@ static noinline struct module *load_module(void __user *umod,
add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
if (!mod->taints) {
struct mod_debug *debug;
struct _ddebug *debug;
unsigned int num_debug;
debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
sizeof(*debug), &num_debug);
dynamic_printk_setup(debug, num_debug);
if (debug)
dynamic_debug_setup(debug, num_debug);
}
/* sechdrs[0].sh_size is always zero */

View File

@@ -74,6 +74,9 @@ NORET_TYPE void panic(const char * fmt, ...)
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
#ifdef CONFIG_DEBUG_BUGVERBOSE
dump_stack();
#endif
bust_spinlocks(0);
/*
@@ -355,15 +358,18 @@ EXPORT_SYMBOL(warn_slowpath);
#endif
#ifdef CONFIG_CC_STACKPROTECTOR
/*
* Called when gcc's -fstack-protector feature is used, and
* gcc detects corruption of the on-stack canary value
*/
void __stack_chk_fail(void)
{
panic("stack-protector: Kernel stack is corrupted");
panic("stack-protector: Kernel stack is corrupted in: %p\n",
__builtin_return_address(0));
}
EXPORT_SYMBOL(__stack_chk_fail);
#endif
core_param(panic, panic_timeout, int, 0644);

View File

@@ -1370,7 +1370,8 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
if (task_cputime_expired(&group_sample, &sig->cputime_expires))
return 1;
}
return 0;
return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY;
}
/*

View File

@@ -750,7 +750,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
* from the scheduler (trying to re-grab
* rq->lock), so defer it.
*/
__mod_timer(&buf->timer, jiffies + 1);
mod_timer(&buf->timer, jiffies + 1);
}
old = buf->data;

File diff suppressed because it is too large Load Diff

View File

@@ -24,11 +24,11 @@
* The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
* consistent between cpus (never more than 2 jiffies difference).
*/
#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/ktime.h>
#include <linux/sched.h>
/*
* Scheduler clock - returns current time in nanosec units.
@@ -43,6 +43,7 @@ unsigned long long __attribute__((weak)) sched_clock(void)
static __read_mostly int sched_clock_running;
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__read_mostly int sched_clock_stable;
struct sched_clock_data {
/*
@@ -87,7 +88,7 @@ void sched_clock_init(void)
}
/*
* min,max except they take wrapping into account
* min, max except they take wrapping into account
*/
static inline u64 wrap_min(u64 x, u64 y)
@@ -111,15 +112,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
s64 delta = now - scd->tick_raw;
u64 clock, min_clock, max_clock;
WARN_ON_ONCE(!irqs_disabled());
if (unlikely(delta < 0))
delta = 0;
/*
* scd->clock = clamp(scd->tick_gtod + delta,
* max(scd->tick_gtod, scd->clock),
* scd->tick_gtod + TICK_NSEC);
* max(scd->tick_gtod, scd->clock),
* scd->tick_gtod + TICK_NSEC);
*/
clock = scd->tick_gtod + delta;
@@ -148,12 +147,13 @@ static void lock_double_clock(struct sched_clock_data *data1,
u64 sched_clock_cpu(int cpu)
{
struct sched_clock_data *scd = cpu_sdc(cpu);
u64 now, clock, this_clock, remote_clock;
struct sched_clock_data *scd;
if (unlikely(!sched_clock_running))
return 0ull;
if (sched_clock_stable)
return sched_clock();
scd = cpu_sdc(cpu);
WARN_ON_ONCE(!irqs_disabled());
now = sched_clock();
@@ -195,14 +195,18 @@ u64 sched_clock_cpu(int cpu)
void sched_clock_tick(void)
{
struct sched_clock_data *scd = this_scd();
struct sched_clock_data *scd;
u64 now, now_gtod;
if (sched_clock_stable)
return;
if (unlikely(!sched_clock_running))
return;
WARN_ON_ONCE(!irqs_disabled());
scd = this_scd();
now_gtod = ktime_to_ns(ktime_get());
now = sched_clock();
@@ -250,7 +254,7 @@ u64 sched_clock_cpu(int cpu)
return sched_clock();
}
#endif
#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
unsigned long long cpu_clock(int cpu)
{

View File

@@ -272,7 +272,6 @@ static void print_cpu(struct seq_file *m, int cpu)
P(nr_switches);
P(nr_load_updates);
P(nr_uninterruptible);
SEQ_printf(m, " .%-30s: %lu\n", "jiffies", jiffies);
PN(next_balance);
P(curr->pid);
PN(clock);
@@ -287,9 +286,6 @@ static void print_cpu(struct seq_file *m, int cpu)
#ifdef CONFIG_SCHEDSTATS
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
P(yld_exp_empty);
P(yld_act_empty);
P(yld_both_empty);
P(yld_count);
P(sched_switch);
@@ -314,7 +310,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
u64 now = ktime_to_ns(ktime_get());
int cpu;
SEQ_printf(m, "Sched Debug Version: v0.08, %s %.*s\n",
SEQ_printf(m, "Sched Debug Version: v0.09, %s %.*s\n",
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
@@ -325,6 +321,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
P(jiffies);
PN(sysctl_sched_latency);
PN(sysctl_sched_min_granularity);
PN(sysctl_sched_wakeup_granularity);
@@ -397,6 +394,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
PN(se.vruntime);
PN(se.sum_exec_runtime);
PN(se.avg_overlap);
PN(se.avg_wakeup);
nr_switches = p->nvcsw + p->nivcsw;

View File

@@ -1314,16 +1314,63 @@ out:
}
#endif /* CONFIG_SMP */
static unsigned long wakeup_gran(struct sched_entity *se)
/*
* Adaptive granularity
*
* se->avg_wakeup gives the average time a task runs until it does a wakeup,
* with the limit of wakeup_gran -- when it never does a wakeup.
*
* So the smaller avg_wakeup is the faster we want this task to preempt,
* but we don't want to treat the preemptee unfairly and therefore allow it
* to run for at least the amount of time we'd like to run.
*
* NOTE: we use 2*avg_wakeup to increase the probability of actually doing one
*
* NOTE: we use *nr_running to scale with load, this nicely matches the
* degrading latency on load.
*/
static unsigned long
adaptive_gran(struct sched_entity *curr, struct sched_entity *se)
{
u64 this_run = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
u64 expected_wakeup = 2*se->avg_wakeup * cfs_rq_of(se)->nr_running;
u64 gran = 0;
if (this_run < expected_wakeup)
gran = expected_wakeup - this_run;
return min_t(s64, gran, sysctl_sched_wakeup_granularity);
}
static unsigned long
wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
{
unsigned long gran = sysctl_sched_wakeup_granularity;
if (cfs_rq_of(curr)->curr && sched_feat(ADAPTIVE_GRAN))
gran = adaptive_gran(curr, se);
/*
* More easily preempt - nice tasks, while not making it harder for
* + nice tasks.
* Since its curr running now, convert the gran from real-time
* to virtual-time in his units.
*/
if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
if (sched_feat(ASYM_GRAN)) {
/*
* By using 'se' instead of 'curr' we penalize light tasks, so
* they get preempted easier. That is, if 'se' < 'curr' then
* the resulting gran will be larger, therefore penalizing the
* lighter, if otoh 'se' > 'curr' then the resulting gran will
* be smaller, again penalizing the lighter task.
*
* This is especially important for buddies when the leftmost
* task is higher priority than the buddy.
*/
if (unlikely(se->load.weight != NICE_0_LOAD))
gran = calc_delta_fair(gran, se);
} else {
if (unlikely(curr->load.weight != NICE_0_LOAD))
gran = calc_delta_fair(gran, curr);
}
return gran;
}
@@ -1350,7 +1397,7 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
if (vdiff <= 0)
return -1;
gran = wakeup_gran(curr);
gran = wakeup_gran(curr, se);
if (vdiff > gran)
return 1;

View File

@@ -1,5 +1,6 @@
SCHED_FEAT(NEW_FAIR_SLEEPERS, 1)
SCHED_FEAT(NORMALIZED_SLEEPER, 1)
SCHED_FEAT(NORMALIZED_SLEEPER, 0)
SCHED_FEAT(ADAPTIVE_GRAN, 1)
SCHED_FEAT(WAKEUP_PREEMPT, 1)
SCHED_FEAT(START_DEBIT, 1)
SCHED_FEAT(AFFINE_WAKEUPS, 1)

View File

@@ -3,6 +3,40 @@
* policies)
*/
static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
return container_of(rt_se, struct task_struct, rt);
}
#ifdef CONFIG_RT_GROUP_SCHED
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
return rt_rq->rq;
}
static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
return rt_se->rt_rq;
}
#else /* CONFIG_RT_GROUP_SCHED */
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
return container_of(rt_rq, struct rq, rt);
}
static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
struct task_struct *p = rt_task_of(rt_se);
struct rq *rq = task_rq(p);
return &rq->rt;
}
#endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_SMP
static inline int rt_overloaded(struct rq *rq)
@@ -37,25 +71,69 @@ static inline void rt_clear_overload(struct rq *rq)
cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
}
static void update_rt_migration(struct rq *rq)
static void update_rt_migration(struct rt_rq *rt_rq)
{
if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1)) {
if (!rq->rt.overloaded) {
rt_set_overload(rq);
rq->rt.overloaded = 1;
if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) {
if (!rt_rq->overloaded) {
rt_set_overload(rq_of_rt_rq(rt_rq));
rt_rq->overloaded = 1;
}
} else if (rq->rt.overloaded) {
rt_clear_overload(rq);
rq->rt.overloaded = 0;
} else if (rt_rq->overloaded) {
rt_clear_overload(rq_of_rt_rq(rt_rq));
rt_rq->overloaded = 0;
}
}
#endif /* CONFIG_SMP */
static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
return container_of(rt_se, struct task_struct, rt);
if (rt_se->nr_cpus_allowed > 1)
rt_rq->rt_nr_migratory++;
update_rt_migration(rt_rq);
}
static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
if (rt_se->nr_cpus_allowed > 1)
rt_rq->rt_nr_migratory--;
update_rt_migration(rt_rq);
}
static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
plist_node_init(&p->pushable_tasks, p->prio);
plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
}
static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
}
#else
static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
}
static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
}
static inline
void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}
static inline
void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}
#endif /* CONFIG_SMP */
static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
return !list_empty(&rt_se->run_list);
@@ -79,16 +157,6 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
#define for_each_leaf_rt_rq(rt_rq, rq) \
list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
return rt_rq->rq;
}
static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
return rt_se->rt_rq;
}
#define for_each_sched_rt_entity(rt_se) \
for (; rt_se; rt_se = rt_se->parent)
@@ -108,7 +176,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
if (rt_rq->rt_nr_running) {
if (rt_se && !on_rt_rq(rt_se))
enqueue_rt_entity(rt_se);
if (rt_rq->highest_prio < curr->prio)
if (rt_rq->highest_prio.curr < curr->prio)
resched_task(curr);
}
}
@@ -176,19 +244,6 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
#define for_each_leaf_rt_rq(rt_rq, rq) \
for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
return container_of(rt_rq, struct rq, rt);
}
static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
struct task_struct *p = rt_task_of(rt_se);
struct rq *rq = task_rq(p);
return &rq->rt;
}
#define for_each_sched_rt_entity(rt_se) \
for (; rt_se; rt_se = NULL)
@@ -473,7 +528,7 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
struct rt_rq *rt_rq = group_rt_rq(rt_se);
if (rt_rq)
return rt_rq->highest_prio;
return rt_rq->highest_prio.curr;
#endif
return rt_task_of(rt_se)->prio;
@@ -547,91 +602,174 @@ static void update_curr_rt(struct rq *rq)
}
}
static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
#if defined CONFIG_SMP
static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);
static inline int next_prio(struct rq *rq)
{
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
rt_rq->rt_nr_running++;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
#ifdef CONFIG_SMP
struct rq *rq = rq_of_rt_rq(rt_rq);
#endif
struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu);
if (next && rt_prio(next->prio))
return next->prio;
else
return MAX_RT_PRIO;
}
static void
inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
struct rq *rq = rq_of_rt_rq(rt_rq);
if (prio < prev_prio) {
/*
* If the new task is higher in priority than anything on the
* run-queue, we know that the previous high becomes our
* next-highest.
*/
rt_rq->highest_prio.next = prev_prio;
rt_rq->highest_prio = rt_se_prio(rt_se);
#ifdef CONFIG_SMP
if (rq->online)
cpupri_set(&rq->rd->cpupri, rq->cpu,
rt_se_prio(rt_se));
#endif
}
#endif
#ifdef CONFIG_SMP
if (rt_se->nr_cpus_allowed > 1) {
struct rq *rq = rq_of_rt_rq(rt_rq);
cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
rq->rt.rt_nr_migratory++;
}
} else if (prio == rt_rq->highest_prio.curr)
/*
* If the next task is equal in priority to the highest on
* the run-queue, then we implicitly know that the next highest
* task cannot be any lower than current
*/
rt_rq->highest_prio.next = prio;
else if (prio < rt_rq->highest_prio.next)
/*
* Otherwise, we need to recompute next-highest
*/
rt_rq->highest_prio.next = next_prio(rq);
}
static void
dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
struct rq *rq = rq_of_rt_rq(rt_rq);
if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next))
rt_rq->highest_prio.next = next_prio(rq);
if (rq->online && rt_rq->highest_prio.curr != prev_prio)
cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
}
#else /* CONFIG_SMP */
static inline
void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
static inline
void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
#endif /* CONFIG_SMP */
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
static void
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
int prev_prio = rt_rq->highest_prio.curr;
if (prio < prev_prio)
rt_rq->highest_prio.curr = prio;
inc_rt_prio_smp(rt_rq, prio, prev_prio);
}
static void
dec_rt_prio(struct rt_rq *rt_rq, int prio)
{
int prev_prio = rt_rq->highest_prio.curr;
if (rt_rq->rt_nr_running) {
WARN_ON(prio < prev_prio);
/*
* This may have been our highest task, and therefore
* we may have some recomputation to do
*/
if (prio == prev_prio) {
struct rt_prio_array *array = &rt_rq->active;
rt_rq->highest_prio.curr =
sched_find_first_bit(array->bitmap);
}
} else
rt_rq->highest_prio.curr = MAX_RT_PRIO;
dec_rt_prio_smp(rt_rq, prio, prev_prio);
}
#else
static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
update_rt_migration(rq_of_rt_rq(rt_rq));
#endif
#ifdef CONFIG_RT_GROUP_SCHED
static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
if (rt_se_boosted(rt_se))
rt_rq->rt_nr_boosted++;
if (rt_rq->tg)
start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
#else
}
static void
dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
if (rt_se_boosted(rt_se))
rt_rq->rt_nr_boosted--;
WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
}
#else /* CONFIG_RT_GROUP_SCHED */
static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
start_rt_bandwidth(&def_rt_bandwidth);
#endif
}
static inline
void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
#endif /* CONFIG_RT_GROUP_SCHED */
static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
int prio = rt_se_prio(rt_se);
WARN_ON(!rt_prio(prio));
rt_rq->rt_nr_running++;
inc_rt_prio(rt_rq, prio);
inc_rt_migration(rt_se, rt_rq);
inc_rt_group(rt_se, rt_rq);
}
static inline
void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
#ifdef CONFIG_SMP
int highest_prio = rt_rq->highest_prio;
#endif
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
WARN_ON(!rt_rq->rt_nr_running);
rt_rq->rt_nr_running--;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
if (rt_rq->rt_nr_running) {
struct rt_prio_array *array;
WARN_ON(rt_se_prio(rt_se) < rt_rq->highest_prio);
if (rt_se_prio(rt_se) == rt_rq->highest_prio) {
/* recalculate */
array = &rt_rq->active;
rt_rq->highest_prio =
sched_find_first_bit(array->bitmap);
} /* otherwise leave rq->highest prio alone */
} else
rt_rq->highest_prio = MAX_RT_PRIO;
#endif
#ifdef CONFIG_SMP
if (rt_se->nr_cpus_allowed > 1) {
struct rq *rq = rq_of_rt_rq(rt_rq);
rq->rt.rt_nr_migratory--;
}
if (rt_rq->highest_prio != highest_prio) {
struct rq *rq = rq_of_rt_rq(rt_rq);
if (rq->online)
cpupri_set(&rq->rd->cpupri, rq->cpu,
rt_rq->highest_prio);
}
update_rt_migration(rq_of_rt_rq(rt_rq));
#endif /* CONFIG_SMP */
#ifdef CONFIG_RT_GROUP_SCHED
if (rt_se_boosted(rt_se))
rt_rq->rt_nr_boosted--;
WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
#endif
dec_rt_prio(rt_rq, rt_se_prio(rt_se));
dec_rt_migration(rt_se, rt_rq);
dec_rt_group(rt_se, rt_rq);
}
static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
@@ -718,6 +856,9 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
enqueue_rt_entity(rt_se);
if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
enqueue_pushable_task(rq, p);
inc_cpu_load(rq, p->se.load.weight);
}
@@ -728,6 +869,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
update_curr_rt(rq);
dequeue_rt_entity(rt_se);
dequeue_pushable_task(rq, p);
dec_cpu_load(rq, p->se.load.weight);
}
@@ -878,7 +1021,7 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
return next;
}
static struct task_struct *pick_next_task_rt(struct rq *rq)
static struct task_struct *_pick_next_task_rt(struct rq *rq)
{
struct sched_rt_entity *rt_se;
struct task_struct *p;
@@ -900,6 +1043,18 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
p = rt_task_of(rt_se);
p->se.exec_start = rq->clock;
return p;
}
static struct task_struct *pick_next_task_rt(struct rq *rq)
{
struct task_struct *p = _pick_next_task_rt(rq);
/* The running task is never eligible for pushing */
if (p)
dequeue_pushable_task(rq, p);
return p;
}
@@ -907,6 +1062,13 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
update_curr_rt(rq);
p->se.exec_start = 0;
/*
* The previous task needs to be made eligible for pushing
* if it is still active
*/
if (p->se.on_rq && p->rt.nr_cpus_allowed > 1)
enqueue_pushable_task(rq, p);
}
#ifdef CONFIG_SMP
@@ -960,12 +1122,13 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
static inline int pick_optimal_cpu(int this_cpu,
const struct cpumask *mask)
{
int first;
/* "this_cpu" is cheaper to preempt than a remote processor */
if ((this_cpu != -1) && cpu_isset(this_cpu, *mask))
if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask))
return this_cpu;
first = cpumask_first(mask);
@@ -981,6 +1144,7 @@ static int find_lowest_rq(struct task_struct *task)
struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
int this_cpu = smp_processor_id();
int cpu = task_cpu(task);
cpumask_var_t domain_mask;
if (task->rt.nr_cpus_allowed == 1)
return -1; /* No other targets possible */
@@ -1013,19 +1177,25 @@ static int find_lowest_rq(struct task_struct *task)
if (this_cpu == cpu)
this_cpu = -1; /* Skip this_cpu opt if the same */
for_each_domain(cpu, sd) {
if (sd->flags & SD_WAKE_AFFINE) {
cpumask_t domain_mask;
int best_cpu;
if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) {
for_each_domain(cpu, sd) {
if (sd->flags & SD_WAKE_AFFINE) {
int best_cpu;
cpumask_and(&domain_mask, sched_domain_span(sd),
lowest_mask);
cpumask_and(domain_mask,
sched_domain_span(sd),
lowest_mask);
best_cpu = pick_optimal_cpu(this_cpu,
&domain_mask);
if (best_cpu != -1)
return best_cpu;
best_cpu = pick_optimal_cpu(this_cpu,
domain_mask);
if (best_cpu != -1) {
free_cpumask_var(domain_mask);
return best_cpu;
}
}
}
free_cpumask_var(domain_mask);
}
/*
@@ -1072,7 +1242,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
}
/* If this rq is still suitable use it. */
if (lowest_rq->rt.highest_prio > task->prio)
if (lowest_rq->rt.highest_prio.curr > task->prio)
break;
/* try again */
@@ -1083,6 +1253,31 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
return lowest_rq;
}
static inline int has_pushable_tasks(struct rq *rq)
{
return !plist_head_empty(&rq->rt.pushable_tasks);
}
static struct task_struct *pick_next_pushable_task(struct rq *rq)
{
struct task_struct *p;
if (!has_pushable_tasks(rq))
return NULL;
p = plist_first_entry(&rq->rt.pushable_tasks,
struct task_struct, pushable_tasks);
BUG_ON(rq->cpu != task_cpu(p));
BUG_ON(task_current(rq, p));
BUG_ON(p->rt.nr_cpus_allowed <= 1);
BUG_ON(!p->se.on_rq);
BUG_ON(!rt_task(p));
return p;
}
/*
* If the current CPU has more than one RT task, see if the non
* running task can migrate over to a CPU that is running a task
@@ -1092,13 +1287,11 @@ static int push_rt_task(struct rq *rq)
{
struct task_struct *next_task;
struct rq *lowest_rq;
int ret = 0;
int paranoid = RT_MAX_TRIES;
if (!rq->rt.overloaded)
return 0;
next_task = pick_next_highest_task_rt(rq, -1);
next_task = pick_next_pushable_task(rq);
if (!next_task)
return 0;
@@ -1127,16 +1320,34 @@ static int push_rt_task(struct rq *rq)
struct task_struct *task;
/*
* find lock_lowest_rq releases rq->lock
* so it is possible that next_task has changed.
* If it has, then try again.
* so it is possible that next_task has migrated.
*
* We need to make sure that the task is still on the same
* run-queue and is also still the next task eligible for
* pushing.
*/
task = pick_next_highest_task_rt(rq, -1);
if (unlikely(task != next_task) && task && paranoid--) {
put_task_struct(next_task);
next_task = task;
goto retry;
task = pick_next_pushable_task(rq);
if (task_cpu(next_task) == rq->cpu && task == next_task) {
/*
* If we get here, the task hasnt moved at all, but
* it has failed to push. We will not try again,
* since the other cpus will pull from us when they
* are ready.
*/
dequeue_pushable_task(rq, next_task);
goto out;
}
goto out;
if (!task)
/* No more tasks, just exit */
goto out;
/*
* Something has shifted, try again.
*/
put_task_struct(next_task);
next_task = task;
goto retry;
}
deactivate_task(rq, next_task, 0);
@@ -1147,23 +1358,12 @@ static int push_rt_task(struct rq *rq)
double_unlock_balance(rq, lowest_rq);
ret = 1;
out:
put_task_struct(next_task);
return ret;
return 1;
}
/*
* TODO: Currently we just use the second highest prio task on
* the queue, and stop when it can't migrate (or there's
* no more RT tasks). There may be a case where a lower
* priority RT task has a different affinity than the
* higher RT task. In this case the lower RT task could
* possibly be able to migrate where as the higher priority
* RT task could not. We currently ignore this issue.
* Enhancements are welcome!
*/
static void push_rt_tasks(struct rq *rq)
{
/* push_rt_task will return true if it moved an RT */
@@ -1174,33 +1374,35 @@ static void push_rt_tasks(struct rq *rq)
static int pull_rt_task(struct rq *this_rq)
{
int this_cpu = this_rq->cpu, ret = 0, cpu;
struct task_struct *p, *next;
struct task_struct *p;
struct rq *src_rq;
if (likely(!rt_overloaded(this_rq)))
return 0;
next = pick_next_task_rt(this_rq);
for_each_cpu(cpu, this_rq->rd->rto_mask) {
if (this_cpu == cpu)
continue;
src_rq = cpu_rq(cpu);
/*
* Don't bother taking the src_rq->lock if the next highest
* task is known to be lower-priority than our current task.
* This may look racy, but if this value is about to go
* logically higher, the src_rq will push this task away.
* And if its going logically lower, we do not care
*/
if (src_rq->rt.highest_prio.next >=
this_rq->rt.highest_prio.curr)
continue;
/*
* We can potentially drop this_rq's lock in
* double_lock_balance, and another CPU could
* steal our next task - hence we must cause
* the caller to recalculate the next task
* in that case:
* alter this_rq
*/
if (double_lock_balance(this_rq, src_rq)) {
struct task_struct *old_next = next;
next = pick_next_task_rt(this_rq);
if (next != old_next)
ret = 1;
}
double_lock_balance(this_rq, src_rq);
/*
* Are there still pullable RT tasks?
@@ -1214,7 +1416,7 @@ static int pull_rt_task(struct rq *this_rq)
* Do we have an RT task that preempts
* the to-be-scheduled task?
*/
if (p && (!next || (p->prio < next->prio))) {
if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
WARN_ON(p == src_rq->curr);
WARN_ON(!p->se.on_rq);
@@ -1224,12 +1426,9 @@ static int pull_rt_task(struct rq *this_rq)
* This is just that p is wakeing up and hasn't
* had a chance to schedule. We only pull
* p if it is lower in priority than the
* current task on the run queue or
* this_rq next task is lower in prio than
* the current task on that rq.
* current task on the run queue
*/
if (p->prio < src_rq->curr->prio ||
(next && next->prio < src_rq->curr->prio))
if (p->prio < src_rq->curr->prio)
goto skip;
ret = 1;
@@ -1242,13 +1441,7 @@ static int pull_rt_task(struct rq *this_rq)
* case there's an even higher prio task
* in another runqueue. (low likelyhood
* but possible)
*
* Update next so that we won't pick a task
* on another cpu with a priority lower (or equal)
* than the one we just picked.
*/
next = p;
}
skip:
double_unlock_balance(this_rq, src_rq);
@@ -1260,24 +1453,27 @@ static int pull_rt_task(struct rq *this_rq)
static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
{
/* Try to pull RT tasks here if we lower this rq's prio */
if (unlikely(rt_task(prev)) && rq->rt.highest_prio > prev->prio)
if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio)
pull_rt_task(rq);
}
/*
* assumes rq->lock is held
*/
static int needs_post_schedule_rt(struct rq *rq)
{
return has_pushable_tasks(rq);
}
static void post_schedule_rt(struct rq *rq)
{
/*
* If we have more than one rt_task queued, then
* see if we can push the other rt_tasks off to other CPUS.
* Note we may release the rq lock, and since
* the lock was owned by prev, we need to release it
* first via finish_lock_switch and then reaquire it here.
* This is only called if needs_post_schedule_rt() indicates that
* we need to push tasks away
*/
if (unlikely(rq->rt.overloaded)) {
spin_lock_irq(&rq->lock);
push_rt_tasks(rq);
spin_unlock_irq(&rq->lock);
}
spin_lock_irq(&rq->lock);
push_rt_tasks(rq);
spin_unlock_irq(&rq->lock);
}
/*
@@ -1288,7 +1484,8 @@ static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
{
if (!task_running(rq, p) &&
!test_tsk_need_resched(rq->curr) &&
rq->rt.overloaded)
has_pushable_tasks(rq) &&
p->rt.nr_cpus_allowed > 1)
push_rt_tasks(rq);
}
@@ -1324,6 +1521,24 @@ static void set_cpus_allowed_rt(struct task_struct *p,
if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
struct rq *rq = task_rq(p);
if (!task_current(rq, p)) {
/*
* Make sure we dequeue this task from the pushable list
* before going further. It will either remain off of
* the list because we are no longer pushable, or it
* will be requeued.
*/
if (p->rt.nr_cpus_allowed > 1)
dequeue_pushable_task(rq, p);
/*
* Requeue if our weight is changing and still > 1
*/
if (weight > 1)
enqueue_pushable_task(rq, p);
}
if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) {
rq->rt.rt_nr_migratory++;
} else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) {
@@ -1331,7 +1546,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
rq->rt.rt_nr_migratory--;
}
update_rt_migration(rq);
update_rt_migration(&rq->rt);
}
cpumask_copy(&p->cpus_allowed, new_mask);
@@ -1346,7 +1561,7 @@ static void rq_online_rt(struct rq *rq)
__enable_runtime(rq);
cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
}
/* Assumes rq->lock is held */
@@ -1438,7 +1653,7 @@ static void prio_changed_rt(struct rq *rq, struct task_struct *p,
* can release the rq lock and p could migrate.
* Only reschedule if p is still on the same runqueue.
*/
if (p->prio > rq->rt.highest_prio && rq->curr == p)
if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
resched_task(p);
#else
/* For UP simply resched on drop of prio */
@@ -1509,6 +1724,9 @@ static void set_curr_task_rt(struct rq *rq)
struct task_struct *p = rq->curr;
p->se.exec_start = rq->clock;
/* The running task is never eligible for pushing */
dequeue_pushable_task(rq, p);
}
static const struct sched_class rt_sched_class = {
@@ -1531,6 +1749,7 @@ static const struct sched_class rt_sched_class = {
.rq_online = rq_online_rt,
.rq_offline = rq_offline_rt,
.pre_schedule = pre_schedule_rt,
.needs_post_schedule = needs_post_schedule_rt,
.post_schedule = post_schedule_rt,
.task_wake_up = task_wake_up_rt,
.switched_from = switched_from_rt,

View File

@@ -4,7 +4,7 @@
* bump this up when changing the output format or the meaning of an existing
* format, so that tools can adapt (or abort)
*/
#define SCHEDSTAT_VERSION 14
#define SCHEDSTAT_VERSION 15
static int show_schedstat(struct seq_file *seq, void *v)
{
@@ -26,9 +26,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
/* runqueue-specific stats */
seq_printf(seq,
"cpu%d %u %u %u %u %u %u %u %u %u %llu %llu %lu",
cpu, rq->yld_both_empty,
rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count,
"cpu%d %u %u %u %u %u %u %llu %llu %lu",
cpu, rq->yld_count,
rq->sched_switch, rq->sched_count, rq->sched_goidle,
rq->ttwu_count, rq->ttwu_local,
rq->rq_cpu_time,

View File

@@ -796,6 +796,11 @@ int __init __weak early_irq_init(void)
return 0;
}
int __init __weak arch_probe_nr_irqs(void)
{
return 0;
}
int __init __weak arch_early_irq_init(void)
{
return 0;

View File

@@ -170,7 +170,7 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
* doesn't hit this CPU until we're ready. */
get_cpu();
for_each_online_cpu(i) {
sm_work = percpu_ptr(stop_machine_work, i);
sm_work = per_cpu_ptr(stop_machine_work, i);
INIT_WORK(sm_work, stop_cpu);
queue_work_on(i, stop_machine_wq, sm_work);
}

View File

@@ -219,6 +219,7 @@ static const struct trans_ctl_table trans_net_ipv4_conf_vars_table[] = {
{ NET_IPV4_CONF_ARP_IGNORE, "arp_ignore" },
{ NET_IPV4_CONF_PROMOTE_SECONDARIES, "promote_secondaries" },
{ NET_IPV4_CONF_ARP_ACCEPT, "arp_accept" },
{ NET_IPV4_CONF_ARP_NOTIFY, "arp_notify" },
{}
};

View File

@@ -1,4 +1,4 @@
obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o

View File

@@ -68,6 +68,17 @@ void clockevents_set_mode(struct clock_event_device *dev,
if (dev->mode != mode) {
dev->set_mode(mode, dev);
dev->mode = mode;
/*
* A nsec2cyc multiplicator of 0 is invalid and we'd crash
* on it, so fix it up and emit a warning:
*/
if (mode == CLOCK_EVT_MODE_ONESHOT) {
if (unlikely(!dev->mult)) {
dev->mult = 1;
WARN_ON(1);
}
}
}
}
@@ -168,15 +179,6 @@ void clockevents_register_device(struct clock_event_device *dev)
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
BUG_ON(!dev->cpumask);
/*
* A nsec2cyc multiplicator of 0 is invalid and we'd crash
* on it, so fix it up and emit a warning:
*/
if (unlikely(!dev->mult)) {
dev->mult = 1;
WARN_ON(1);
}
spin_lock(&clockevents_lock);
list_add(&dev->list, &clockevent_devices);

View File

@@ -31,6 +31,82 @@
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
void timecounter_init(struct timecounter *tc,
const struct cyclecounter *cc,
u64 start_tstamp)
{
tc->cc = cc;
tc->cycle_last = cc->read(cc);
tc->nsec = start_tstamp;
}
EXPORT_SYMBOL(timecounter_init);
/**
* timecounter_read_delta - get nanoseconds since last call of this function
* @tc: Pointer to time counter
*
* When the underlying cycle counter runs over, this will be handled
* correctly as long as it does not run over more than once between
* calls.
*
* The first call to this function for a new time counter initializes
* the time tracking and returns an undefined result.
*/
static u64 timecounter_read_delta(struct timecounter *tc)
{
cycle_t cycle_now, cycle_delta;
u64 ns_offset;
/* read cycle counter: */
cycle_now = tc->cc->read(tc->cc);
/* calculate the delta since the last timecounter_read_delta(): */
cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask;
/* convert to nanoseconds: */
ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta);
/* update time stamp of timecounter_read_delta() call: */
tc->cycle_last = cycle_now;
return ns_offset;
}
u64 timecounter_read(struct timecounter *tc)
{
u64 nsec;
/* increment time by nanoseconds since last call */
nsec = timecounter_read_delta(tc);
nsec += tc->nsec;
tc->nsec = nsec;
return nsec;
}
EXPORT_SYMBOL(timecounter_read);
u64 timecounter_cyc2time(struct timecounter *tc,
cycle_t cycle_tstamp)
{
u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
u64 nsec;
/*
* Instead of always treating cycle_tstamp as more recent
* than tc->cycle_last, detect when it is too far in the
* future and treat it as old time stamp instead.
*/
if (cycle_delta > tc->cc->mask / 2) {
cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask;
nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta);
} else {
nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec;
}
return nsec;
}
EXPORT_SYMBOL(timecounter_cyc2time);
/* XXX - Would like a better way for initializing curr_clocksource */
extern struct clocksource clocksource_jiffies;

View File

@@ -1,71 +1,129 @@
/*
* linux/kernel/time/ntp.c
*
* NTP state machine interfaces and logic.
*
* This code was mainly moved from kernel/timer.c and kernel/time.c
* Please see those files for relevant copyright info and historical
* changelogs.
*/
#include <linux/mm.h>
#include <linux/time.h>
#include <linux/timex.h>
#include <linux/jiffies.h>
#include <linux/hrtimer.h>
#include <linux/capability.h>
#include <linux/math64.h>
#include <linux/clocksource.h>
#include <linux/workqueue.h>
#include <asm/timex.h>
#include <linux/hrtimer.h>
#include <linux/jiffies.h>
#include <linux/math64.h>
#include <linux/timex.h>
#include <linux/time.h>
#include <linux/mm.h>
/*
* Timekeeping variables
* NTP timekeeping variables:
*/
unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */
unsigned long tick_nsec; /* ACTHZ period (nsec) */
u64 tick_length;
static u64 tick_length_base;
static struct hrtimer leap_timer;
/* USER_HZ period (usecs): */
unsigned long tick_usec = TICK_USEC;
#define MAX_TICKADJ 500 /* microsecs */
#define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \
NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
/* ACTHZ period (nsecs): */
unsigned long tick_nsec;
u64 tick_length;
static u64 tick_length_base;
static struct hrtimer leap_timer;
#define MAX_TICKADJ 500LL /* usecs */
#define MAX_TICKADJ_SCALED \
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
/*
* phase-lock loop variables
*/
/* TIME_ERROR prevents overwriting the CMOS clock */
static int time_state = TIME_OK; /* clock synchronization status */
int time_status = STA_UNSYNC; /* clock status bits */
static long time_tai; /* TAI offset (s) */
static s64 time_offset; /* time adjustment (ns) */
static long time_constant = 2; /* pll time constant */
long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
static s64 time_freq; /* frequency offset (scaled ns/s)*/
static long time_reftime; /* time at last adjustment (s) */
long time_adjust;
static long ntp_tick_adj;
/*
* clock synchronization status
*
* (TIME_ERROR prevents overwriting the CMOS clock)
*/
static int time_state = TIME_OK;
/* clock status bits: */
int time_status = STA_UNSYNC;
/* TAI offset (secs): */
static long time_tai;
/* time adjustment (nsecs): */
static s64 time_offset;
/* pll time constant: */
static long time_constant = 2;
/* maximum error (usecs): */
long time_maxerror = NTP_PHASE_LIMIT;
/* estimated error (usecs): */
long time_esterror = NTP_PHASE_LIMIT;
/* frequency offset (scaled nsecs/secs): */
static s64 time_freq;
/* time at last adjustment (secs): */
static long time_reftime;
long time_adjust;
/* constant (boot-param configurable) NTP tick adjustment (upscaled) */
static s64 ntp_tick_adj;
/*
* NTP methods:
*/
/*
* Update (tick_length, tick_length_base, tick_nsec), based
* on (tick_usec, ntp_tick_adj, time_freq):
*/
static void ntp_update_frequency(void)
{
u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
<< NTP_SCALE_SHIFT;
second_length += (s64)ntp_tick_adj << NTP_SCALE_SHIFT;
second_length += time_freq;
u64 second_length;
u64 new_base;
tick_length_base = second_length;
second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
<< NTP_SCALE_SHIFT;
tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT;
tick_length_base = div_u64(tick_length_base, NTP_INTERVAL_FREQ);
second_length += ntp_tick_adj;
second_length += time_freq;
tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT;
new_base = div_u64(second_length, NTP_INTERVAL_FREQ);
/*
* Don't wait for the next second_overflow, apply
* the change to the tick length immediately:
*/
tick_length += new_base - tick_length_base;
tick_length_base = new_base;
}
static inline s64 ntp_update_offset_fll(s64 offset64, long secs)
{
time_status &= ~STA_MODE;
if (secs < MINSEC)
return 0;
if (!(time_status & STA_FLL) && (secs <= MAXSEC))
return 0;
time_status |= STA_MODE;
return div_s64(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs);
}
static void ntp_update_offset(long offset)
{
long mtemp;
s64 freq_adj;
s64 offset64;
long secs;
if (!(time_status & STA_PLL))
return;
@@ -84,24 +142,23 @@ static void ntp_update_offset(long offset)
* Select how the frequency is to be controlled
* and in which mode (PLL or FLL).
*/
if (time_status & STA_FREQHOLD || time_reftime == 0)
time_reftime = xtime.tv_sec;
mtemp = xtime.tv_sec - time_reftime;
secs = xtime.tv_sec - time_reftime;
if (unlikely(time_status & STA_FREQHOLD))
secs = 0;
time_reftime = xtime.tv_sec;
freq_adj = (s64)offset * mtemp;
freq_adj <<= NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant);
time_status &= ~STA_MODE;
if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) {
freq_adj += div_s64((s64)offset << (NTP_SCALE_SHIFT - SHIFT_FLL),
mtemp);
time_status |= STA_MODE;
}
freq_adj += time_freq;
freq_adj = min(freq_adj, MAXFREQ_SCALED);
time_freq = max(freq_adj, -MAXFREQ_SCALED);
offset64 = offset;
freq_adj = (offset64 * secs) <<
(NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant));
time_offset = div_s64((s64)offset << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ);
freq_adj += ntp_update_offset_fll(offset64, secs);
freq_adj = min(freq_adj + time_freq, MAXFREQ_SCALED);
time_freq = max(freq_adj, -MAXFREQ_SCALED);
time_offset = div_s64(offset64 << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ);
}
/**
@@ -111,15 +168,15 @@ static void ntp_update_offset(long offset)
*/
void ntp_clear(void)
{
time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC;
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
time_adjust = 0; /* stop active adjtime() */
time_status |= STA_UNSYNC;
time_maxerror = NTP_PHASE_LIMIT;
time_esterror = NTP_PHASE_LIMIT;
ntp_update_frequency();
tick_length = tick_length_base;
time_offset = 0;
tick_length = tick_length_base;
time_offset = 0;
}
/*
@@ -140,8 +197,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
xtime.tv_sec--;
wall_to_monotonic.tv_sec++;
time_state = TIME_OOP;
printk(KERN_NOTICE "Clock: "
"inserting leap second 23:59:60 UTC\n");
printk(KERN_NOTICE
"Clock: inserting leap second 23:59:60 UTC\n");
hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
res = HRTIMER_RESTART;
break;
@@ -150,8 +207,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
time_tai--;
wall_to_monotonic.tv_sec--;
time_state = TIME_WAIT;
printk(KERN_NOTICE "Clock: "
"deleting leap second 23:59:59 UTC\n");
printk(KERN_NOTICE
"Clock: deleting leap second 23:59:59 UTC\n");
break;
case TIME_OOP:
time_tai++;
@@ -179,7 +236,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
*/
void second_overflow(void)
{
s64 time_adj;
s64 delta;
/* Bump the maxerror field */
time_maxerror += MAXFREQ / NSEC_PER_USEC;
@@ -192,24 +249,30 @@ void second_overflow(void)
* Compute the phase adjustment for the next second. The offset is
* reduced by a fixed factor times the time constant.
*/
tick_length = tick_length_base;
time_adj = shift_right(time_offset, SHIFT_PLL + time_constant);
time_offset -= time_adj;
tick_length += time_adj;
tick_length = tick_length_base;
if (unlikely(time_adjust)) {
if (time_adjust > MAX_TICKADJ) {
time_adjust -= MAX_TICKADJ;
tick_length += MAX_TICKADJ_SCALED;
} else if (time_adjust < -MAX_TICKADJ) {
time_adjust += MAX_TICKADJ;
tick_length -= MAX_TICKADJ_SCALED;
} else {
tick_length += (s64)(time_adjust * NSEC_PER_USEC /
NTP_INTERVAL_FREQ) << NTP_SCALE_SHIFT;
time_adjust = 0;
}
delta = shift_right(time_offset, SHIFT_PLL + time_constant);
time_offset -= delta;
tick_length += delta;
if (!time_adjust)
return;
if (time_adjust > MAX_TICKADJ) {
time_adjust -= MAX_TICKADJ;
tick_length += MAX_TICKADJ_SCALED;
return;
}
if (time_adjust < -MAX_TICKADJ) {
time_adjust += MAX_TICKADJ;
tick_length -= MAX_TICKADJ_SCALED;
return;
}
tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
<< NTP_SCALE_SHIFT;
time_adjust = 0;
}
#ifdef CONFIG_GENERIC_CMOS_UPDATE
@@ -233,12 +296,13 @@ static void sync_cmos_clock(struct work_struct *work)
* This code is run on a timer. If the clock is set, that timer
* may not expire at the correct time. Thus, we adjust...
*/
if (!ntp_synced())
if (!ntp_synced()) {
/*
* Not synced, exit, do not restart a timer (if one is
* running, let it run out).
*/
return;
}
getnstimeofday(&now);
if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2)
@@ -270,7 +334,116 @@ static void notify_cmos_timer(void)
static inline void notify_cmos_timer(void) { }
#endif
/* adjtimex mainly allows reading (and writing, if superuser) of
/*
* Start the leap seconds timer:
*/
static inline void ntp_start_leap_timer(struct timespec *ts)
{
long now = ts->tv_sec;
if (time_status & STA_INS) {
time_state = TIME_INS;
now += 86400 - now % 86400;
hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
return;
}
if (time_status & STA_DEL) {
time_state = TIME_DEL;
now += 86400 - (now + 1) % 86400;
hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
}
}
/*
* Propagate a new txc->status value into the NTP state:
*/
static inline void process_adj_status(struct timex *txc, struct timespec *ts)
{
if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
time_state = TIME_OK;
time_status = STA_UNSYNC;
}
/*
* If we turn on PLL adjustments then reset the
* reference time to current time.
*/
if (!(time_status & STA_PLL) && (txc->status & STA_PLL))
time_reftime = xtime.tv_sec;
/* only set allowed bits */
time_status &= STA_RONLY;
time_status |= txc->status & ~STA_RONLY;
switch (time_state) {
case TIME_OK:
ntp_start_leap_timer(ts);
break;
case TIME_INS:
case TIME_DEL:
time_state = TIME_OK;
ntp_start_leap_timer(ts);
case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
break;
case TIME_OOP:
hrtimer_restart(&leap_timer);
break;
}
}
/*
* Called with the xtime lock held, so we can access and modify
* all the global NTP state:
*/
static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts)
{
if (txc->modes & ADJ_STATUS)
process_adj_status(txc, ts);
if (txc->modes & ADJ_NANO)
time_status |= STA_NANO;
if (txc->modes & ADJ_MICRO)
time_status &= ~STA_NANO;
if (txc->modes & ADJ_FREQUENCY) {
time_freq = txc->freq * PPM_SCALE;
time_freq = min(time_freq, MAXFREQ_SCALED);
time_freq = max(time_freq, -MAXFREQ_SCALED);
}
if (txc->modes & ADJ_MAXERROR)
time_maxerror = txc->maxerror;
if (txc->modes & ADJ_ESTERROR)
time_esterror = txc->esterror;
if (txc->modes & ADJ_TIMECONST) {
time_constant = txc->constant;
if (!(time_status & STA_NANO))
time_constant += 4;
time_constant = min(time_constant, (long)MAXTC);
time_constant = max(time_constant, 0l);
}
if (txc->modes & ADJ_TAI && txc->constant > 0)
time_tai = txc->constant;
if (txc->modes & ADJ_OFFSET)
ntp_update_offset(txc->offset);
if (txc->modes & ADJ_TICK)
tick_usec = txc->tick;
if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET))
ntp_update_frequency();
}
/*
* adjtimex mainly allows reading (and writing, if superuser) of
* kernel time-keeping variables. used by xntpd.
*/
int do_adjtimex(struct timex *txc)
@@ -291,11 +464,14 @@ int do_adjtimex(struct timex *txc)
if (txc->modes && !capable(CAP_SYS_TIME))
return -EPERM;
/* if the quartz is off by more than 10% something is VERY wrong! */
/*
* if the quartz is off by more than 10% then
* something is VERY wrong!
*/
if (txc->modes & ADJ_TICK &&
(txc->tick < 900000/USER_HZ ||
txc->tick > 1100000/USER_HZ))
return -EINVAL;
return -EINVAL;
if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
hrtimer_cancel(&leap_timer);
@@ -305,7 +481,6 @@ int do_adjtimex(struct timex *txc)
write_seqlock_irq(&xtime_lock);
/* If there are input parameters, then process them */
if (txc->modes & ADJ_ADJTIME) {
long save_adjust = time_adjust;
@@ -315,98 +490,24 @@ int do_adjtimex(struct timex *txc)
ntp_update_frequency();
}
txc->offset = save_adjust;
goto adj_done;
}
if (txc->modes) {
long sec;
} else {
if (txc->modes & ADJ_STATUS) {
if ((time_status & STA_PLL) &&
!(txc->status & STA_PLL)) {
time_state = TIME_OK;
time_status = STA_UNSYNC;
}
/* only set allowed bits */
time_status &= STA_RONLY;
time_status |= txc->status & ~STA_RONLY;
/* If there are input parameters, then process them: */
if (txc->modes)
process_adjtimex_modes(txc, &ts);
switch (time_state) {
case TIME_OK:
start_timer:
sec = ts.tv_sec;
if (time_status & STA_INS) {
time_state = TIME_INS;
sec += 86400 - sec % 86400;
hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS);
} else if (time_status & STA_DEL) {
time_state = TIME_DEL;
sec += 86400 - (sec + 1) % 86400;
hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS);
}
break;
case TIME_INS:
case TIME_DEL:
time_state = TIME_OK;
goto start_timer;
break;
case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
break;
case TIME_OOP:
hrtimer_restart(&leap_timer);
break;
}
}
if (txc->modes & ADJ_NANO)
time_status |= STA_NANO;
if (txc->modes & ADJ_MICRO)
time_status &= ~STA_NANO;
if (txc->modes & ADJ_FREQUENCY) {
time_freq = (s64)txc->freq * PPM_SCALE;
time_freq = min(time_freq, MAXFREQ_SCALED);
time_freq = max(time_freq, -MAXFREQ_SCALED);
}
if (txc->modes & ADJ_MAXERROR)
time_maxerror = txc->maxerror;
if (txc->modes & ADJ_ESTERROR)
time_esterror = txc->esterror;
if (txc->modes & ADJ_TIMECONST) {
time_constant = txc->constant;
if (!(time_status & STA_NANO))
time_constant += 4;
time_constant = min(time_constant, (long)MAXTC);
time_constant = max(time_constant, 0l);
}
if (txc->modes & ADJ_TAI && txc->constant > 0)
time_tai = txc->constant;
if (txc->modes & ADJ_OFFSET)
ntp_update_offset(txc->offset);
if (txc->modes & ADJ_TICK)
tick_usec = txc->tick;
if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET))
ntp_update_frequency();
}
txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
NTP_SCALE_SHIFT);
if (!(time_status & STA_NANO))
txc->offset /= NSEC_PER_USEC;
if (!(time_status & STA_NANO))
txc->offset /= NSEC_PER_USEC;
}
adj_done:
result = time_state; /* mostly `TIME_OK' */
if (time_status & (STA_UNSYNC|STA_CLOCKERR))
result = TIME_ERROR;
txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) *
(s64)PPM_SCALE_INV, NTP_SCALE_SHIFT);
PPM_SCALE_INV, NTP_SCALE_SHIFT);
txc->maxerror = time_maxerror;
txc->esterror = time_esterror;
txc->status = time_status;
@@ -425,6 +526,7 @@ adj_done:
txc->calcnt = 0;
txc->errcnt = 0;
txc->stbcnt = 0;
write_sequnlock_irq(&xtime_lock);
txc->time.tv_sec = ts.tv_sec;
@@ -440,6 +542,8 @@ adj_done:
static int __init ntp_tick_adj_setup(char *str)
{
ntp_tick_adj = simple_strtol(str, NULL, 0);
ntp_tick_adj <<= NTP_SCALE_SHIFT;
return 1;
}

191
kernel/time/timecompare.c Normal file
View File

@@ -0,0 +1,191 @@
/*
* Copyright (C) 2009 Intel Corporation.
* Author: Patrick Ohly <patrick.ohly@intel.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/timecompare.h>
#include <linux/module.h>
#include <linux/math64.h>
/*
* fixed point arithmetic scale factor for skew
*
* Usually one would measure skew in ppb (parts per billion, 1e9), but
* using a factor of 2 simplifies the math.
*/
#define TIMECOMPARE_SKEW_RESOLUTION (((s64)1)<<30)
ktime_t timecompare_transform(struct timecompare *sync,
u64 source_tstamp)
{
u64 nsec;
nsec = source_tstamp + sync->offset;
nsec += (s64)(source_tstamp - sync->last_update) * sync->skew /
TIMECOMPARE_SKEW_RESOLUTION;
return ns_to_ktime(nsec);
}
EXPORT_SYMBOL(timecompare_transform);
int timecompare_offset(struct timecompare *sync,
s64 *offset,
u64 *source_tstamp)
{
u64 start_source = 0, end_source = 0;
struct {
s64 offset;
s64 duration_target;
} buffer[10], sample, *samples;
int counter = 0, i;
int used;
int index;
int num_samples = sync->num_samples;
if (num_samples > sizeof(buffer)/sizeof(buffer[0])) {
samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC);
if (!samples) {
samples = buffer;
num_samples = sizeof(buffer)/sizeof(buffer[0]);
}
} else {
samples = buffer;
}
/* run until we have enough valid samples, but do not try forever */
i = 0;
counter = 0;
while (1) {
u64 ts;
ktime_t start, end;
start = sync->target();
ts = timecounter_read(sync->source);
end = sync->target();
if (!i)
start_source = ts;
/* ignore negative durations */
sample.duration_target = ktime_to_ns(ktime_sub(end, start));
if (sample.duration_target >= 0) {
/*
* assume symetric delay to and from source:
* average target time corresponds to measured
* source time
*/
sample.offset =
ktime_to_ns(ktime_add(end, start)) / 2 -
ts;
/* simple insertion sort based on duration */
index = counter - 1;
while (index >= 0) {
if (samples[index].duration_target <
sample.duration_target)
break;
samples[index + 1] = samples[index];
index--;
}
samples[index + 1] = sample;
counter++;
}
i++;
if (counter >= num_samples || i >= 100000) {
end_source = ts;
break;
}
}
*source_tstamp = (end_source + start_source) / 2;
/* remove outliers by only using 75% of the samples */
used = counter * 3 / 4;
if (!used)
used = counter;
if (used) {
/* calculate average */
s64 off = 0;
for (index = 0; index < used; index++)
off += samples[index].offset;
*offset = div_s64(off, used);
}
if (samples && samples != buffer)
kfree(samples);
return used;
}
EXPORT_SYMBOL(timecompare_offset);
void __timecompare_update(struct timecompare *sync,
u64 source_tstamp)
{
s64 offset;
u64 average_time;
if (!timecompare_offset(sync, &offset, &average_time))
return;
if (!sync->last_update) {
sync->last_update = average_time;
sync->offset = offset;
sync->skew = 0;
} else {
s64 delta_nsec = average_time - sync->last_update;
/* avoid division by negative or small deltas */
if (delta_nsec >= 10000) {
s64 delta_offset_nsec = offset - sync->offset;
s64 skew; /* delta_offset_nsec *
TIMECOMPARE_SKEW_RESOLUTION /
delta_nsec */
u64 divisor;
/* div_s64() is limited to 32 bit divisor */
skew = delta_offset_nsec * TIMECOMPARE_SKEW_RESOLUTION;
divisor = delta_nsec;
while (unlikely(divisor >= ((s64)1) << 32)) {
/* divide both by 2; beware, right shift
of negative value has undefined
behavior and can only be used for
the positive divisor */
skew = div_s64(skew, 2);
divisor >>= 1;
}
skew = div_s64(skew, divisor);
/*
* Calculate new overall skew as 4/16 the
* old value and 12/16 the new one. This is
* a rather arbitrary tradeoff between
* only using the latest measurement (0/16 and
* 16/16) and even more weight on past measurements.
*/
#define TIMECOMPARE_NEW_SKEW_PER_16 12
sync->skew =
div_s64((16 - TIMECOMPARE_NEW_SKEW_PER_16) *
sync->skew +
TIMECOMPARE_NEW_SKEW_PER_16 * skew,
16);
sync->last_update = average_time;
sync->offset = offset;
}
}
}
EXPORT_SYMBOL(__timecompare_update);

View File

@@ -589,11 +589,14 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
}
}
int __mod_timer(struct timer_list *timer, unsigned long expires)
static inline int
__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
{
struct tvec_base *base, *new_base;
unsigned long flags;
int ret = 0;
int ret;
ret = 0;
timer_stats_timer_set_start_info(timer);
BUG_ON(!timer->function);
@@ -603,6 +606,9 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
if (timer_pending(timer)) {
detach_timer(timer, 0);
ret = 1;
} else {
if (pending_only)
goto out_unlock;
}
debug_timer_activate(timer);
@@ -629,12 +635,83 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
timer->expires = expires;
internal_add_timer(base, timer);
out_unlock:
spin_unlock_irqrestore(&base->lock, flags);
return ret;
}
EXPORT_SYMBOL(__mod_timer);
/**
* mod_timer_pending - modify a pending timer's timeout
* @timer: the pending timer to be modified
* @expires: new timeout in jiffies
*
* mod_timer_pending() is the same for pending timers as mod_timer(),
* but will not re-activate and modify already deleted timers.
*
* It is useful for unserialized use of timers.
*/
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
return __mod_timer(timer, expires, true);
}
EXPORT_SYMBOL(mod_timer_pending);
/**
* mod_timer - modify a timer's timeout
* @timer: the timer to be modified
* @expires: new timeout in jiffies
*
* mod_timer() is a more efficient way to update the expire field of an
* active timer (if the timer is inactive it will be activated)
*
* mod_timer(timer, expires) is equivalent to:
*
* del_timer(timer); timer->expires = expires; add_timer(timer);
*
* Note that if there are multiple unserialized concurrent users of the
* same timer, then mod_timer() is the only safe way to modify the timeout,
* since add_timer() cannot modify an already running timer.
*
* The function returns whether it has modified a pending timer or not.
* (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
* active timer returns 1.)
*/
int mod_timer(struct timer_list *timer, unsigned long expires)
{
/*
* This is a common optimization triggered by the
* networking code - if the timer is re-modified
* to be the same thing then just return:
*/
if (timer->expires == expires && timer_pending(timer))
return 1;
return __mod_timer(timer, expires, false);
}
EXPORT_SYMBOL(mod_timer);
/**
* add_timer - start a timer
* @timer: the timer to be added
*
* The kernel will do a ->function(->data) callback from the
* timer interrupt at the ->expires point in the future. The
* current time is 'jiffies'.
*
* The timer's ->expires, ->function (and if the handler uses it, ->data)
* fields must be set prior calling this function.
*
* Timers with an ->expires field in the past will be executed in the next
* timer tick.
*/
void add_timer(struct timer_list *timer)
{
BUG_ON(timer_pending(timer));
mod_timer(timer, timer->expires);
}
EXPORT_SYMBOL(add_timer);
/**
* add_timer_on - start a timer on a particular CPU
@@ -666,44 +743,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
spin_unlock_irqrestore(&base->lock, flags);
}
/**
* mod_timer - modify a timer's timeout
* @timer: the timer to be modified
* @expires: new timeout in jiffies
*
* mod_timer() is a more efficient way to update the expire field of an
* active timer (if the timer is inactive it will be activated)
*
* mod_timer(timer, expires) is equivalent to:
*
* del_timer(timer); timer->expires = expires; add_timer(timer);
*
* Note that if there are multiple unserialized concurrent users of the
* same timer, then mod_timer() is the only safe way to modify the timeout,
* since add_timer() cannot modify an already running timer.
*
* The function returns whether it has modified a pending timer or not.
* (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
* active timer returns 1.)
*/
int mod_timer(struct timer_list *timer, unsigned long expires)
{
BUG_ON(!timer->function);
timer_stats_timer_set_start_info(timer);
/*
* This is a common optimization triggered by the
* networking code - if the timer is re-modified
* to be the same thing then just return:
*/
if (timer->expires == expires && timer_pending(timer))
return 1;
return __mod_timer(timer, expires);
}
EXPORT_SYMBOL(mod_timer);
/**
* del_timer - deactive a timer.
* @timer: the timer to be deactivated
@@ -733,7 +772,6 @@ int del_timer(struct timer_list *timer)
return ret;
}
EXPORT_SYMBOL(del_timer);
#ifdef CONFIG_SMP
@@ -767,7 +805,6 @@ out:
return ret;
}
EXPORT_SYMBOL(try_to_del_timer_sync);
/**
@@ -796,7 +833,6 @@ int del_timer_sync(struct timer_list *timer)
cpu_relax();
}
}
EXPORT_SYMBOL(del_timer_sync);
#endif
@@ -1268,7 +1304,7 @@ signed long __sched schedule_timeout(signed long timeout)
expire = timeout + jiffies;
setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
__mod_timer(&timer, expire);
__mod_timer(&timer, expire, false);
schedule();
del_singleshot_timer_sync(&timer);

View File

@@ -20,7 +20,7 @@
struct user_namespace init_user_ns = {
.kref = {
.refcount = ATOMIC_INIT(1),
.refcount = ATOMIC_INIT(2),
},
.creator = &root_user,
};