Merge commit 'origin/master' into next
Manual merge of: arch/powerpc/include/asm/elf.h drivers/i2c/busses/i2c-mpc.c
@@ -49,6 +49,7 @@ asynchronous and synchronous parts of the kernel.
*/

#include <linux/async.h>
#include <linux/bug.h>
#include <linux/module.h>
#include <linux/wait.h>
#include <linux/sched.h>
@@ -387,20 +388,11 @@ static int async_manager_thread(void *unused)

static int __init async_init(void)
{
if (async_enabled)
if (IS_ERR(kthread_run(async_manager_thread, NULL,
"async/mgr")))
async_enabled = 0;
async_enabled =
!IS_ERR(kthread_run(async_manager_thread, NULL, "async/mgr"));

WARN_ON(!async_enabled);
return 0;
}

static int __init setup_async(char *str)
{
async_enabled = 1;
return 1;
}

__setup("fastboot", setup_async);

core_initcall(async_init);
@@ -1071,7 +1071,8 @@ static int cgroup_get_sb(struct file_system_type *fs_type,
mutex_unlock(&cgroup_mutex);
}

return simple_set_mnt(mnt, sb);
simple_set_mnt(mnt, sb);
return 0;

free_cg_links:
free_cg_links(&tmp_cg_links);
@@ -1627,7 +1628,7 @@ static struct inode_operations cgroup_dir_inode_operations = {
static int cgroup_create_file(struct dentry *dentry, int mode,
struct super_block *sb)
{
static struct dentry_operations cgroup_dops = {
static const struct dentry_operations cgroup_dops = {
.d_iput = cgroup_diput,
};
@@ -980,12 +980,9 @@ static void check_stack_usage(void)
{
static DEFINE_SPINLOCK(low_water_lock);
static int lowest_to_date = THREAD_SIZE;
unsigned long *n = end_of_stack(current);
unsigned long free;

while (*n == 0)
n++;
free = (unsigned long)n - (unsigned long)end_of_stack(current);
free = stack_not_used(current);

if (free >= lowest_to_date)
return;
@@ -61,6 +61,7 @@
#include <linux/proc_fs.h>
#include <linux/blkdev.h>
#include <trace/sched.h>
#include <linux/magic.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -212,6 +213,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
{
struct task_struct *tsk;
struct thread_info *ti;
unsigned long *stackend;

int err;

prepare_to_copy(orig);
@@ -237,6 +240,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
goto out;

setup_thread_stack(tsk, orig);
stackend = end_of_stack(tsk);
*stackend = STACK_END_MAGIC; /* for overflow detection */

#ifdef CONFIG_CC_STACKPROTECTOR
tsk->stack_canary = get_random_int();
kernel/futex.c
@@ -114,7 +114,9 @@ struct futex_q {
};

/*
* Split the global futex_lock into every hash list lock.
* Hash buckets are shared by all the futex_keys that hash to the same
* location. Each key may have multiple futex_q structures, one for each task
* waiting on a futex.
*/
struct futex_hash_bucket {
spinlock_t lock;
@@ -189,8 +191,7 @@ static void drop_futex_key_refs(union futex_key *key)
/**
* get_futex_key - Get parameters which are the keys for a futex.
* @uaddr: virtual address of the futex
* @shared: NULL for a PROCESS_PRIVATE futex,
* &current->mm->mmap_sem for a PROCESS_SHARED futex
* @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED
* @key: address where result is stored.
*
* Returns a negative error code or 0
@@ -200,9 +201,7 @@ static void drop_futex_key_refs(union futex_key *key)
* offset_within_page). For private mappings, it's (uaddr, current->mm).
* We can usually work out the index without swapping in the page.
*
* fshared is NULL for PROCESS_PRIVATE futexes
* For other futexes, it points to &current->mm->mmap_sem and
* caller must have taken the reader lock. but NOT any spinlocks.
* lock_page() might sleep, the caller should not hold a spinlock.
*/
static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
{
@@ -299,41 +298,6 @@ static int get_futex_value_locked(u32 *dest, u32 __user *from)
|
||||
return ret ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fault handling.
|
||||
*/
|
||||
static int futex_handle_fault(unsigned long address, int attempt)
|
||||
{
|
||||
struct vm_area_struct * vma;
|
||||
struct mm_struct *mm = current->mm;
|
||||
int ret = -EFAULT;
|
||||
|
||||
if (attempt > 2)
|
||||
return ret;
|
||||
|
||||
down_read(&mm->mmap_sem);
|
||||
vma = find_vma(mm, address);
|
||||
if (vma && address >= vma->vm_start &&
|
||||
(vma->vm_flags & VM_WRITE)) {
|
||||
int fault;
|
||||
fault = handle_mm_fault(mm, vma, address, 1);
|
||||
if (unlikely((fault & VM_FAULT_ERROR))) {
|
||||
#if 0
|
||||
/* XXX: let's do this when we verify it is OK */
|
||||
if (ret & VM_FAULT_OOM)
|
||||
ret = -ENOMEM;
|
||||
#endif
|
||||
} else {
|
||||
ret = 0;
|
||||
if (fault & VM_FAULT_MAJOR)
|
||||
current->maj_flt++;
|
||||
else
|
||||
current->min_flt++;
|
||||
}
|
||||
}
|
||||
up_read(&mm->mmap_sem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* PI code:
|
||||
@@ -589,10 +553,9 @@ static void wake_futex(struct futex_q *q)
|
||||
* The waiting task can free the futex_q as soon as this is written,
|
||||
* without taking any locks. This must come last.
|
||||
*
|
||||
* A memory barrier is required here to prevent the following store
|
||||
* to lock_ptr from getting ahead of the wakeup. Clearing the lock
|
||||
* at the end of wake_up_all() does not prevent this store from
|
||||
* moving.
|
||||
* A memory barrier is required here to prevent the following store to
|
||||
* lock_ptr from getting ahead of the wakeup. Clearing the lock at the
|
||||
* end of wake_up() does not prevent this store from moving.
|
||||
*/
|
||||
smp_wmb();
|
||||
q->lock_ptr = NULL;
|
||||
@@ -692,9 +655,16 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
|
||||
{
|
||||
spin_unlock(&hb1->lock);
|
||||
if (hb1 != hb2)
|
||||
spin_unlock(&hb2->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wake up all waiters hashed on the physical page that is mapped
|
||||
* to this virtual address:
|
||||
* Wake up waiters matching bitset queued on this futex (uaddr).
|
||||
*/
|
||||
static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
|
||||
{
|
||||
@@ -750,9 +720,9 @@ futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
|
||||
struct futex_hash_bucket *hb1, *hb2;
|
||||
struct plist_head *head;
|
||||
struct futex_q *this, *next;
|
||||
int ret, op_ret, attempt = 0;
|
||||
int ret, op_ret;
|
||||
|
||||
retryfull:
|
||||
retry:
|
||||
ret = get_futex_key(uaddr1, fshared, &key1);
|
||||
if (unlikely(ret != 0))
|
||||
goto out;
|
||||
@@ -763,16 +733,13 @@ retryfull:
|
||||
hb1 = hash_futex(&key1);
|
||||
hb2 = hash_futex(&key2);
|
||||
|
||||
retry:
|
||||
double_lock_hb(hb1, hb2);
|
||||
|
||||
retry_private:
|
||||
op_ret = futex_atomic_op_inuser(op, uaddr2);
|
||||
if (unlikely(op_ret < 0)) {
|
||||
u32 dummy;
|
||||
|
||||
spin_unlock(&hb1->lock);
|
||||
if (hb1 != hb2)
|
||||
spin_unlock(&hb2->lock);
|
||||
double_unlock_hb(hb1, hb2);
|
||||
|
||||
#ifndef CONFIG_MMU
|
||||
/*
|
||||
@@ -788,26 +755,16 @@ retry:
|
||||
goto out_put_keys;
|
||||
}
|
||||
|
||||
/*
|
||||
* futex_atomic_op_inuser needs to both read and write
|
||||
* *(int __user *)uaddr2, but we can't modify it
|
||||
* non-atomically. Therefore, if get_user below is not
|
||||
* enough, we need to handle the fault ourselves, while
|
||||
* still holding the mmap_sem.
|
||||
*/
|
||||
if (attempt++) {
|
||||
ret = futex_handle_fault((unsigned long)uaddr2,
|
||||
attempt);
|
||||
if (ret)
|
||||
goto out_put_keys;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
ret = get_user(dummy, uaddr2);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out_put_keys;
|
||||
|
||||
goto retryfull;
|
||||
if (!fshared)
|
||||
goto retry_private;
|
||||
|
||||
put_futex_key(fshared, &key2);
|
||||
put_futex_key(fshared, &key1);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
head = &hb1->chain;
|
||||
@@ -834,9 +791,7 @@ retry:
|
||||
ret += op_ret;
|
||||
}
|
||||
|
||||
spin_unlock(&hb1->lock);
|
||||
if (hb1 != hb2)
|
||||
spin_unlock(&hb2->lock);
|
||||
double_unlock_hb(hb1, hb2);
|
||||
out_put_keys:
|
||||
put_futex_key(fshared, &key2);
|
||||
out_put_key1:
|
||||
@@ -869,6 +824,7 @@ retry:
|
||||
hb1 = hash_futex(&key1);
|
||||
hb2 = hash_futex(&key2);
|
||||
|
||||
retry_private:
|
||||
double_lock_hb(hb1, hb2);
|
||||
|
||||
if (likely(cmpval != NULL)) {
|
||||
@@ -877,16 +833,18 @@ retry:
|
||||
ret = get_futex_value_locked(&curval, uaddr1);
|
||||
|
||||
if (unlikely(ret)) {
|
||||
spin_unlock(&hb1->lock);
|
||||
if (hb1 != hb2)
|
||||
spin_unlock(&hb2->lock);
|
||||
double_unlock_hb(hb1, hb2);
|
||||
|
||||
ret = get_user(curval, uaddr1);
|
||||
if (ret)
|
||||
goto out_put_keys;
|
||||
|
||||
if (!ret)
|
||||
goto retry;
|
||||
if (!fshared)
|
||||
goto retry_private;
|
||||
|
||||
goto out_put_keys;
|
||||
put_futex_key(fshared, &key2);
|
||||
put_futex_key(fshared, &key1);
|
||||
goto retry;
|
||||
}
|
||||
if (curval != *cmpval) {
|
||||
ret = -EAGAIN;
|
||||
@@ -923,9 +881,7 @@ retry:
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&hb1->lock);
|
||||
if (hb1 != hb2)
|
||||
spin_unlock(&hb2->lock);
|
||||
double_unlock_hb(hb1, hb2);
|
||||
|
||||
/* drop_futex_key_refs() must be called outside the spinlocks. */
|
||||
while (--drop_count >= 0)
|
||||
@@ -1063,7 +1019,7 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
|
||||
struct futex_pi_state *pi_state = q->pi_state;
|
||||
struct task_struct *oldowner = pi_state->owner;
|
||||
u32 uval, curval, newval;
|
||||
int ret, attempt = 0;
|
||||
int ret;
|
||||
|
||||
/* Owner died? */
|
||||
if (!pi_state->owner)
|
||||
@@ -1076,11 +1032,9 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
|
||||
* in the user space variable. This must be atomic as we have
|
||||
* to preserve the owner died bit here.
|
||||
*
|
||||
* Note: We write the user space value _before_ changing the
|
||||
* pi_state because we can fault here. Imagine swapped out
|
||||
* pages or a fork, which was running right before we acquired
|
||||
* mmap_sem, that marked all the anonymous memory readonly for
|
||||
* cow.
|
||||
* Note: We write the user space value _before_ changing the pi_state
|
||||
* because we can fault here. Imagine swapped out pages or a fork
|
||||
* that marked all the anonymous memory readonly for cow.
|
||||
*
|
||||
* Modifying pi_state _before_ the user space value would
|
||||
* leave the pi_state in an inconsistent state when we fault
|
||||
@@ -1136,7 +1090,7 @@ retry:
|
||||
handle_fault:
|
||||
spin_unlock(q->lock_ptr);
|
||||
|
||||
ret = futex_handle_fault((unsigned long)uaddr, attempt++);
|
||||
ret = get_user(uval, uaddr);
|
||||
|
||||
spin_lock(q->lock_ptr);
|
||||
|
||||
@@ -1185,10 +1139,11 @@ retry:
|
||||
if (unlikely(ret != 0))
|
||||
goto out;
|
||||
|
||||
retry_private:
|
||||
hb = queue_lock(&q);
|
||||
|
||||
/*
|
||||
* Access the page AFTER the futex is queued.
|
||||
* Access the page AFTER the hash-bucket is locked.
|
||||
* Order is important:
|
||||
*
|
||||
* Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
|
||||
@@ -1204,20 +1159,23 @@ retry:
|
||||
* a wakeup when *uaddr != val on entry to the syscall. This is
|
||||
* rare, but normal.
|
||||
*
|
||||
* for shared futexes, we hold the mmap semaphore, so the mapping
|
||||
* For shared futexes, we hold the mmap semaphore, so the mapping
|
||||
* cannot have changed since we looked it up in get_futex_key.
|
||||
*/
|
||||
ret = get_futex_value_locked(&uval, uaddr);
|
||||
|
||||
if (unlikely(ret)) {
|
||||
queue_unlock(&q, hb);
|
||||
put_futex_key(fshared, &q.key);
|
||||
|
||||
ret = get_user(uval, uaddr);
|
||||
if (ret)
|
||||
goto out_put_key;
|
||||
|
||||
if (!ret)
|
||||
goto retry;
|
||||
goto out;
|
||||
if (!fshared)
|
||||
goto retry_private;
|
||||
|
||||
put_futex_key(fshared, &q.key);
|
||||
goto retry;
|
||||
}
|
||||
ret = -EWOULDBLOCK;
|
||||
if (unlikely(uval != val)) {
|
||||
@@ -1248,16 +1206,13 @@ retry:
|
||||
if (!abs_time)
|
||||
schedule();
|
||||
else {
|
||||
unsigned long slack;
|
||||
slack = current->timer_slack_ns;
|
||||
if (rt_task(current))
|
||||
slack = 0;
|
||||
hrtimer_init_on_stack(&t.timer,
|
||||
clockrt ? CLOCK_REALTIME :
|
||||
CLOCK_MONOTONIC,
|
||||
HRTIMER_MODE_ABS);
|
||||
hrtimer_init_sleeper(&t, current);
|
||||
hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack);
|
||||
hrtimer_set_expires_range_ns(&t.timer, *abs_time,
|
||||
current->timer_slack_ns);
|
||||
|
||||
hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
|
||||
if (!hrtimer_active(&t.timer))
|
||||
@@ -1354,7 +1309,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
|
||||
struct futex_hash_bucket *hb;
|
||||
u32 uval, newval, curval;
|
||||
struct futex_q q;
|
||||
int ret, lock_taken, ownerdied = 0, attempt = 0;
|
||||
int ret, lock_taken, ownerdied = 0;
|
||||
|
||||
if (refill_pi_state_cache())
|
||||
return -ENOMEM;
|
||||
@@ -1374,7 +1329,7 @@ retry:
|
||||
if (unlikely(ret != 0))
|
||||
goto out;
|
||||
|
||||
retry_unlocked:
|
||||
retry_private:
|
||||
hb = queue_lock(&q);
|
||||
|
||||
retry_locked:
|
||||
@@ -1458,6 +1413,7 @@ retry_locked:
|
||||
* exit to complete.
|
||||
*/
|
||||
queue_unlock(&q, hb);
|
||||
put_futex_key(fshared, &q.key);
|
||||
cond_resched();
|
||||
goto retry;
|
||||
|
||||
@@ -1564,6 +1520,13 @@ retry_locked:
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If fixup_pi_state_owner() faulted and was unable to handle the
|
||||
* fault, unlock it and return the fault to userspace.
|
||||
*/
|
||||
if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
|
||||
rt_mutex_unlock(&q.pi_state->pi_mutex);
|
||||
|
||||
/* Unqueue and drop the lock */
|
||||
unqueue_me_pi(&q);
|
||||
|
||||
@@ -1591,22 +1554,18 @@ uaddr_faulted:
|
||||
*/
|
||||
queue_unlock(&q, hb);
|
||||
|
||||
if (attempt++) {
|
||||
ret = futex_handle_fault((unsigned long)uaddr, attempt);
|
||||
if (ret)
|
||||
goto out_put_key;
|
||||
goto retry_unlocked;
|
||||
}
|
||||
|
||||
ret = get_user(uval, uaddr);
|
||||
if (!ret)
|
||||
goto retry;
|
||||
if (ret)
|
||||
goto out_put_key;
|
||||
|
||||
if (to)
|
||||
destroy_hrtimer_on_stack(&to->timer);
|
||||
return ret;
|
||||
if (!fshared)
|
||||
goto retry_private;
|
||||
|
||||
put_futex_key(fshared, &q.key);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Userspace attempted a TID -> 0 atomic transition, and failed.
|
||||
* This is the in-kernel slowpath: we look up the PI state (if any),
|
||||
@@ -1619,7 +1578,7 @@ static int futex_unlock_pi(u32 __user *uaddr, int fshared)
|
||||
u32 uval;
|
||||
struct plist_head *head;
|
||||
union futex_key key = FUTEX_KEY_INIT;
|
||||
int ret, attempt = 0;
|
||||
int ret;
|
||||
|
||||
retry:
|
||||
if (get_user(uval, uaddr))
|
||||
@@ -1635,7 +1594,6 @@ retry:
|
||||
goto out;
|
||||
|
||||
hb = hash_futex(&key);
|
||||
retry_unlocked:
|
||||
spin_lock(&hb->lock);
|
||||
|
||||
/*
|
||||
@@ -1700,14 +1658,7 @@ pi_faulted:
|
||||
* we have to drop the mmap_sem in order to call get_user().
|
||||
*/
|
||||
spin_unlock(&hb->lock);
|
||||
|
||||
if (attempt++) {
|
||||
ret = futex_handle_fault((unsigned long)uaddr, attempt);
|
||||
if (ret)
|
||||
goto out;
|
||||
uval = 0;
|
||||
goto retry_unlocked;
|
||||
}
|
||||
put_futex_key(fshared, &key);
|
||||
|
||||
ret = get_user(uval, uaddr);
|
||||
if (!ret)
|
||||
|
@@ -46,7 +46,10 @@ void dynamic_irq_init(unsigned int irq)
|
||||
desc->irq_count = 0;
|
||||
desc->irqs_unhandled = 0;
|
||||
#ifdef CONFIG_SMP
|
||||
cpumask_setall(&desc->affinity);
|
||||
cpumask_setall(desc->affinity);
|
||||
#ifdef CONFIG_GENERIC_PENDING_IRQ
|
||||
cpumask_clear(desc->pending_mask);
|
||||
#endif
|
||||
#endif
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
}
|
||||
@@ -78,6 +81,7 @@ void dynamic_irq_cleanup(unsigned int irq)
|
||||
desc->handle_irq = handle_bad_irq;
|
||||
desc->chip = &no_irq_chip;
|
||||
desc->name = NULL;
|
||||
clear_kstat_irqs(desc);
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
}
|
||||
|
||||
@@ -290,7 +294,8 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq)
|
||||
desc->chip->mask_ack(irq);
|
||||
else {
|
||||
desc->chip->mask(irq);
|
||||
desc->chip->ack(irq);
|
||||
if (desc->chip->ack)
|
||||
desc->chip->ack(irq);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -476,7 +481,8 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
|
||||
kstat_incr_irqs_this_cpu(irq, desc);
|
||||
|
||||
/* Start handling the irq */
|
||||
desc->chip->ack(irq);
|
||||
if (desc->chip->ack)
|
||||
desc->chip->ack(irq);
|
||||
desc = irq_remap_to_desc(irq, desc);
|
||||
|
||||
/* Mark the IRQ currently in progress.*/
|
||||
|
@@ -17,6 +17,7 @@
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/rculist.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/bootmem.h>
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
@@ -69,6 +70,7 @@ int nr_irqs = NR_IRQS;
|
||||
EXPORT_SYMBOL_GPL(nr_irqs);
|
||||
|
||||
#ifdef CONFIG_SPARSE_IRQ
|
||||
|
||||
static struct irq_desc irq_desc_init = {
|
||||
.irq = -1,
|
||||
.status = IRQ_DISABLED,
|
||||
@@ -76,26 +78,25 @@ static struct irq_desc irq_desc_init = {
|
||||
.handle_irq = handle_bad_irq,
|
||||
.depth = 1,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
|
||||
#ifdef CONFIG_SMP
|
||||
.affinity = CPU_MASK_ALL
|
||||
#endif
|
||||
};
|
||||
|
||||
void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
|
||||
{
|
||||
unsigned long bytes;
|
||||
char *ptr;
|
||||
int node;
|
||||
|
||||
/* Compute how many bytes we need per irq and allocate them */
|
||||
bytes = nr * sizeof(unsigned int);
|
||||
void *ptr;
|
||||
|
||||
node = cpu_to_node(cpu);
|
||||
ptr = kzalloc_node(bytes, GFP_ATOMIC, node);
|
||||
printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n", cpu, node);
|
||||
ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node);
|
||||
|
||||
if (ptr)
|
||||
desc->kstat_irqs = (unsigned int *)ptr;
|
||||
/*
|
||||
* don't overwite if can not get new one
|
||||
* init_copy_kstat_irqs() could still use old one
|
||||
*/
|
||||
if (ptr) {
|
||||
printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n",
|
||||
cpu, node);
|
||||
desc->kstat_irqs = ptr;
|
||||
}
|
||||
}
|
||||
|
||||
static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
|
||||
@@ -113,6 +114,10 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
|
||||
printk(KERN_ERR "can not alloc kstat_irqs\n");
|
||||
BUG_ON(1);
|
||||
}
|
||||
if (!init_alloc_desc_masks(desc, cpu, false)) {
|
||||
printk(KERN_ERR "can not alloc irq_desc cpumasks\n");
|
||||
BUG_ON(1);
|
||||
}
|
||||
arch_init_chip_data(desc, cpu);
|
||||
}
|
||||
|
||||
@@ -121,7 +126,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
|
||||
*/
|
||||
DEFINE_SPINLOCK(sparse_irq_lock);
|
||||
|
||||
struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly;
|
||||
struct irq_desc **irq_desc_ptrs __read_mostly;
|
||||
|
||||
static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = {
|
||||
[0 ... NR_IRQS_LEGACY-1] = {
|
||||
@@ -131,14 +136,10 @@ static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_sm
|
||||
.handle_irq = handle_bad_irq,
|
||||
.depth = 1,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
|
||||
#ifdef CONFIG_SMP
|
||||
.affinity = CPU_MASK_ALL
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
/* FIXME: use bootmem alloc ...*/
|
||||
static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS];
|
||||
static unsigned int *kstat_irqs_legacy;
|
||||
|
||||
int __init early_irq_init(void)
|
||||
{
|
||||
@@ -148,18 +149,30 @@ int __init early_irq_init(void)
|
||||
|
||||
init_irq_default_affinity();
|
||||
|
||||
/* initialize nr_irqs based on nr_cpu_ids */
|
||||
arch_probe_nr_irqs();
|
||||
printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs);
|
||||
|
||||
desc = irq_desc_legacy;
|
||||
legacy_count = ARRAY_SIZE(irq_desc_legacy);
|
||||
|
||||
/* allocate irq_desc_ptrs array based on nr_irqs */
|
||||
irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *));
|
||||
|
||||
/* allocate based on nr_cpu_ids */
|
||||
/* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */
|
||||
kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids *
|
||||
sizeof(int));
|
||||
|
||||
for (i = 0; i < legacy_count; i++) {
|
||||
desc[i].irq = i;
|
||||
desc[i].kstat_irqs = kstat_irqs_legacy[i];
|
||||
desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
|
||||
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
|
||||
|
||||
init_alloc_desc_masks(&desc[i], 0, true);
|
||||
irq_desc_ptrs[i] = desc + i;
|
||||
}
|
||||
|
||||
for (i = legacy_count; i < NR_IRQS; i++)
|
||||
for (i = legacy_count; i < nr_irqs; i++)
|
||||
irq_desc_ptrs[i] = NULL;
|
||||
|
||||
return arch_early_irq_init();
|
||||
@@ -167,7 +180,10 @@ int __init early_irq_init(void)
|
||||
|
||||
struct irq_desc *irq_to_desc(unsigned int irq)
|
||||
{
|
||||
return (irq < NR_IRQS) ? irq_desc_ptrs[irq] : NULL;
|
||||
if (irq_desc_ptrs && irq < nr_irqs)
|
||||
return irq_desc_ptrs[irq];
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
|
||||
@@ -176,10 +192,9 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
|
||||
unsigned long flags;
|
||||
int node;
|
||||
|
||||
if (irq >= NR_IRQS) {
|
||||
printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n",
|
||||
irq, NR_IRQS);
|
||||
WARN_ON(1);
|
||||
if (irq >= nr_irqs) {
|
||||
WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n",
|
||||
irq, nr_irqs);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -221,12 +236,10 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
|
||||
.handle_irq = handle_bad_irq,
|
||||
.depth = 1,
|
||||
.lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock),
|
||||
#ifdef CONFIG_SMP
|
||||
.affinity = CPU_MASK_ALL
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS];
|
||||
int __init early_irq_init(void)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
@@ -235,12 +248,16 @@ int __init early_irq_init(void)
|
||||
|
||||
init_irq_default_affinity();
|
||||
|
||||
printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS);
|
||||
|
||||
desc = irq_desc;
|
||||
count = ARRAY_SIZE(irq_desc);
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
for (i = 0; i < count; i++) {
|
||||
desc[i].irq = i;
|
||||
|
||||
init_alloc_desc_masks(&desc[i], 0, true);
|
||||
desc[i].kstat_irqs = kstat_irqs_all[i];
|
||||
}
|
||||
return arch_early_irq_init();
|
||||
}
|
||||
|
||||
@@ -255,6 +272,11 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
|
||||
}
|
||||
#endif /* !CONFIG_SPARSE_IRQ */
|
||||
|
||||
void clear_kstat_irqs(struct irq_desc *desc)
|
||||
{
|
||||
memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs)));
|
||||
}
|
||||
|
||||
/*
|
||||
* What should we do if we get a hw irq event on an illegal vector?
|
||||
* Each architecture has to answer this themself.
|
||||
@@ -328,6 +350,8 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
|
||||
irqreturn_t ret, retval = IRQ_NONE;
|
||||
unsigned int status = 0;
|
||||
|
||||
WARN_ONCE(!in_irq(), "BUG: IRQ handler called from non-hardirq context!");
|
||||
|
||||
if (!(action->flags & IRQF_DISABLED))
|
||||
local_irq_enable_in_hardirq();
|
||||
|
||||
@@ -347,6 +371,11 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
|
||||
}
|
||||
|
||||
#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
|
||||
|
||||
#ifdef CONFIG_ENABLE_WARN_DEPRECATED
|
||||
# warning __do_IRQ is deprecated. Please convert to proper flow handlers
|
||||
#endif
|
||||
|
||||
/**
|
||||
* __do_IRQ - original all in one highlevel IRQ handler
|
||||
* @irq: the interrupt number
|
||||
@@ -467,12 +496,10 @@ void early_init_irq_lock_class(void)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SPARSE_IRQ
|
||||
unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
|
||||
{
|
||||
struct irq_desc *desc = irq_to_desc(irq);
|
||||
return desc ? desc->kstat_irqs[cpu] : 0;
|
||||
}
|
||||
#endif
|
||||
EXPORT_SYMBOL(kstat_irqs_cpu);
|
||||
|
||||
|
@@ -15,8 +15,16 @@ extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
|
||||
|
||||
extern struct lock_class_key irq_desc_lock_class;
|
||||
extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
|
||||
extern void clear_kstat_irqs(struct irq_desc *desc);
|
||||
extern spinlock_t sparse_irq_lock;
|
||||
|
||||
#ifdef CONFIG_SPARSE_IRQ
|
||||
/* irq_desc_ptrs allocated at boot time */
|
||||
extern struct irq_desc **irq_desc_ptrs;
|
||||
#else
|
||||
/* irq_desc_ptrs is a fixed size array */
|
||||
extern struct irq_desc *irq_desc_ptrs[NR_IRQS];
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
|
||||
|
@@ -90,14 +90,14 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
|
||||
|
||||
#ifdef CONFIG_GENERIC_PENDING_IRQ
|
||||
if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
|
||||
cpumask_copy(&desc->affinity, cpumask);
|
||||
cpumask_copy(desc->affinity, cpumask);
|
||||
desc->chip->set_affinity(irq, cpumask);
|
||||
} else {
|
||||
desc->status |= IRQ_MOVE_PENDING;
|
||||
cpumask_copy(&desc->pending_mask, cpumask);
|
||||
cpumask_copy(desc->pending_mask, cpumask);
|
||||
}
|
||||
#else
|
||||
cpumask_copy(&desc->affinity, cpumask);
|
||||
cpumask_copy(desc->affinity, cpumask);
|
||||
desc->chip->set_affinity(irq, cpumask);
|
||||
#endif
|
||||
desc->status |= IRQ_AFFINITY_SET;
|
||||
@@ -109,7 +109,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
|
||||
/*
|
||||
* Generic version of the affinity autoselector.
|
||||
*/
|
||||
int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
|
||||
static int setup_affinity(unsigned int irq, struct irq_desc *desc)
|
||||
{
|
||||
if (!irq_can_set_affinity(irq))
|
||||
return 0;
|
||||
@@ -119,21 +119,21 @@ int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc)
|
||||
* one of the targets is online.
|
||||
*/
|
||||
if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) {
|
||||
if (cpumask_any_and(&desc->affinity, cpu_online_mask)
|
||||
if (cpumask_any_and(desc->affinity, cpu_online_mask)
|
||||
< nr_cpu_ids)
|
||||
goto set_affinity;
|
||||
else
|
||||
desc->status &= ~IRQ_AFFINITY_SET;
|
||||
}
|
||||
|
||||
cpumask_and(&desc->affinity, cpu_online_mask, irq_default_affinity);
|
||||
cpumask_and(desc->affinity, cpu_online_mask, irq_default_affinity);
|
||||
set_affinity:
|
||||
desc->chip->set_affinity(irq, &desc->affinity);
|
||||
desc->chip->set_affinity(irq, desc->affinity);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
static inline int do_irq_select_affinity(unsigned int irq, struct irq_desc *d)
|
||||
static inline int setup_affinity(unsigned int irq, struct irq_desc *d)
|
||||
{
|
||||
return irq_select_affinity(irq);
|
||||
}
|
||||
@@ -149,14 +149,14 @@ int irq_select_affinity_usr(unsigned int irq)
|
||||
int ret;
|
||||
|
||||
spin_lock_irqsave(&desc->lock, flags);
|
||||
ret = do_irq_select_affinity(irq, desc);
|
||||
ret = setup_affinity(irq, desc);
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#else
|
||||
static inline int do_irq_select_affinity(int irq, struct irq_desc *desc)
|
||||
static inline int setup_affinity(unsigned int irq, struct irq_desc *desc)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@@ -389,9 +389,9 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
|
||||
* allocate special interrupts that are part of the architecture.
|
||||
*/
|
||||
static int
|
||||
__setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
|
||||
__setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
|
||||
{
|
||||
struct irqaction *old, **p;
|
||||
struct irqaction *old, **old_ptr;
|
||||
const char *old_name = NULL;
|
||||
unsigned long flags;
|
||||
int shared = 0;
|
||||
@@ -423,8 +423,8 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
|
||||
* The following block of code has to be executed atomically
|
||||
*/
|
||||
spin_lock_irqsave(&desc->lock, flags);
|
||||
p = &desc->action;
|
||||
old = *p;
|
||||
old_ptr = &desc->action;
|
||||
old = *old_ptr;
|
||||
if (old) {
|
||||
/*
|
||||
* Can't share interrupts unless both agree to and are
|
||||
@@ -447,8 +447,8 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
|
||||
|
||||
/* add new interrupt at end of irq queue */
|
||||
do {
|
||||
p = &old->next;
|
||||
old = *p;
|
||||
old_ptr = &old->next;
|
||||
old = *old_ptr;
|
||||
} while (old);
|
||||
shared = 1;
|
||||
}
|
||||
@@ -488,7 +488,7 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
|
||||
desc->status |= IRQ_NO_BALANCING;
|
||||
|
||||
/* Set default affinity mask once everything is setup */
|
||||
do_irq_select_affinity(irq, desc);
|
||||
setup_affinity(irq, desc);
|
||||
|
||||
} else if ((new->flags & IRQF_TRIGGER_MASK)
|
||||
&& (new->flags & IRQF_TRIGGER_MASK)
|
||||
@@ -499,7 +499,7 @@ __setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new)
|
||||
(int)(new->flags & IRQF_TRIGGER_MASK));
|
||||
}
|
||||
|
||||
*p = new;
|
||||
*old_ptr = new;
|
||||
|
||||
/* Reset broken irq detection when installing new handler */
|
||||
desc->irq_count = 0;
|
||||
@@ -549,9 +549,102 @@ int setup_irq(unsigned int irq, struct irqaction *act)
|
||||
|
||||
return __setup_irq(irq, desc, act);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(setup_irq);
|
||||
|
||||
/*
|
||||
* Internal function to unregister an irqaction - used to free
|
||||
* regular and special interrupts that are part of the architecture.
|
||||
*/
|
||||
static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
|
||||
{
|
||||
struct irq_desc *desc = irq_to_desc(irq);
|
||||
struct irqaction *action, **action_ptr;
|
||||
unsigned long flags;
|
||||
|
||||
WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
|
||||
|
||||
if (!desc)
|
||||
return NULL;
|
||||
|
||||
spin_lock_irqsave(&desc->lock, flags);
|
||||
|
||||
/*
|
||||
* There can be multiple actions per IRQ descriptor, find the right
|
||||
* one based on the dev_id:
|
||||
*/
|
||||
action_ptr = &desc->action;
|
||||
for (;;) {
|
||||
action = *action_ptr;
|
||||
|
||||
if (!action) {
|
||||
WARN(1, "Trying to free already-free IRQ %d\n", irq);
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (action->dev_id == dev_id)
|
||||
break;
|
||||
action_ptr = &action->next;
|
||||
}
|
||||
|
||||
/* Found it - now remove it from the list of entries: */
|
||||
*action_ptr = action->next;
|
||||
|
||||
/* Currently used only by UML, might disappear one day: */
|
||||
#ifdef CONFIG_IRQ_RELEASE_METHOD
|
||||
if (desc->chip->release)
|
||||
desc->chip->release(irq, dev_id);
|
||||
#endif
|
||||
|
||||
/* If this was the last handler, shut down the IRQ line: */
|
||||
if (!desc->action) {
|
||||
desc->status |= IRQ_DISABLED;
|
||||
if (desc->chip->shutdown)
|
||||
desc->chip->shutdown(irq);
|
||||
else
|
||||
desc->chip->disable(irq);
|
||||
}
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
|
||||
unregister_handler_proc(irq, action);
|
||||
|
||||
/* Make sure it's not being used on another CPU: */
|
||||
synchronize_irq(irq);
|
||||
|
||||
#ifdef CONFIG_DEBUG_SHIRQ
|
||||
/*
|
||||
* It's a shared IRQ -- the driver ought to be prepared for an IRQ
|
||||
* event to happen even now it's being freed, so let's make sure that
|
||||
* is so by doing an extra call to the handler ....
|
||||
*
|
||||
* ( We do this after actually deregistering it, to make sure that a
|
||||
* 'real' IRQ doesn't run in * parallel with our fake. )
|
||||
*/
|
||||
if (action->flags & IRQF_SHARED) {
|
||||
local_irq_save(flags);
|
||||
action->handler(irq, dev_id);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
#endif
|
||||
return action;
|
||||
}
|
||||
|
||||
/**
|
||||
* free_irq - free an interrupt
|
||||
* remove_irq - free an interrupt
|
||||
* @irq: Interrupt line to free
|
||||
* @act: irqaction for the interrupt
|
||||
*
|
||||
* Used to remove interrupts statically setup by the early boot process.
|
||||
*/
|
||||
void remove_irq(unsigned int irq, struct irqaction *act)
|
||||
{
|
||||
__free_irq(irq, act->dev_id);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(remove_irq);
|
||||
|
||||
/**
|
||||
* free_irq - free an interrupt allocated with request_irq
|
||||
* @irq: Interrupt line to free
|
||||
* @dev_id: Device identity to free
|
||||
*
|
||||
@@ -566,73 +659,7 @@ int setup_irq(unsigned int irq, struct irqaction *act)
|
||||
*/
|
||||
void free_irq(unsigned int irq, void *dev_id)
|
||||
{
|
||||
struct irq_desc *desc = irq_to_desc(irq);
|
||||
struct irqaction **p;
|
||||
unsigned long flags;
|
||||
|
||||
WARN_ON(in_interrupt());
|
||||
|
||||
if (!desc)
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&desc->lock, flags);
|
||||
p = &desc->action;
|
||||
for (;;) {
|
||||
struct irqaction *action = *p;
|
||||
|
||||
if (action) {
|
||||
struct irqaction **pp = p;
|
||||
|
||||
p = &action->next;
|
||||
if (action->dev_id != dev_id)
|
||||
continue;
|
||||
|
||||
/* Found it - now remove it from the list of entries */
|
||||
*pp = action->next;
|
||||
|
||||
/* Currently used only by UML, might disappear one day.*/
|
||||
#ifdef CONFIG_IRQ_RELEASE_METHOD
|
||||
if (desc->chip->release)
|
||||
desc->chip->release(irq, dev_id);
|
||||
#endif
|
||||
|
||||
if (!desc->action) {
|
||||
desc->status |= IRQ_DISABLED;
|
||||
if (desc->chip->shutdown)
|
||||
desc->chip->shutdown(irq);
|
||||
else
|
||||
desc->chip->disable(irq);
|
||||
}
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
unregister_handler_proc(irq, action);
|
||||
|
||||
/* Make sure it's not being used on another CPU */
|
||||
synchronize_irq(irq);
|
||||
#ifdef CONFIG_DEBUG_SHIRQ
|
||||
/*
|
||||
* It's a shared IRQ -- the driver ought to be
|
||||
* prepared for it to happen even now it's
|
||||
* being freed, so let's make sure.... We do
|
||||
* this after actually deregistering it, to
|
||||
* make sure that a 'real' IRQ doesn't run in
|
||||
* parallel with our fake
|
||||
*/
|
||||
if (action->flags & IRQF_SHARED) {
|
||||
local_irq_save(flags);
|
||||
action->handler(irq, dev_id);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
#endif
|
||||
kfree(action);
|
||||
return;
|
||||
}
|
||||
printk(KERN_ERR "Trying to free already-free IRQ %d\n", irq);
|
||||
#ifdef CONFIG_DEBUG_SHIRQ
|
||||
dump_stack();
|
||||
#endif
|
||||
spin_unlock_irqrestore(&desc->lock, flags);
|
||||
return;
|
||||
}
|
||||
kfree(__free_irq(irq, dev_id));
|
||||
}
|
||||
EXPORT_SYMBOL(free_irq);
|
||||
|
||||
@@ -679,11 +706,12 @@ int request_irq(unsigned int irq, irq_handler_t handler,
|
||||
* the behavior is classified as "will not fix" so we need to
|
||||
* start nudging drivers away from using that idiom.
|
||||
*/
|
||||
if ((irqflags & (IRQF_SHARED|IRQF_DISABLED))
|
||||
== (IRQF_SHARED|IRQF_DISABLED))
|
||||
pr_warning("IRQ %d/%s: IRQF_DISABLED is not "
|
||||
"guaranteed on shared IRQs\n",
|
||||
irq, devname);
|
||||
if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) ==
|
||||
(IRQF_SHARED|IRQF_DISABLED)) {
|
||||
pr_warning(
|
||||
"IRQ %d/%s: IRQF_DISABLED is not guaranteed on shared IRQs\n",
|
||||
irq, devname);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
/*
|
||||
@@ -709,15 +737,13 @@ int request_irq(unsigned int irq, irq_handler_t handler,
|
||||
if (!handler)
|
||||
return -EINVAL;
|
||||
|
||||
action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC);
|
||||
action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
|
||||
if (!action)
|
||||
return -ENOMEM;
|
||||
|
||||
action->handler = handler;
|
||||
action->flags = irqflags;
|
||||
cpus_clear(action->mask);
|
||||
action->name = devname;
|
||||
action->next = NULL;
|
||||
action->dev_id = dev_id;
|
||||
|
||||
retval = __setup_irq(irq, desc, action);
|
||||
|
@@ -18,7 +18,7 @@ void move_masked_irq(int irq)
|
||||
|
||||
desc->status &= ~IRQ_MOVE_PENDING;
|
||||
|
||||
if (unlikely(cpumask_empty(&desc->pending_mask)))
|
||||
if (unlikely(cpumask_empty(desc->pending_mask)))
|
||||
return;
|
||||
|
||||
if (!desc->chip->set_affinity)
|
||||
@@ -38,13 +38,13 @@ void move_masked_irq(int irq)
|
||||
* For correct operation this depends on the caller
|
||||
* masking the irqs.
|
||||
*/
|
||||
if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask)
|
||||
if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
|
||||
< nr_cpu_ids)) {
|
||||
cpumask_and(&desc->affinity,
|
||||
&desc->pending_mask, cpu_online_mask);
|
||||
desc->chip->set_affinity(irq, &desc->affinity);
|
||||
cpumask_and(desc->affinity,
|
||||
desc->pending_mask, cpu_online_mask);
|
||||
desc->chip->set_affinity(irq, desc->affinity);
|
||||
}
|
||||
cpumask_clear(&desc->pending_mask);
|
||||
cpumask_clear(desc->pending_mask);
|
||||
}
|
||||
|
||||
void move_native_irq(int irq)
|
||||
|
@@ -17,16 +17,11 @@ static void init_copy_kstat_irqs(struct irq_desc *old_desc,
|
||||
struct irq_desc *desc,
|
||||
int cpu, int nr)
|
||||
{
|
||||
unsigned long bytes;
|
||||
|
||||
init_kstat_irqs(desc, cpu, nr);
|
||||
|
||||
if (desc->kstat_irqs != old_desc->kstat_irqs) {
|
||||
/* Compute how many bytes we need per irq and allocate them */
|
||||
bytes = nr * sizeof(unsigned int);
|
||||
|
||||
memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes);
|
||||
}
|
||||
if (desc->kstat_irqs != old_desc->kstat_irqs)
|
||||
memcpy(desc->kstat_irqs, old_desc->kstat_irqs,
|
||||
nr * sizeof(*desc->kstat_irqs));
|
||||
}
|
||||
|
||||
static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
|
||||
@@ -38,15 +33,22 @@ static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
|
||||
old_desc->kstat_irqs = NULL;
|
||||
}
|
||||
|
||||
static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
|
||||
static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
|
||||
struct irq_desc *desc, int cpu)
|
||||
{
|
||||
memcpy(desc, old_desc, sizeof(struct irq_desc));
|
||||
if (!init_alloc_desc_masks(desc, cpu, false)) {
|
||||
printk(KERN_ERR "irq %d: can not get new irq_desc cpumask "
|
||||
"for migration.\n", irq);
|
||||
return false;
|
||||
}
|
||||
spin_lock_init(&desc->lock);
|
||||
desc->cpu = cpu;
|
||||
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
|
||||
init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
|
||||
init_copy_desc_masks(old_desc, desc);
|
||||
arch_init_copy_chip_data(old_desc, desc, cpu);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
|
||||
@@ -76,12 +78,18 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
|
||||
node = cpu_to_node(cpu);
|
||||
desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
|
||||
if (!desc) {
|
||||
printk(KERN_ERR "irq %d: can not get new irq_desc for migration.\n", irq);
|
||||
printk(KERN_ERR "irq %d: can not get new irq_desc "
|
||||
"for migration.\n", irq);
|
||||
/* still use old one */
|
||||
desc = old_desc;
|
||||
goto out_unlock;
|
||||
}
|
||||
init_copy_one_irq_desc(irq, old_desc, desc, cpu);
|
||||
if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) {
|
||||
/* still use old one */
|
||||
kfree(desc);
|
||||
desc = old_desc;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
irq_desc_ptrs[irq] = desc;
|
||||
spin_unlock_irqrestore(&sparse_irq_lock, flags);
|
||||
|
@@ -20,11 +20,11 @@ static struct proc_dir_entry *root_irq_dir;
|
||||
static int irq_affinity_proc_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct irq_desc *desc = irq_to_desc((long)m->private);
|
||||
const struct cpumask *mask = &desc->affinity;
|
||||
const struct cpumask *mask = desc->affinity;
|
||||
|
||||
#ifdef CONFIG_GENERIC_PENDING_IRQ
|
||||
if (desc->status & IRQ_MOVE_PENDING)
|
||||
mask = &desc->pending_mask;
|
||||
mask = desc->pending_mask;
|
||||
#endif
|
||||
seq_cpumask(m, mask);
|
||||
seq_putc(m, '\n');
|
||||
|
@@ -104,7 +104,7 @@ static int misrouted_irq(int irq)
|
||||
return ok;
|
||||
}
|
||||
|
||||
static void poll_spurious_irqs(unsigned long dummy)
|
||||
static void poll_all_shared_irqs(void)
|
||||
{
|
||||
struct irq_desc *desc;
|
||||
int i;
|
||||
@@ -123,11 +123,23 @@ static void poll_spurious_irqs(unsigned long dummy)
|
||||
|
||||
try_one_irq(i, desc);
|
||||
}
|
||||
}
|
||||
|
||||
static void poll_spurious_irqs(unsigned long dummy)
|
||||
{
|
||||
poll_all_shared_irqs();
|
||||
|
||||
mod_timer(&poll_spurious_irq_timer,
|
||||
jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_SHIRQ
|
||||
void debug_poll_all_shared_irqs(void)
|
||||
{
|
||||
poll_all_shared_irqs();
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If 99,900 of the previous 100,000 interrupts have not been handled
|
||||
* then assume that the IRQ is stuck in some manner. Drop a diagnostic
|
||||
|
@@ -1130,7 +1130,7 @@ void crash_save_cpu(struct pt_regs *regs, int cpu)
return;
memset(&prstatus, 0, sizeof(prstatus));
prstatus.pr_pid = current->pid;
elf_core_copy_regs(&prstatus.pr_reg, regs);
elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
&prstatus, sizeof(prstatus));
final_note(buf);
@@ -9,6 +9,44 @@
* as published by the Free Software Foundation; version 2
* of the License.
*/

/*
* CONFIG_LATENCYTOP enables a kernel latency tracking infrastructure that is
* used by the "latencytop" userspace tool. The latency that is tracked is not
* the 'traditional' interrupt latency (which is primarily caused by something
* else consuming CPU), but instead, it is the latency an application encounters
* because the kernel sleeps on its behalf for various reasons.
*
* This code tracks 2 levels of statistics:
* 1) System level latency
* 2) Per process latency
*
* The latency is stored in fixed sized data structures in an accumulated form;
* if the "same" latency cause is hit twice, this will be tracked as one entry
* in the data structure. Both the count, total accumulated latency and maximum
* latency are tracked in this data structure. When the fixed size structure is
* full, no new causes are tracked until the buffer is flushed by writing to
* the /proc file; the userspace tool does this on a regular basis.
*
* A latency cause is identified by a stringified backtrace at the point that
* the scheduler gets invoked. The userland tool will use this string to
* identify the cause of the latency in human readable form.
*
* The information is exported via /proc/latency_stats and /proc/<pid>/latency.
* These files look like this:
*
* Latency Top version : v0.1
* 70 59433 4897 i915_irq_wait drm_ioctl vfs_ioctl do_vfs_ioctl sys_ioctl
* |    |    |    |
* |    |    |    +----> the stringified backtrace
* |    |    +---------> The maximum latency for this entry in microseconds
* |    +--------------> The accumulated latency for this entry (microseconds)
* +-------------------> The number of times this entry is hit
*
* (note: the average latency is the accumulated latency divided by the number
* of times)
*/

#include <linux/latencytop.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
@@ -72,7 +110,7 @@ account_global_scheduler_latency(struct task_struct *tsk, struct latency_record
|
||||
firstnonnull = i;
|
||||
continue;
|
||||
}
|
||||
for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
|
||||
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
|
||||
unsigned long record = lat->backtrace[q];
|
||||
|
||||
if (latency_record[i].backtrace[q] != record) {
|
||||
@@ -101,31 +139,52 @@ account_global_scheduler_latency(struct task_struct *tsk, struct latency_record
|
||||
memcpy(&latency_record[i], lat, sizeof(struct latency_record));
|
||||
}
|
||||
|
||||
static inline void store_stacktrace(struct task_struct *tsk, struct latency_record *lat)
|
||||
/*
|
||||
* Iterator to store a backtrace into a latency record entry
|
||||
*/
|
||||
static inline void store_stacktrace(struct task_struct *tsk,
|
||||
struct latency_record *lat)
|
||||
{
|
||||
struct stack_trace trace;
|
||||
|
||||
memset(&trace, 0, sizeof(trace));
|
||||
trace.max_entries = LT_BACKTRACEDEPTH;
|
||||
trace.entries = &lat->backtrace[0];
|
||||
trace.skip = 0;
|
||||
save_stack_trace_tsk(tsk, &trace);
|
||||
}
|
||||
|
||||
/**
|
||||
* __account_scheduler_latency - record an occured latency
|
||||
* @tsk - the task struct of the task hitting the latency
|
||||
* @usecs - the duration of the latency in microseconds
|
||||
* @inter - 1 if the sleep was interruptible, 0 if uninterruptible
|
||||
*
|
||||
* This function is the main entry point for recording latency entries
|
||||
* as called by the scheduler.
|
||||
*
|
||||
* This function has a few special cases to deal with normal 'non-latency'
|
||||
* sleeps: specifically, interruptible sleep longer than 5 msec is skipped
|
||||
* since this usually is caused by waiting for events via select() and co.
|
||||
*
|
||||
* Negative latencies (caused by time going backwards) are also explicitly
|
||||
* skipped.
|
||||
*/
|
||||
void __sched
|
||||
account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
|
||||
__account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
|
||||
{
|
||||
unsigned long flags;
|
||||
int i, q;
|
||||
struct latency_record lat;
|
||||
|
||||
if (!latencytop_enabled)
|
||||
return;
|
||||
|
||||
/* Long interruptible waits are generally user requested... */
|
||||
if (inter && usecs > 5000)
|
||||
return;
|
||||
|
||||
/* Negative sleeps are time going backwards */
|
||||
/* Zero-time sleeps are non-interesting */
|
||||
if (usecs <= 0)
|
||||
return;
|
||||
|
||||
memset(&lat, 0, sizeof(lat));
|
||||
lat.count = 1;
|
||||
lat.time = usecs;
|
||||
@@ -143,12 +202,12 @@ account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
|
||||
if (tsk->latency_record_count >= LT_SAVECOUNT)
|
||||
goto out_unlock;
|
||||
|
||||
for (i = 0; i < LT_SAVECOUNT ; i++) {
|
||||
for (i = 0; i < LT_SAVECOUNT; i++) {
|
||||
struct latency_record *mylat;
|
||||
int same = 1;
|
||||
|
||||
mylat = &tsk->latency_record[i];
|
||||
for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) {
|
||||
for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
|
||||
unsigned long record = lat.backtrace[q];
|
||||
|
||||
if (mylat->backtrace[q] != record) {
|
||||
@@ -186,7 +245,7 @@ static int lstats_show(struct seq_file *m, void *v)
|
||||
for (i = 0; i < MAXLR; i++) {
|
||||
if (latency_record[i].backtrace[0]) {
|
||||
int q;
|
||||
seq_printf(m, "%i %li %li ",
|
||||
seq_printf(m, "%i %lu %lu ",
|
||||
latency_record[i].count,
|
||||
latency_record[i].time,
|
||||
latency_record[i].max);
|
||||
@@ -223,7 +282,7 @@ static int lstats_open(struct inode *inode, struct file *filp)
|
||||
return single_open(filp, lstats_show, NULL);
|
||||
}
|
||||
|
||||
static struct file_operations lstats_fops = {
|
||||
static const struct file_operations lstats_fops = {
|
||||
.open = lstats_open,
|
||||
.read = seq_read,
|
||||
.write = lstats_write,
|
||||
@@ -236,4 +295,4 @@ static int __init init_lstats_procfs(void)
|
||||
proc_create("latency_stats", 0644, NULL, &lstats_fops);
|
||||
return 0;
|
||||
}
|
||||
__initcall(init_lstats_procfs);
|
||||
device_initcall(init_lstats_procfs);
|
||||
|
@@ -51,6 +51,7 @@
|
||||
#include <linux/tracepoint.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/async.h>
|
||||
#include <linux/percpu.h>
|
||||
|
||||
#if 0
|
||||
#define DEBUGP printk
|
||||
@@ -366,6 +367,34 @@ static struct module *find_module(const char *name)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA
|
||||
|
||||
static void *percpu_modalloc(unsigned long size, unsigned long align,
|
||||
const char *name)
|
||||
{
|
||||
void *ptr;
|
||||
|
||||
if (align > PAGE_SIZE) {
|
||||
printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
|
||||
name, align, PAGE_SIZE);
|
||||
align = PAGE_SIZE;
|
||||
}
|
||||
|
||||
ptr = __alloc_reserved_percpu(size, align);
|
||||
if (!ptr)
|
||||
printk(KERN_WARNING
|
||||
"Could not allocate %lu bytes percpu data\n", size);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static void percpu_modfree(void *freeme)
|
||||
{
|
||||
free_percpu(freeme);
|
||||
}
|
||||
|
||||
#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
|
||||
|
||||
/* Number of blocks used and allocated. */
|
||||
static unsigned int pcpu_num_used, pcpu_num_allocated;
|
||||
/* Size of each block. -ve means used. */
|
||||
@@ -480,21 +509,6 @@ static void percpu_modfree(void *freeme)
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int find_pcpusec(Elf_Ehdr *hdr,
|
||||
Elf_Shdr *sechdrs,
|
||||
const char *secstrings)
|
||||
{
|
||||
return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
|
||||
}
|
||||
|
||||
static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
memcpy(pcpudest + per_cpu_offset(cpu), from, size);
|
||||
}
|
||||
|
||||
static int percpu_modinit(void)
|
||||
{
|
||||
pcpu_num_used = 2;
|
||||
@@ -513,7 +527,26 @@ static int percpu_modinit(void)
|
||||
return 0;
|
||||
}
|
||||
__initcall(percpu_modinit);
|
||||
|
||||
#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */
|
||||
|
||||
static unsigned int find_pcpusec(Elf_Ehdr *hdr,
|
||||
Elf_Shdr *sechdrs,
|
||||
const char *secstrings)
|
||||
{
|
||||
return find_sec(hdr, sechdrs, secstrings, ".data.percpu");
|
||||
}
|
||||
|
||||
static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
memcpy(pcpudest + per_cpu_offset(cpu), from, size);
|
||||
}
|
||||
|
||||
#else /* ... !CONFIG_SMP */
|
||||
|
||||
static inline void *percpu_modalloc(unsigned long size, unsigned long align,
|
||||
const char *name)
|
||||
{
|
||||
@@ -535,6 +568,7 @@ static inline void percpu_modcopy(void *pcpudst, const void *src,
|
||||
/* pcpusec should be 0, and size of that section should be 0. */
|
||||
BUG_ON(size != 0);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#define MODINFO_ATTR(field) \
|
||||
@@ -822,7 +856,7 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
|
||||
mutex_lock(&module_mutex);
|
||||
/* Store the name of the last unloaded module for diagnostic purposes */
|
||||
strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module));
|
||||
unregister_dynamic_debug_module(mod->name);
|
||||
ddebug_remove_module(mod->name);
|
||||
free_module(mod);
|
||||
|
||||
out:
|
||||
@@ -1827,19 +1861,13 @@ static inline void add_kallsyms(struct module *mod,
|
||||
}
|
||||
#endif /* CONFIG_KALLSYMS */
|
||||
|
||||
static void dynamic_printk_setup(struct mod_debug *debug, unsigned int num)
|
||||
static void dynamic_debug_setup(struct _ddebug *debug, unsigned int num)
|
||||
{
|
||||
#ifdef CONFIG_DYNAMIC_PRINTK_DEBUG
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
register_dynamic_debug_module(debug[i].modname,
|
||||
debug[i].type,
|
||||
debug[i].logical_modname,
|
||||
debug[i].flag_names,
|
||||
debug[i].hash, debug[i].hash2);
|
||||
}
|
||||
#endif /* CONFIG_DYNAMIC_PRINTK_DEBUG */
|
||||
#ifdef CONFIG_DYNAMIC_DEBUG
|
||||
if (ddebug_add_module(debug, num, debug->modname))
|
||||
printk(KERN_ERR "dynamic debug error adding module: %s\n",
|
||||
debug->modname);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void *module_alloc_update_bounds(unsigned long size)
|
||||
@@ -2213,12 +2241,13 @@ static noinline struct module *load_module(void __user *umod,
|
||||
add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
|
||||
|
||||
if (!mod->taints) {
|
||||
struct mod_debug *debug;
|
||||
struct _ddebug *debug;
|
||||
unsigned int num_debug;
|
||||
|
||||
debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
|
||||
sizeof(*debug), &num_debug);
|
||||
dynamic_printk_setup(debug, num_debug);
|
||||
if (debug)
|
||||
dynamic_debug_setup(debug, num_debug);
|
||||
}
|
||||
|
||||
/* sechdrs[0].sh_size is always zero */
|
||||
|
@@ -74,6 +74,9 @@ NORET_TYPE void panic(const char * fmt, ...)
vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
#ifdef CONFIG_DEBUG_BUGVERBOSE
dump_stack();
#endif
bust_spinlocks(0);

/*
@@ -355,15 +358,18 @@ EXPORT_SYMBOL(warn_slowpath);
#endif

#ifdef CONFIG_CC_STACKPROTECTOR

/*
* Called when gcc's -fstack-protector feature is used, and
* gcc detects corruption of the on-stack canary value
*/
void __stack_chk_fail(void)
{
panic("stack-protector: Kernel stack is corrupted");
panic("stack-protector: Kernel stack is corrupted in: %p\n",
__builtin_return_address(0));
}
EXPORT_SYMBOL(__stack_chk_fail);

#endif

core_param(panic, panic_timeout, int, 0644);
@@ -1370,7 +1370,8 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
if (task_cputime_expired(&group_sample, &sig->cputime_expires))
return 1;
}
return 0;

return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY;
}

/*
@@ -750,7 +750,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
* from the scheduler (trying to re-grab
* rq->lock), so defer it.
*/
__mod_timer(&buf->timer, jiffies + 1);
mod_timer(&buf->timer, jiffies + 1);
}

old = buf->data;
kernel/sched.c (1072 lines changed; diff suppressed because it is too large)
@@ -24,11 +24,11 @@
* The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
* consistent between cpus (never more than 2 jiffies difference).
*/
#include <linux/sched.h>
#include <linux/percpu.h>
#include <linux/spinlock.h>
#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/ktime.h>
#include <linux/sched.h>

/*
* Scheduler clock - returns current time in nanosec units.
@@ -43,6 +43,7 @@ unsigned long long __attribute__((weak)) sched_clock(void)
static __read_mostly int sched_clock_running;

#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__read_mostly int sched_clock_stable;

struct sched_clock_data {
/*
@@ -87,7 +88,7 @@ void sched_clock_init(void)
}

/*
* min,max except they take wrapping into account
* min, max except they take wrapping into account
*/

static inline u64 wrap_min(u64 x, u64 y)
@@ -111,15 +112,13 @@ static u64 __update_sched_clock(struct sched_clock_data *scd, u64 now)
s64 delta = now - scd->tick_raw;
u64 clock, min_clock, max_clock;

WARN_ON_ONCE(!irqs_disabled());

if (unlikely(delta < 0))
delta = 0;

/*
* scd->clock = clamp(scd->tick_gtod + delta,
* max(scd->tick_gtod, scd->clock),
* scd->tick_gtod + TICK_NSEC);
* max(scd->tick_gtod, scd->clock),
* scd->tick_gtod + TICK_NSEC);
*/

clock = scd->tick_gtod + delta;
@@ -148,12 +147,13 @@ static void lock_double_clock(struct sched_clock_data *data1,

u64 sched_clock_cpu(int cpu)
{
struct sched_clock_data *scd = cpu_sdc(cpu);
u64 now, clock, this_clock, remote_clock;
struct sched_clock_data *scd;

if (unlikely(!sched_clock_running))
return 0ull;
if (sched_clock_stable)
return sched_clock();

scd = cpu_sdc(cpu);
WARN_ON_ONCE(!irqs_disabled());
now = sched_clock();

@@ -195,14 +195,18 @@ u64 sched_clock_cpu(int cpu)

void sched_clock_tick(void)
{
struct sched_clock_data *scd = this_scd();
struct sched_clock_data *scd;
u64 now, now_gtod;

if (sched_clock_stable)
return;

if (unlikely(!sched_clock_running))
return;

WARN_ON_ONCE(!irqs_disabled());

scd = this_scd();
now_gtod = ktime_to_ns(ktime_get());
now = sched_clock();

@@ -250,7 +254,7 @@ u64 sched_clock_cpu(int cpu)
return sched_clock();
}

#endif
#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */

unsigned long long cpu_clock(int cpu)
{
@@ -272,7 +272,6 @@ static void print_cpu(struct seq_file *m, int cpu)
P(nr_switches);
P(nr_load_updates);
P(nr_uninterruptible);
SEQ_printf(m, " .%-30s: %lu\n", "jiffies", jiffies);
PN(next_balance);
P(curr->pid);
PN(clock);
@@ -287,9 +286,6 @@ static void print_cpu(struct seq_file *m, int cpu)
#ifdef CONFIG_SCHEDSTATS
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);

P(yld_exp_empty);
P(yld_act_empty);
P(yld_both_empty);
P(yld_count);

P(sched_switch);
@@ -314,7 +310,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
u64 now = ktime_to_ns(ktime_get());
int cpu;

SEQ_printf(m, "Sched Debug Version: v0.08, %s %.*s\n",
SEQ_printf(m, "Sched Debug Version: v0.09, %s %.*s\n",
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
@@ -325,6 +321,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
P(jiffies);
PN(sysctl_sched_latency);
PN(sysctl_sched_min_granularity);
PN(sysctl_sched_wakeup_granularity);
@@ -397,6 +394,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
PN(se.vruntime);
PN(se.sum_exec_runtime);
PN(se.avg_overlap);
PN(se.avg_wakeup);

nr_switches = p->nvcsw + p->nivcsw;
@@ -1314,16 +1314,63 @@ out:
}
#endif /* CONFIG_SMP */

static unsigned long wakeup_gran(struct sched_entity *se)
/*
* Adaptive granularity
*
* se->avg_wakeup gives the average time a task runs until it does a wakeup,
* with the limit of wakeup_gran -- when it never does a wakeup.
*
* So the smaller avg_wakeup is the faster we want this task to preempt,
* but we don't want to treat the preemptee unfairly and therefore allow it
* to run for at least the amount of time we'd like to run.
*
* NOTE: we use 2*avg_wakeup to increase the probability of actually doing one
*
* NOTE: we use *nr_running to scale with load, this nicely matches the
* degrading latency on load.
*/
static unsigned long
adaptive_gran(struct sched_entity *curr, struct sched_entity *se)
{
u64 this_run = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
u64 expected_wakeup = 2*se->avg_wakeup * cfs_rq_of(se)->nr_running;
u64 gran = 0;

if (this_run < expected_wakeup)
gran = expected_wakeup - this_run;

return min_t(s64, gran, sysctl_sched_wakeup_granularity);
}

static unsigned long
wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
{
unsigned long gran = sysctl_sched_wakeup_granularity;

if (cfs_rq_of(curr)->curr && sched_feat(ADAPTIVE_GRAN))
gran = adaptive_gran(curr, se);

/*
* More easily preempt - nice tasks, while not making it harder for
* + nice tasks.
* Since its curr running now, convert the gran from real-time
* to virtual-time in his units.
*/
if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
if (sched_feat(ASYM_GRAN)) {
/*
* By using 'se' instead of 'curr' we penalize light tasks, so
* they get preempted easier. That is, if 'se' < 'curr' then
* the resulting gran will be larger, therefore penalizing the
* lighter, if otoh 'se' > 'curr' then the resulting gran will
* be smaller, again penalizing the lighter task.
*
* This is especially important for buddies when the leftmost
* task is higher priority than the buddy.
*/
if (unlikely(se->load.weight != NICE_0_LOAD))
gran = calc_delta_fair(gran, se);
} else {
if (unlikely(curr->load.weight != NICE_0_LOAD))
gran = calc_delta_fair(gran, curr);
}

return gran;
}
@@ -1350,7 +1397,7 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
if (vdiff <= 0)
return -1;

gran = wakeup_gran(curr);
gran = wakeup_gran(curr, se);
if (vdiff > gran)
return 1;
@@ -1,5 +1,6 @@
SCHED_FEAT(NEW_FAIR_SLEEPERS, 1)
SCHED_FEAT(NORMALIZED_SLEEPER, 1)
SCHED_FEAT(NORMALIZED_SLEEPER, 0)
SCHED_FEAT(ADAPTIVE_GRAN, 1)
SCHED_FEAT(WAKEUP_PREEMPT, 1)
SCHED_FEAT(START_DEBIT, 1)
SCHED_FEAT(AFFINE_WAKEUPS, 1)
@@ -3,6 +3,40 @@
|
||||
* policies)
|
||||
*/
|
||||
|
||||
static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
|
||||
{
|
||||
return container_of(rt_se, struct task_struct, rt);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
|
||||
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
|
||||
{
|
||||
return rt_rq->rq;
|
||||
}
|
||||
|
||||
static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
|
||||
{
|
||||
return rt_se->rt_rq;
|
||||
}
|
||||
|
||||
#else /* CONFIG_RT_GROUP_SCHED */
|
||||
|
||||
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
|
||||
{
|
||||
return container_of(rt_rq, struct rq, rt);
|
||||
}
|
||||
|
||||
static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
|
||||
{
|
||||
struct task_struct *p = rt_task_of(rt_se);
|
||||
struct rq *rq = task_rq(p);
|
||||
|
||||
return &rq->rt;
|
||||
}
|
||||
|
||||
#endif /* CONFIG_RT_GROUP_SCHED */
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
static inline int rt_overloaded(struct rq *rq)
|
||||
@@ -37,25 +71,69 @@ static inline void rt_clear_overload(struct rq *rq)
|
||||
cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
|
||||
}
|
||||
|
||||
static void update_rt_migration(struct rq *rq)
|
||||
static void update_rt_migration(struct rt_rq *rt_rq)
|
||||
{
|
||||
if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1)) {
|
||||
if (!rq->rt.overloaded) {
|
||||
rt_set_overload(rq);
|
||||
rq->rt.overloaded = 1;
|
||||
if (rt_rq->rt_nr_migratory && (rt_rq->rt_nr_running > 1)) {
|
||||
if (!rt_rq->overloaded) {
|
||||
rt_set_overload(rq_of_rt_rq(rt_rq));
|
||||
rt_rq->overloaded = 1;
|
||||
}
|
||||
} else if (rq->rt.overloaded) {
|
||||
rt_clear_overload(rq);
|
||||
rq->rt.overloaded = 0;
|
||||
} else if (rt_rq->overloaded) {
|
||||
rt_clear_overload(rq_of_rt_rq(rt_rq));
|
||||
rt_rq->overloaded = 0;
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
|
||||
static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
{
|
||||
return container_of(rt_se, struct task_struct, rt);
|
||||
if (rt_se->nr_cpus_allowed > 1)
|
||||
rt_rq->rt_nr_migratory++;
|
||||
|
||||
update_rt_migration(rt_rq);
|
||||
}
|
||||
|
||||
static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
{
|
||||
if (rt_se->nr_cpus_allowed > 1)
|
||||
rt_rq->rt_nr_migratory--;
|
||||
|
||||
update_rt_migration(rt_rq);
|
||||
}
|
||||
|
||||
static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
|
||||
plist_node_init(&p->pushable_tasks, p->prio);
|
||||
plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
|
||||
}
|
||||
|
||||
static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
}
|
||||
|
||||
static inline
|
||||
void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
{
|
||||
}
|
||||
|
||||
static inline
|
||||
void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
static inline int on_rt_rq(struct sched_rt_entity *rt_se)
|
||||
{
|
||||
return !list_empty(&rt_se->run_list);
|
||||
@@ -79,16 +157,6 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
|
||||
#define for_each_leaf_rt_rq(rt_rq, rq) \
|
||||
list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
|
||||
|
||||
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
|
||||
{
|
||||
return rt_rq->rq;
|
||||
}
|
||||
|
||||
static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
|
||||
{
|
||||
return rt_se->rt_rq;
|
||||
}
|
||||
|
||||
#define for_each_sched_rt_entity(rt_se) \
|
||||
for (; rt_se; rt_se = rt_se->parent)
|
||||
|
||||
@@ -108,7 +176,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
|
||||
if (rt_rq->rt_nr_running) {
|
||||
if (rt_se && !on_rt_rq(rt_se))
|
||||
enqueue_rt_entity(rt_se);
|
||||
if (rt_rq->highest_prio < curr->prio)
|
||||
if (rt_rq->highest_prio.curr < curr->prio)
|
||||
resched_task(curr);
|
||||
}
|
||||
}
|
||||
@@ -176,19 +244,6 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
|
||||
#define for_each_leaf_rt_rq(rt_rq, rq) \
|
||||
for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
|
||||
|
||||
static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
|
||||
{
|
||||
return container_of(rt_rq, struct rq, rt);
|
||||
}
|
||||
|
||||
static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
|
||||
{
|
||||
struct task_struct *p = rt_task_of(rt_se);
|
||||
struct rq *rq = task_rq(p);
|
||||
|
||||
return &rq->rt;
|
||||
}
|
||||
|
||||
#define for_each_sched_rt_entity(rt_se) \
|
||||
for (; rt_se; rt_se = NULL)
|
||||
|
||||
@@ -473,7 +528,7 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se)
|
||||
struct rt_rq *rt_rq = group_rt_rq(rt_se);
|
||||
|
||||
if (rt_rq)
|
||||
return rt_rq->highest_prio;
|
||||
return rt_rq->highest_prio.curr;
|
||||
#endif
|
||||
|
||||
return rt_task_of(rt_se)->prio;
|
||||
@@ -547,91 +602,174 @@ static void update_curr_rt(struct rq *rq)
|
||||
}
|
||||
}
|
||||
|
||||
static inline
|
||||
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
#if defined CONFIG_SMP
|
||||
|
||||
static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);
|
||||
|
||||
static inline int next_prio(struct rq *rq)
|
||||
{
|
||||
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
|
||||
rt_rq->rt_nr_running++;
|
||||
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
|
||||
if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
|
||||
#ifdef CONFIG_SMP
|
||||
struct rq *rq = rq_of_rt_rq(rt_rq);
|
||||
#endif
|
||||
struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu);
|
||||
|
||||
if (next && rt_prio(next->prio))
|
||||
return next->prio;
|
||||
else
|
||||
return MAX_RT_PRIO;
|
||||
}
|
||||
|
||||
static void
|
||||
inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
|
||||
{
|
||||
struct rq *rq = rq_of_rt_rq(rt_rq);
|
||||
|
||||
if (prio < prev_prio) {
|
||||
|
||||
/*
|
||||
* If the new task is higher in priority than anything on the
|
||||
* run-queue, we know that the previous high becomes our
|
||||
* next-highest.
|
||||
*/
|
||||
rt_rq->highest_prio.next = prev_prio;
|
||||
|
||||
rt_rq->highest_prio = rt_se_prio(rt_se);
|
||||
#ifdef CONFIG_SMP
|
||||
if (rq->online)
|
||||
cpupri_set(&rq->rd->cpupri, rq->cpu,
|
||||
rt_se_prio(rt_se));
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#ifdef CONFIG_SMP
|
||||
if (rt_se->nr_cpus_allowed > 1) {
|
||||
struct rq *rq = rq_of_rt_rq(rt_rq);
|
||||
cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
|
||||
|
||||
rq->rt.rt_nr_migratory++;
|
||||
}
|
||||
} else if (prio == rt_rq->highest_prio.curr)
|
||||
/*
|
||||
* If the next task is equal in priority to the highest on
|
||||
* the run-queue, then we implicitly know that the next highest
|
||||
* task cannot be any lower than current
|
||||
*/
|
||||
rt_rq->highest_prio.next = prio;
|
||||
else if (prio < rt_rq->highest_prio.next)
|
||||
/*
|
||||
* Otherwise, we need to recompute next-highest
|
||||
*/
|
||||
rt_rq->highest_prio.next = next_prio(rq);
|
||||
}
|
||||
|
||||
static void
|
||||
dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
|
||||
{
|
||||
struct rq *rq = rq_of_rt_rq(rt_rq);
|
||||
|
||||
if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next))
|
||||
rt_rq->highest_prio.next = next_prio(rq);
|
||||
|
||||
if (rq->online && rt_rq->highest_prio.curr != prev_prio)
|
||||
cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
|
||||
}
|
||||
|
||||
#else /* CONFIG_SMP */
|
||||
|
||||
static inline
|
||||
void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
|
||||
static inline
|
||||
void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
|
||||
static void
|
||||
inc_rt_prio(struct rt_rq *rt_rq, int prio)
|
||||
{
|
||||
int prev_prio = rt_rq->highest_prio.curr;
|
||||
|
||||
if (prio < prev_prio)
|
||||
rt_rq->highest_prio.curr = prio;
|
||||
|
||||
inc_rt_prio_smp(rt_rq, prio, prev_prio);
|
||||
}
|
||||
|
||||
static void
|
||||
dec_rt_prio(struct rt_rq *rt_rq, int prio)
|
||||
{
|
||||
int prev_prio = rt_rq->highest_prio.curr;
|
||||
|
||||
if (rt_rq->rt_nr_running) {
|
||||
|
||||
WARN_ON(prio < prev_prio);
|
||||
|
||||
/*
|
||||
* This may have been our highest task, and therefore
|
||||
* we may have some recomputation to do
|
||||
*/
|
||||
if (prio == prev_prio) {
|
||||
struct rt_prio_array *array = &rt_rq->active;
|
||||
|
||||
rt_rq->highest_prio.curr =
|
||||
sched_find_first_bit(array->bitmap);
|
||||
}
|
||||
|
||||
} else
|
||||
rt_rq->highest_prio.curr = MAX_RT_PRIO;
|
||||
|
||||
dec_rt_prio_smp(rt_rq, prio, prev_prio);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
|
||||
static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}
|
||||
|
||||
#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */
|
||||
|
||||
update_rt_migration(rq_of_rt_rq(rt_rq));
|
||||
#endif
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
|
||||
static void
|
||||
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
{
|
||||
if (rt_se_boosted(rt_se))
|
||||
rt_rq->rt_nr_boosted++;
|
||||
|
||||
if (rt_rq->tg)
|
||||
start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
|
||||
#else
|
||||
}
|
||||
|
||||
static void
|
||||
dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
{
|
||||
if (rt_se_boosted(rt_se))
|
||||
rt_rq->rt_nr_boosted--;
|
||||
|
||||
WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
|
||||
}
|
||||
|
||||
#else /* CONFIG_RT_GROUP_SCHED */
|
||||
|
||||
static void
|
||||
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
{
|
||||
start_rt_bandwidth(&def_rt_bandwidth);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline
|
||||
void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}
|
||||
|
||||
#endif /* CONFIG_RT_GROUP_SCHED */
|
||||
|
||||
static inline
|
||||
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
{
|
||||
int prio = rt_se_prio(rt_se);
|
||||
|
||||
WARN_ON(!rt_prio(prio));
|
||||
rt_rq->rt_nr_running++;
|
||||
|
||||
inc_rt_prio(rt_rq, prio);
|
||||
inc_rt_migration(rt_se, rt_rq);
|
||||
inc_rt_group(rt_se, rt_rq);
|
||||
}
|
||||
|
||||
static inline
|
||||
void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
int highest_prio = rt_rq->highest_prio;
|
||||
#endif
|
||||
|
||||
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
|
||||
WARN_ON(!rt_rq->rt_nr_running);
|
||||
rt_rq->rt_nr_running--;
|
||||
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
|
||||
if (rt_rq->rt_nr_running) {
|
||||
struct rt_prio_array *array;
|
||||
|
||||
WARN_ON(rt_se_prio(rt_se) < rt_rq->highest_prio);
|
||||
if (rt_se_prio(rt_se) == rt_rq->highest_prio) {
|
||||
/* recalculate */
|
||||
array = &rt_rq->active;
|
||||
rt_rq->highest_prio =
|
||||
sched_find_first_bit(array->bitmap);
|
||||
} /* otherwise leave rq->highest prio alone */
|
||||
} else
|
||||
rt_rq->highest_prio = MAX_RT_PRIO;
|
||||
#endif
|
||||
#ifdef CONFIG_SMP
|
||||
if (rt_se->nr_cpus_allowed > 1) {
|
||||
struct rq *rq = rq_of_rt_rq(rt_rq);
|
||||
rq->rt.rt_nr_migratory--;
|
||||
}
|
||||
|
||||
if (rt_rq->highest_prio != highest_prio) {
|
||||
struct rq *rq = rq_of_rt_rq(rt_rq);
|
||||
|
||||
if (rq->online)
|
||||
cpupri_set(&rq->rd->cpupri, rq->cpu,
|
||||
rt_rq->highest_prio);
|
||||
}
|
||||
|
||||
update_rt_migration(rq_of_rt_rq(rt_rq));
|
||||
#endif /* CONFIG_SMP */
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
if (rt_se_boosted(rt_se))
|
||||
rt_rq->rt_nr_boosted--;
|
||||
|
||||
WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
|
||||
#endif
|
||||
dec_rt_prio(rt_rq, rt_se_prio(rt_se));
|
||||
dec_rt_migration(rt_se, rt_rq);
|
||||
dec_rt_group(rt_se, rt_rq);
|
||||
}
|
||||
|
||||
static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
|
||||
@@ -718,6 +856,9 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
|
||||
|
||||
enqueue_rt_entity(rt_se);
|
||||
|
||||
if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
|
||||
enqueue_pushable_task(rq, p);
|
||||
|
||||
inc_cpu_load(rq, p->se.load.weight);
|
||||
}
|
||||
|
||||
@@ -728,6 +869,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
|
||||
update_curr_rt(rq);
|
||||
dequeue_rt_entity(rt_se);
|
||||
|
||||
dequeue_pushable_task(rq, p);
|
||||
|
||||
dec_cpu_load(rq, p->se.load.weight);
|
||||
}
|
||||
|
||||
@@ -878,7 +1021,7 @@ static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
|
||||
return next;
|
||||
}
|
||||
|
||||
static struct task_struct *pick_next_task_rt(struct rq *rq)
|
||||
static struct task_struct *_pick_next_task_rt(struct rq *rq)
|
||||
{
|
||||
struct sched_rt_entity *rt_se;
|
||||
struct task_struct *p;
|
||||
@@ -900,6 +1043,18 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
|
||||
|
||||
p = rt_task_of(rt_se);
|
||||
p->se.exec_start = rq->clock;
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static struct task_struct *pick_next_task_rt(struct rq *rq)
|
||||
{
|
||||
struct task_struct *p = _pick_next_task_rt(rq);
|
||||
|
||||
/* The running task is never eligible for pushing */
|
||||
if (p)
|
||||
dequeue_pushable_task(rq, p);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
@@ -907,6 +1062,13 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
update_curr_rt(rq);
|
||||
p->se.exec_start = 0;
|
||||
|
||||
/*
|
||||
* The previous task needs to be made eligible for pushing
|
||||
* if it is still active
|
||||
*/
|
||||
if (p->se.on_rq && p->rt.nr_cpus_allowed > 1)
|
||||
enqueue_pushable_task(rq, p);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
@@ -960,12 +1122,13 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
|
||||
|
||||
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
|
||||
|
||||
static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
|
||||
static inline int pick_optimal_cpu(int this_cpu,
|
||||
const struct cpumask *mask)
|
||||
{
|
||||
int first;
|
||||
|
||||
/* "this_cpu" is cheaper to preempt than a remote processor */
|
||||
if ((this_cpu != -1) && cpu_isset(this_cpu, *mask))
|
||||
if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask))
|
||||
return this_cpu;
|
||||
|
||||
first = cpumask_first(mask);
|
||||
@@ -981,6 +1144,7 @@ static int find_lowest_rq(struct task_struct *task)
|
||||
struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
|
||||
int this_cpu = smp_processor_id();
|
||||
int cpu = task_cpu(task);
|
||||
cpumask_var_t domain_mask;
|
||||
|
||||
if (task->rt.nr_cpus_allowed == 1)
|
||||
return -1; /* No other targets possible */
|
||||
@@ -1013,19 +1177,25 @@ static int find_lowest_rq(struct task_struct *task)
|
||||
if (this_cpu == cpu)
|
||||
this_cpu = -1; /* Skip this_cpu opt if the same */
|
||||
|
||||
for_each_domain(cpu, sd) {
|
||||
if (sd->flags & SD_WAKE_AFFINE) {
|
||||
cpumask_t domain_mask;
|
||||
int best_cpu;
|
||||
if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) {
|
||||
for_each_domain(cpu, sd) {
|
||||
if (sd->flags & SD_WAKE_AFFINE) {
|
||||
int best_cpu;
|
||||
|
||||
cpumask_and(&domain_mask, sched_domain_span(sd),
|
||||
lowest_mask);
|
||||
cpumask_and(domain_mask,
|
||||
sched_domain_span(sd),
|
||||
lowest_mask);
|
||||
|
||||
best_cpu = pick_optimal_cpu(this_cpu,
|
||||
&domain_mask);
|
||||
if (best_cpu != -1)
|
||||
return best_cpu;
|
||||
best_cpu = pick_optimal_cpu(this_cpu,
|
||||
domain_mask);
|
||||
|
||||
if (best_cpu != -1) {
|
||||
free_cpumask_var(domain_mask);
|
||||
return best_cpu;
|
||||
}
|
||||
}
|
||||
}
|
||||
free_cpumask_var(domain_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1072,7 +1242,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
|
||||
}
|
||||
|
||||
/* If this rq is still suitable use it. */
|
||||
if (lowest_rq->rt.highest_prio > task->prio)
|
||||
if (lowest_rq->rt.highest_prio.curr > task->prio)
|
||||
break;
|
||||
|
||||
/* try again */
|
||||
@@ -1083,6 +1253,31 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
|
||||
return lowest_rq;
|
||||
}
|
||||
|
||||
static inline int has_pushable_tasks(struct rq *rq)
|
||||
{
|
||||
return !plist_head_empty(&rq->rt.pushable_tasks);
|
||||
}
|
||||
|
||||
static struct task_struct *pick_next_pushable_task(struct rq *rq)
|
||||
{
|
||||
struct task_struct *p;
|
||||
|
||||
if (!has_pushable_tasks(rq))
|
||||
return NULL;
|
||||
|
||||
p = plist_first_entry(&rq->rt.pushable_tasks,
|
||||
struct task_struct, pushable_tasks);
|
||||
|
||||
BUG_ON(rq->cpu != task_cpu(p));
|
||||
BUG_ON(task_current(rq, p));
|
||||
BUG_ON(p->rt.nr_cpus_allowed <= 1);
|
||||
|
||||
BUG_ON(!p->se.on_rq);
|
||||
BUG_ON(!rt_task(p));
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the current CPU has more than one RT task, see if the non
|
||||
* running task can migrate over to a CPU that is running a task
|
||||
@@ -1092,13 +1287,11 @@ static int push_rt_task(struct rq *rq)
|
||||
{
|
||||
struct task_struct *next_task;
|
||||
struct rq *lowest_rq;
|
||||
int ret = 0;
|
||||
int paranoid = RT_MAX_TRIES;
|
||||
|
||||
if (!rq->rt.overloaded)
|
||||
return 0;
|
||||
|
||||
next_task = pick_next_highest_task_rt(rq, -1);
|
||||
next_task = pick_next_pushable_task(rq);
|
||||
if (!next_task)
|
||||
return 0;
|
||||
|
||||
@@ -1127,16 +1320,34 @@ static int push_rt_task(struct rq *rq)
|
||||
struct task_struct *task;
|
||||
/*
|
||||
* find lock_lowest_rq releases rq->lock
|
||||
* so it is possible that next_task has changed.
|
||||
* If it has, then try again.
|
||||
* so it is possible that next_task has migrated.
|
||||
*
|
||||
* We need to make sure that the task is still on the same
|
||||
* run-queue and is also still the next task eligible for
|
||||
* pushing.
|
||||
*/
|
||||
task = pick_next_highest_task_rt(rq, -1);
|
||||
if (unlikely(task != next_task) && task && paranoid--) {
|
||||
put_task_struct(next_task);
|
||||
next_task = task;
|
||||
goto retry;
|
||||
task = pick_next_pushable_task(rq);
|
||||
if (task_cpu(next_task) == rq->cpu && task == next_task) {
|
||||
/*
|
||||
* If we get here, the task hasnt moved at all, but
|
||||
* it has failed to push. We will not try again,
|
||||
* since the other cpus will pull from us when they
|
||||
* are ready.
|
||||
*/
|
||||
dequeue_pushable_task(rq, next_task);
|
||||
goto out;
|
||||
}
|
||||
goto out;
|
||||
|
||||
if (!task)
|
||||
/* No more tasks, just exit */
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Something has shifted, try again.
|
||||
*/
|
||||
put_task_struct(next_task);
|
||||
next_task = task;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
deactivate_task(rq, next_task, 0);
|
||||
@@ -1147,23 +1358,12 @@ static int push_rt_task(struct rq *rq)
|
||||
|
||||
double_unlock_balance(rq, lowest_rq);
|
||||
|
||||
ret = 1;
|
||||
out:
|
||||
put_task_struct(next_task);
|
||||
|
||||
return ret;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: Currently we just use the second highest prio task on
|
||||
* the queue, and stop when it can't migrate (or there's
|
||||
* no more RT tasks). There may be a case where a lower
|
||||
* priority RT task has a different affinity than the
|
||||
* higher RT task. In this case the lower RT task could
|
||||
* possibly be able to migrate where as the higher priority
|
||||
* RT task could not. We currently ignore this issue.
|
||||
* Enhancements are welcome!
|
||||
*/
|
||||
static void push_rt_tasks(struct rq *rq)
|
||||
{
|
||||
/* push_rt_task will return true if it moved an RT */
|
||||
@@ -1174,33 +1374,35 @@ static void push_rt_tasks(struct rq *rq)
|
||||
static int pull_rt_task(struct rq *this_rq)
|
||||
{
|
||||
int this_cpu = this_rq->cpu, ret = 0, cpu;
|
||||
struct task_struct *p, *next;
|
||||
struct task_struct *p;
|
||||
struct rq *src_rq;
|
||||
|
||||
if (likely(!rt_overloaded(this_rq)))
|
||||
return 0;
|
||||
|
||||
next = pick_next_task_rt(this_rq);
|
||||
|
||||
for_each_cpu(cpu, this_rq->rd->rto_mask) {
|
||||
if (this_cpu == cpu)
|
||||
continue;
|
||||
|
||||
src_rq = cpu_rq(cpu);
|
||||
|
||||
/*
|
||||
* Don't bother taking the src_rq->lock if the next highest
|
||||
* task is known to be lower-priority than our current task.
|
||||
* This may look racy, but if this value is about to go
|
||||
* logically higher, the src_rq will push this task away.
|
||||
* And if its going logically lower, we do not care
|
||||
*/
|
||||
if (src_rq->rt.highest_prio.next >=
|
||||
this_rq->rt.highest_prio.curr)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We can potentially drop this_rq's lock in
|
||||
* double_lock_balance, and another CPU could
|
||||
* steal our next task - hence we must cause
|
||||
* the caller to recalculate the next task
|
||||
* in that case:
|
||||
* alter this_rq
|
||||
*/
|
||||
if (double_lock_balance(this_rq, src_rq)) {
|
||||
struct task_struct *old_next = next;
|
||||
|
||||
next = pick_next_task_rt(this_rq);
|
||||
if (next != old_next)
|
||||
ret = 1;
|
||||
}
|
||||
double_lock_balance(this_rq, src_rq);
|
||||
|
||||
/*
|
||||
* Are there still pullable RT tasks?
|
||||
@@ -1214,7 +1416,7 @@ static int pull_rt_task(struct rq *this_rq)
|
||||
* Do we have an RT task that preempts
|
||||
* the to-be-scheduled task?
|
||||
*/
|
||||
if (p && (!next || (p->prio < next->prio))) {
|
||||
if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
|
||||
WARN_ON(p == src_rq->curr);
|
||||
WARN_ON(!p->se.on_rq);
|
||||
|
||||
@@ -1224,12 +1426,9 @@ static int pull_rt_task(struct rq *this_rq)
|
||||
* This is just that p is wakeing up and hasn't
|
||||
* had a chance to schedule. We only pull
|
||||
* p if it is lower in priority than the
|
||||
* current task on the run queue or
|
||||
* this_rq next task is lower in prio than
|
||||
* the current task on that rq.
|
||||
* current task on the run queue
|
||||
*/
|
||||
if (p->prio < src_rq->curr->prio ||
|
||||
(next && next->prio < src_rq->curr->prio))
|
||||
if (p->prio < src_rq->curr->prio)
|
||||
goto skip;
|
||||
|
||||
ret = 1;
|
||||
@@ -1242,13 +1441,7 @@ static int pull_rt_task(struct rq *this_rq)
|
||||
* case there's an even higher prio task
|
||||
* in another runqueue. (low likelyhood
|
||||
* but possible)
|
||||
*
|
||||
* Update next so that we won't pick a task
|
||||
* on another cpu with a priority lower (or equal)
|
||||
* than the one we just picked.
|
||||
*/
|
||||
next = p;
|
||||
|
||||
}
|
||||
skip:
|
||||
double_unlock_balance(this_rq, src_rq);
|
||||
@@ -1260,24 +1453,27 @@ static int pull_rt_task(struct rq *this_rq)
|
||||
static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
|
||||
{
|
||||
/* Try to pull RT tasks here if we lower this rq's prio */
|
||||
if (unlikely(rt_task(prev)) && rq->rt.highest_prio > prev->prio)
|
||||
if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio)
|
||||
pull_rt_task(rq);
|
||||
}
|
||||
|
||||
/*
|
||||
* assumes rq->lock is held
|
||||
*/
|
||||
static int needs_post_schedule_rt(struct rq *rq)
|
||||
{
|
||||
return has_pushable_tasks(rq);
|
||||
}
|
||||
|
||||
static void post_schedule_rt(struct rq *rq)
|
||||
{
|
||||
/*
|
||||
* If we have more than one rt_task queued, then
|
||||
* see if we can push the other rt_tasks off to other CPUS.
|
||||
* Note we may release the rq lock, and since
|
||||
* the lock was owned by prev, we need to release it
|
||||
* first via finish_lock_switch and then reaquire it here.
|
||||
* This is only called if needs_post_schedule_rt() indicates that
|
||||
* we need to push tasks away
|
||||
*/
|
||||
if (unlikely(rq->rt.overloaded)) {
|
||||
spin_lock_irq(&rq->lock);
|
||||
push_rt_tasks(rq);
|
||||
spin_unlock_irq(&rq->lock);
|
||||
}
|
||||
spin_lock_irq(&rq->lock);
|
||||
push_rt_tasks(rq);
|
||||
spin_unlock_irq(&rq->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1288,7 +1484,8 @@ static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
if (!task_running(rq, p) &&
|
||||
!test_tsk_need_resched(rq->curr) &&
|
||||
rq->rt.overloaded)
|
||||
has_pushable_tasks(rq) &&
|
||||
p->rt.nr_cpus_allowed > 1)
|
||||
push_rt_tasks(rq);
|
||||
}
|
||||
|
||||
@@ -1324,6 +1521,24 @@ static void set_cpus_allowed_rt(struct task_struct *p,
|
||||
if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
|
||||
struct rq *rq = task_rq(p);
|
||||
|
||||
if (!task_current(rq, p)) {
|
||||
/*
|
||||
* Make sure we dequeue this task from the pushable list
|
||||
* before going further. It will either remain off of
|
||||
* the list because we are no longer pushable, or it
|
||||
* will be requeued.
|
||||
*/
|
||||
if (p->rt.nr_cpus_allowed > 1)
|
||||
dequeue_pushable_task(rq, p);
|
||||
|
||||
/*
|
||||
* Requeue if our weight is changing and still > 1
|
||||
*/
|
||||
if (weight > 1)
|
||||
enqueue_pushable_task(rq, p);
|
||||
|
||||
}
|
||||
|
||||
if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) {
|
||||
rq->rt.rt_nr_migratory++;
|
||||
} else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) {
|
||||
@@ -1331,7 +1546,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
|
||||
rq->rt.rt_nr_migratory--;
|
||||
}
|
||||
|
||||
update_rt_migration(rq);
|
||||
update_rt_migration(&rq->rt);
|
||||
}
|
||||
|
||||
cpumask_copy(&p->cpus_allowed, new_mask);
|
||||
@@ -1346,7 +1561,7 @@ static void rq_online_rt(struct rq *rq)
|
||||
|
||||
__enable_runtime(rq);
|
||||
|
||||
cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
|
||||
cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
|
||||
}
|
||||
|
||||
/* Assumes rq->lock is held */
|
||||
@@ -1438,7 +1653,7 @@ static void prio_changed_rt(struct rq *rq, struct task_struct *p,
|
||||
* can release the rq lock and p could migrate.
|
||||
* Only reschedule if p is still on the same runqueue.
|
||||
*/
|
||||
if (p->prio > rq->rt.highest_prio && rq->curr == p)
|
||||
if (p->prio > rq->rt.highest_prio.curr && rq->curr == p)
|
||||
resched_task(p);
|
||||
#else
|
||||
/* For UP simply resched on drop of prio */
|
||||
@@ -1509,6 +1724,9 @@ static void set_curr_task_rt(struct rq *rq)
|
||||
struct task_struct *p = rq->curr;
|
||||
|
||||
p->se.exec_start = rq->clock;
|
||||
|
||||
/* The running task is never eligible for pushing */
|
||||
dequeue_pushable_task(rq, p);
|
||||
}
|
||||
|
||||
static const struct sched_class rt_sched_class = {
|
||||
@@ -1531,6 +1749,7 @@ static const struct sched_class rt_sched_class = {
|
||||
.rq_online = rq_online_rt,
|
||||
.rq_offline = rq_offline_rt,
|
||||
.pre_schedule = pre_schedule_rt,
|
||||
.needs_post_schedule = needs_post_schedule_rt,
|
||||
.post_schedule = post_schedule_rt,
|
||||
.task_wake_up = task_wake_up_rt,
|
||||
.switched_from = switched_from_rt,
|
||||
|
@@ -4,7 +4,7 @@
* bump this up when changing the output format or the meaning of an existing
* format, so that tools can adapt (or abort)
*/
#define SCHEDSTAT_VERSION 14
#define SCHEDSTAT_VERSION 15

static int show_schedstat(struct seq_file *seq, void *v)
{
@@ -26,9 +26,8 @@ static int show_schedstat(struct seq_file *seq, void *v)

/* runqueue-specific stats */
seq_printf(seq,
"cpu%d %u %u %u %u %u %u %u %u %u %llu %llu %lu",
cpu, rq->yld_both_empty,
rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count,
"cpu%d %u %u %u %u %u %u %llu %llu %lu",
cpu, rq->yld_count,
rq->sched_switch, rq->sched_count, rq->sched_goidle,
rq->ttwu_count, rq->ttwu_local,
rq->rq_cpu_time,
@@ -796,6 +796,11 @@ int __init __weak early_irq_init(void)
return 0;
}

int __init __weak arch_probe_nr_irqs(void)
{
return 0;
}

int __init __weak arch_early_irq_init(void)
{
return 0;
@@ -170,7 +170,7 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
* doesn't hit this CPU until we're ready. */
get_cpu();
for_each_online_cpu(i) {
sm_work = percpu_ptr(stop_machine_work, i);
sm_work = per_cpu_ptr(stop_machine_work, i);
INIT_WORK(sm_work, stop_cpu);
queue_work_on(i, stop_machine_wq, sm_work);
}
@@ -219,6 +219,7 @@ static const struct trans_ctl_table trans_net_ipv4_conf_vars_table[] = {
{ NET_IPV4_CONF_ARP_IGNORE, "arp_ignore" },
{ NET_IPV4_CONF_PROMOTE_SECONDARIES, "promote_secondaries" },
{ NET_IPV4_CONF_ARP_ACCEPT, "arp_accept" },
{ NET_IPV4_CONF_ARP_NOTIFY, "arp_notify" },
{}
};
@@ -1,4 +1,4 @@
obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o

obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
@@ -68,6 +68,17 @@ void clockevents_set_mode(struct clock_event_device *dev,
if (dev->mode != mode) {
dev->set_mode(mode, dev);
dev->mode = mode;

/*
* A nsec2cyc multiplicator of 0 is invalid and we'd crash
* on it, so fix it up and emit a warning:
*/
if (mode == CLOCK_EVT_MODE_ONESHOT) {
if (unlikely(!dev->mult)) {
dev->mult = 1;
WARN_ON(1);
}
}
}
}

@@ -168,15 +179,6 @@ void clockevents_register_device(struct clock_event_device *dev)
BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
BUG_ON(!dev->cpumask);

/*
* A nsec2cyc multiplicator of 0 is invalid and we'd crash
* on it, so fix it up and emit a warning:
*/
if (unlikely(!dev->mult)) {
dev->mult = 1;
WARN_ON(1);
}

spin_lock(&clockevents_lock);

list_add(&dev->list, &clockevent_devices);
@@ -31,6 +31,82 @@
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>

void timecounter_init(struct timecounter *tc,
const struct cyclecounter *cc,
u64 start_tstamp)
{
tc->cc = cc;
tc->cycle_last = cc->read(cc);
tc->nsec = start_tstamp;
}
EXPORT_SYMBOL(timecounter_init);

/**
* timecounter_read_delta - get nanoseconds since last call of this function
* @tc: Pointer to time counter
*
* When the underlying cycle counter runs over, this will be handled
* correctly as long as it does not run over more than once between
* calls.
*
* The first call to this function for a new time counter initializes
* the time tracking and returns an undefined result.
*/
static u64 timecounter_read_delta(struct timecounter *tc)
{
cycle_t cycle_now, cycle_delta;
u64 ns_offset;

/* read cycle counter: */
cycle_now = tc->cc->read(tc->cc);

/* calculate the delta since the last timecounter_read_delta(): */
cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask;

/* convert to nanoseconds: */
ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta);

/* update time stamp of timecounter_read_delta() call: */
tc->cycle_last = cycle_now;

return ns_offset;
}

u64 timecounter_read(struct timecounter *tc)
{
u64 nsec;

/* increment time by nanoseconds since last call */
nsec = timecounter_read_delta(tc);
nsec += tc->nsec;
tc->nsec = nsec;

return nsec;
}
EXPORT_SYMBOL(timecounter_read);

u64 timecounter_cyc2time(struct timecounter *tc,
cycle_t cycle_tstamp)
{
u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask;
u64 nsec;

/*
* Instead of always treating cycle_tstamp as more recent
* than tc->cycle_last, detect when it is too far in the
* future and treat it as old time stamp instead.
*/
if (cycle_delta > tc->cc->mask / 2) {
cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask;
nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta);
} else {
nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec;
}

return nsec;
}
EXPORT_SYMBOL(timecounter_cyc2time);

/* XXX - Would like a better way for initializing curr_clocksource */
extern struct clocksource clocksource_jiffies;
@@ -1,71 +1,129 @@
|
||||
/*
|
||||
* linux/kernel/time/ntp.c
|
||||
*
|
||||
* NTP state machine interfaces and logic.
|
||||
*
|
||||
* This code was mainly moved from kernel/timer.c and kernel/time.c
|
||||
* Please see those files for relevant copyright info and historical
|
||||
* changelogs.
|
||||
*/
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/timex.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/capability.h>
|
||||
#include <linux/math64.h>
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <asm/timex.h>
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/math64.h>
|
||||
#include <linux/timex.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
/*
|
||||
* Timekeeping variables
|
||||
* NTP timekeeping variables:
|
||||
*/
|
||||
unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */
|
||||
unsigned long tick_nsec; /* ACTHZ period (nsec) */
|
||||
u64 tick_length;
|
||||
static u64 tick_length_base;
|
||||
|
||||
static struct hrtimer leap_timer;
|
||||
/* USER_HZ period (usecs): */
|
||||
unsigned long tick_usec = TICK_USEC;
|
||||
|
||||
#define MAX_TICKADJ 500 /* microsecs */
|
||||
#define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \
|
||||
NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
|
||||
/* ACTHZ period (nsecs): */
|
||||
unsigned long tick_nsec;
|
||||
|
||||
u64 tick_length;
|
||||
static u64 tick_length_base;
|
||||
|
||||
static struct hrtimer leap_timer;
|
||||
|
||||
#define MAX_TICKADJ 500LL /* usecs */
|
||||
#define MAX_TICKADJ_SCALED \
|
||||
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
|
||||
|
||||
/*
|
||||
* phase-lock loop variables
|
||||
*/
|
||||
/* TIME_ERROR prevents overwriting the CMOS clock */
|
||||
static int time_state = TIME_OK; /* clock synchronization status */
|
||||
int time_status = STA_UNSYNC; /* clock status bits */
|
||||
static long time_tai; /* TAI offset (s) */
|
||||
static s64 time_offset; /* time adjustment (ns) */
|
||||
static long time_constant = 2; /* pll time constant */
|
||||
long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
|
||||
long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
|
||||
static s64 time_freq; /* frequency offset (scaled ns/s)*/
|
||||
static long time_reftime; /* time at last adjustment (s) */
|
||||
long time_adjust;
|
||||
static long ntp_tick_adj;
|
||||
|
||||
/*
|
||||
* clock synchronization status
|
||||
*
|
||||
* (TIME_ERROR prevents overwriting the CMOS clock)
|
||||
*/
|
||||
static int time_state = TIME_OK;
|
||||
|
||||
/* clock status bits: */
|
||||
int time_status = STA_UNSYNC;
|
||||
|
||||
/* TAI offset (secs): */
|
||||
static long time_tai;
|
||||
|
||||
/* time adjustment (nsecs): */
|
||||
static s64 time_offset;
|
||||
|
||||
/* pll time constant: */
|
||||
static long time_constant = 2;
|
||||
|
||||
/* maximum error (usecs): */
|
||||
long time_maxerror = NTP_PHASE_LIMIT;
|
||||
|
||||
/* estimated error (usecs): */
|
||||
long time_esterror = NTP_PHASE_LIMIT;
|
||||
|
||||
/* frequency offset (scaled nsecs/secs): */
|
||||
static s64 time_freq;
|
||||
|
||||
/* time at last adjustment (secs): */
|
||||
static long time_reftime;
|
||||
|
||||
long time_adjust;
|
||||
|
||||
/* constant (boot-param configurable) NTP tick adjustment (upscaled) */
|
||||
static s64 ntp_tick_adj;
|
||||
|
||||
/*
|
||||
* NTP methods:
|
||||
*/
|
||||
|
||||
/*
|
||||
* Update (tick_length, tick_length_base, tick_nsec), based
|
||||
* on (tick_usec, ntp_tick_adj, time_freq):
|
||||
*/
|
||||
static void ntp_update_frequency(void)
|
||||
{
|
||||
u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
|
||||
<< NTP_SCALE_SHIFT;
|
||||
second_length += (s64)ntp_tick_adj << NTP_SCALE_SHIFT;
|
||||
second_length += time_freq;
|
||||
u64 second_length;
|
||||
u64 new_base;
|
||||
|
||||
tick_length_base = second_length;
|
||||
second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ)
|
||||
<< NTP_SCALE_SHIFT;
|
||||
|
||||
tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT;
|
||||
tick_length_base = div_u64(tick_length_base, NTP_INTERVAL_FREQ);
|
||||
second_length += ntp_tick_adj;
|
||||
second_length += time_freq;
|
||||
|
||||
tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT;
|
||||
new_base = div_u64(second_length, NTP_INTERVAL_FREQ);
|
||||
|
||||
/*
|
||||
* Don't wait for the next second_overflow, apply
|
||||
* the change to the tick length immediately:
|
||||
*/
|
||||
tick_length += new_base - tick_length_base;
|
||||
tick_length_base = new_base;
|
||||
}
|
||||
|
||||
static inline s64 ntp_update_offset_fll(s64 offset64, long secs)
|
||||
{
|
||||
time_status &= ~STA_MODE;
|
||||
|
||||
if (secs < MINSEC)
|
||||
return 0;
|
||||
|
||||
if (!(time_status & STA_FLL) && (secs <= MAXSEC))
|
||||
return 0;
|
||||
|
||||
time_status |= STA_MODE;
|
||||
|
||||
return div_s64(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs);
|
||||
}
|
||||
|
||||
static void ntp_update_offset(long offset)
|
||||
{
|
||||
long mtemp;
|
||||
s64 freq_adj;
|
||||
s64 offset64;
|
||||
long secs;
|
||||
|
||||
if (!(time_status & STA_PLL))
|
||||
return;
|
||||
@@ -84,24 +142,23 @@ static void ntp_update_offset(long offset)
|
||||
* Select how the frequency is to be controlled
|
||||
* and in which mode (PLL or FLL).
|
||||
*/
|
||||
if (time_status & STA_FREQHOLD || time_reftime == 0)
|
||||
time_reftime = xtime.tv_sec;
|
||||
mtemp = xtime.tv_sec - time_reftime;
|
||||
secs = xtime.tv_sec - time_reftime;
|
||||
if (unlikely(time_status & STA_FREQHOLD))
|
||||
secs = 0;
|
||||
|
||||
time_reftime = xtime.tv_sec;
|
||||
|
||||
freq_adj = (s64)offset * mtemp;
|
||||
freq_adj <<= NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant);
|
||||
time_status &= ~STA_MODE;
|
||||
if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) {
|
||||
freq_adj += div_s64((s64)offset << (NTP_SCALE_SHIFT - SHIFT_FLL),
|
||||
mtemp);
|
||||
time_status |= STA_MODE;
|
||||
}
|
||||
freq_adj += time_freq;
|
||||
freq_adj = min(freq_adj, MAXFREQ_SCALED);
|
||||
time_freq = max(freq_adj, -MAXFREQ_SCALED);
|
||||
offset64 = offset;
|
||||
freq_adj = (offset64 * secs) <<
|
||||
(NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant));
|
||||
|
||||
time_offset = div_s64((s64)offset << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ);
|
||||
freq_adj += ntp_update_offset_fll(offset64, secs);
|
||||
|
||||
freq_adj = min(freq_adj + time_freq, MAXFREQ_SCALED);
|
||||
|
||||
time_freq = max(freq_adj, -MAXFREQ_SCALED);
|
||||
|
||||
time_offset = div_s64(offset64 << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -111,15 +168,15 @@ static void ntp_update_offset(long offset)
|
||||
*/
|
||||
void ntp_clear(void)
|
||||
{
|
||||
time_adjust = 0; /* stop active adjtime() */
|
||||
time_status |= STA_UNSYNC;
|
||||
time_maxerror = NTP_PHASE_LIMIT;
|
||||
time_esterror = NTP_PHASE_LIMIT;
|
||||
time_adjust = 0; /* stop active adjtime() */
|
||||
time_status |= STA_UNSYNC;
|
||||
time_maxerror = NTP_PHASE_LIMIT;
|
||||
time_esterror = NTP_PHASE_LIMIT;
|
||||
|
||||
ntp_update_frequency();
|
||||
|
||||
tick_length = tick_length_base;
|
||||
time_offset = 0;
|
||||
tick_length = tick_length_base;
|
||||
time_offset = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -140,8 +197,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
|
||||
xtime.tv_sec--;
|
||||
wall_to_monotonic.tv_sec++;
|
||||
time_state = TIME_OOP;
|
||||
printk(KERN_NOTICE "Clock: "
|
||||
"inserting leap second 23:59:60 UTC\n");
|
||||
printk(KERN_NOTICE
|
||||
"Clock: inserting leap second 23:59:60 UTC\n");
|
||||
hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC);
|
||||
res = HRTIMER_RESTART;
|
||||
break;
|
||||
@@ -150,8 +207,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
|
||||
time_tai--;
|
||||
wall_to_monotonic.tv_sec--;
|
||||
time_state = TIME_WAIT;
|
||||
printk(KERN_NOTICE "Clock: "
|
||||
"deleting leap second 23:59:59 UTC\n");
|
||||
printk(KERN_NOTICE
|
||||
"Clock: deleting leap second 23:59:59 UTC\n");
|
||||
break;
|
||||
case TIME_OOP:
|
||||
time_tai++;
|
||||
@@ -179,7 +236,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer)
|
||||
*/
|
||||
void second_overflow(void)
|
||||
{
|
||||
s64 time_adj;
|
||||
s64 delta;
|
||||
|
||||
/* Bump the maxerror field */
|
||||
time_maxerror += MAXFREQ / NSEC_PER_USEC;
|
||||
@@ -192,24 +249,30 @@ void second_overflow(void)
|
||||
* Compute the phase adjustment for the next second. The offset is
|
||||
* reduced by a fixed factor times the time constant.
|
||||
*/
|
||||
tick_length = tick_length_base;
|
||||
time_adj = shift_right(time_offset, SHIFT_PLL + time_constant);
|
||||
time_offset -= time_adj;
|
||||
tick_length += time_adj;
|
||||
tick_length = tick_length_base;
|
||||
|
||||
if (unlikely(time_adjust)) {
|
||||
if (time_adjust > MAX_TICKADJ) {
|
||||
time_adjust -= MAX_TICKADJ;
|
||||
tick_length += MAX_TICKADJ_SCALED;
|
||||
} else if (time_adjust < -MAX_TICKADJ) {
|
||||
time_adjust += MAX_TICKADJ;
|
||||
tick_length -= MAX_TICKADJ_SCALED;
|
||||
} else {
|
||||
tick_length += (s64)(time_adjust * NSEC_PER_USEC /
|
||||
NTP_INTERVAL_FREQ) << NTP_SCALE_SHIFT;
|
||||
time_adjust = 0;
|
||||
}
|
||||
delta = shift_right(time_offset, SHIFT_PLL + time_constant);
|
||||
time_offset -= delta;
|
||||
tick_length += delta;
|
||||
|
||||
if (!time_adjust)
|
||||
return;
|
||||
|
||||
if (time_adjust > MAX_TICKADJ) {
|
||||
time_adjust -= MAX_TICKADJ;
|
||||
tick_length += MAX_TICKADJ_SCALED;
|
||||
return;
|
||||
}
|
||||
|
||||
if (time_adjust < -MAX_TICKADJ) {
|
||||
time_adjust += MAX_TICKADJ;
|
||||
tick_length -= MAX_TICKADJ_SCALED;
|
||||
return;
|
||||
}
|
||||
|
||||
tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ)
|
||||
<< NTP_SCALE_SHIFT;
|
||||
time_adjust = 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_GENERIC_CMOS_UPDATE
|
||||
@@ -233,12 +296,13 @@ static void sync_cmos_clock(struct work_struct *work)
|
||||
* This code is run on a timer. If the clock is set, that timer
|
||||
* may not expire at the correct time. Thus, we adjust...
|
||||
*/
|
||||
if (!ntp_synced())
|
||||
if (!ntp_synced()) {
|
||||
/*
|
||||
* Not synced, exit, do not restart a timer (if one is
|
||||
* running, let it run out).
|
||||
*/
|
||||
return;
|
||||
}
|
||||
|
||||
getnstimeofday(&now);
|
||||
if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2)
|
||||
@@ -270,7 +334,116 @@ static void notify_cmos_timer(void)
|
||||
static inline void notify_cmos_timer(void) { }
|
||||
#endif
|
||||
|
||||
/* adjtimex mainly allows reading (and writing, if superuser) of
|
||||
/*
|
||||
* Start the leap seconds timer:
|
||||
*/
|
||||
static inline void ntp_start_leap_timer(struct timespec *ts)
|
||||
{
|
||||
long now = ts->tv_sec;
|
||||
|
||||
if (time_status & STA_INS) {
|
||||
time_state = TIME_INS;
|
||||
now += 86400 - now % 86400;
|
||||
hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (time_status & STA_DEL) {
|
||||
time_state = TIME_DEL;
|
||||
now += 86400 - (now + 1) % 86400;
|
||||
hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Propagate a new txc->status value into the NTP state:
|
||||
*/
|
||||
static inline void process_adj_status(struct timex *txc, struct timespec *ts)
|
||||
{
|
||||
if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
|
||||
time_state = TIME_OK;
|
||||
time_status = STA_UNSYNC;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we turn on PLL adjustments then reset the
|
||||
* reference time to current time.
|
||||
*/
|
||||
if (!(time_status & STA_PLL) && (txc->status & STA_PLL))
|
||||
time_reftime = xtime.tv_sec;
|
||||
|
||||
/* only set allowed bits */
|
||||
time_status &= STA_RONLY;
|
||||
time_status |= txc->status & ~STA_RONLY;
|
||||
|
||||
switch (time_state) {
|
||||
case TIME_OK:
|
||||
ntp_start_leap_timer(ts);
|
||||
break;
|
||||
case TIME_INS:
|
||||
case TIME_DEL:
|
||||
time_state = TIME_OK;
|
||||
ntp_start_leap_timer(ts);
|
||||
case TIME_WAIT:
|
||||
if (!(time_status & (STA_INS | STA_DEL)))
|
||||
time_state = TIME_OK;
|
||||
break;
|
||||
case TIME_OOP:
|
||||
hrtimer_restart(&leap_timer);
|
||||
break;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Called with the xtime lock held, so we can access and modify
|
||||
* all the global NTP state:
|
||||
*/
|
||||
static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts)
|
||||
{
|
||||
if (txc->modes & ADJ_STATUS)
|
||||
process_adj_status(txc, ts);
|
||||
|
||||
if (txc->modes & ADJ_NANO)
|
||||
time_status |= STA_NANO;
|
||||
|
||||
if (txc->modes & ADJ_MICRO)
|
||||
time_status &= ~STA_NANO;
|
||||
|
||||
if (txc->modes & ADJ_FREQUENCY) {
|
||||
time_freq = txc->freq * PPM_SCALE;
|
||||
time_freq = min(time_freq, MAXFREQ_SCALED);
|
||||
time_freq = max(time_freq, -MAXFREQ_SCALED);
|
||||
}
|
||||
|
||||
if (txc->modes & ADJ_MAXERROR)
|
||||
time_maxerror = txc->maxerror;
|
||||
|
||||
if (txc->modes & ADJ_ESTERROR)
|
||||
time_esterror = txc->esterror;
|
||||
|
||||
if (txc->modes & ADJ_TIMECONST) {
|
||||
time_constant = txc->constant;
|
||||
if (!(time_status & STA_NANO))
|
||||
time_constant += 4;
|
||||
time_constant = min(time_constant, (long)MAXTC);
|
||||
time_constant = max(time_constant, 0l);
|
||||
}
|
||||
|
||||
if (txc->modes & ADJ_TAI && txc->constant > 0)
|
||||
time_tai = txc->constant;
|
||||
|
||||
if (txc->modes & ADJ_OFFSET)
|
||||
ntp_update_offset(txc->offset);
|
||||
|
||||
if (txc->modes & ADJ_TICK)
|
||||
tick_usec = txc->tick;
|
||||
|
||||
if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET))
|
||||
ntp_update_frequency();
|
||||
}
|
||||
|
||||
/*
|
||||
* adjtimex mainly allows reading (and writing, if superuser) of
|
||||
* kernel time-keeping variables. used by xntpd.
|
||||
*/
|
||||
int do_adjtimex(struct timex *txc)
|
||||
@@ -291,11 +464,14 @@ int do_adjtimex(struct timex *txc)
|
||||
if (txc->modes && !capable(CAP_SYS_TIME))
|
||||
return -EPERM;
|
||||
|
||||
/* if the quartz is off by more than 10% something is VERY wrong! */
|
||||
/*
|
||||
* if the quartz is off by more than 10% then
|
||||
* something is VERY wrong!
|
||||
*/
|
||||
if (txc->modes & ADJ_TICK &&
|
||||
(txc->tick < 900000/USER_HZ ||
|
||||
txc->tick > 1100000/USER_HZ))
|
||||
return -EINVAL;
|
||||
return -EINVAL;
|
||||
|
||||
if (txc->modes & ADJ_STATUS && time_state != TIME_OK)
|
||||
hrtimer_cancel(&leap_timer);
|
||||
@@ -305,7 +481,6 @@ int do_adjtimex(struct timex *txc)

write_seqlock_irq(&xtime_lock);

/* If there are input parameters, then process them */
if (txc->modes & ADJ_ADJTIME) {
long save_adjust = time_adjust;

@@ -315,98 +490,24 @@ int do_adjtimex(struct timex *txc)
ntp_update_frequency();
}
txc->offset = save_adjust;
goto adj_done;
}
if (txc->modes) {
long sec;
} else {

if (txc->modes & ADJ_STATUS) {
if ((time_status & STA_PLL) &&
!(txc->status & STA_PLL)) {
time_state = TIME_OK;
time_status = STA_UNSYNC;
}
/* only set allowed bits */
time_status &= STA_RONLY;
time_status |= txc->status & ~STA_RONLY;
/* If there are input parameters, then process them: */
if (txc->modes)
process_adjtimex_modes(txc, &ts);

switch (time_state) {
case TIME_OK:
start_timer:
sec = ts.tv_sec;
if (time_status & STA_INS) {
time_state = TIME_INS;
sec += 86400 - sec % 86400;
hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS);
} else if (time_status & STA_DEL) {
time_state = TIME_DEL;
sec += 86400 - (sec + 1) % 86400;
hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS);
}
break;
case TIME_INS:
case TIME_DEL:
time_state = TIME_OK;
goto start_timer;
break;
case TIME_WAIT:
if (!(time_status & (STA_INS | STA_DEL)))
time_state = TIME_OK;
break;
case TIME_OOP:
hrtimer_restart(&leap_timer);
break;
}
}

if (txc->modes & ADJ_NANO)
time_status |= STA_NANO;
if (txc->modes & ADJ_MICRO)
time_status &= ~STA_NANO;

if (txc->modes & ADJ_FREQUENCY) {
time_freq = (s64)txc->freq * PPM_SCALE;
time_freq = min(time_freq, MAXFREQ_SCALED);
time_freq = max(time_freq, -MAXFREQ_SCALED);
}

if (txc->modes & ADJ_MAXERROR)
time_maxerror = txc->maxerror;
if (txc->modes & ADJ_ESTERROR)
time_esterror = txc->esterror;

if (txc->modes & ADJ_TIMECONST) {
time_constant = txc->constant;
if (!(time_status & STA_NANO))
time_constant += 4;
time_constant = min(time_constant, (long)MAXTC);
time_constant = max(time_constant, 0l);
}

if (txc->modes & ADJ_TAI && txc->constant > 0)
time_tai = txc->constant;

if (txc->modes & ADJ_OFFSET)
ntp_update_offset(txc->offset);
if (txc->modes & ADJ_TICK)
tick_usec = txc->tick;

if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET))
ntp_update_frequency();
}

txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
NTP_SCALE_SHIFT);
if (!(time_status & STA_NANO))
txc->offset /= NSEC_PER_USEC;
if (!(time_status & STA_NANO))
txc->offset /= NSEC_PER_USEC;
}

adj_done:
result = time_state; /* mostly `TIME_OK' */
if (time_status & (STA_UNSYNC|STA_CLOCKERR))
result = TIME_ERROR;

txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) *
(s64)PPM_SCALE_INV, NTP_SCALE_SHIFT);
PPM_SCALE_INV, NTP_SCALE_SHIFT);
txc->maxerror = time_maxerror;
txc->esterror = time_esterror;
txc->status = time_status;
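A brief worked example of the leap-second scheduling in the hunk above (the timestamp is illustrative, not from the patch): under STA_INS, sec += 86400 - sec % 86400 rounds up to the next UTC midnight, so sec = 1234567890 (23:31:30 UTC) becomes 1234569600 (00:00:00 the following day), which is the absolute expiry handed to hrtimer_start(); under STA_DEL the extra (sec + 1) term lands one second earlier, at 23:59:59, where the deleted second would have been.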
@@ -425,6 +526,7 @@ adj_done:
txc->calcnt = 0;
txc->errcnt = 0;
txc->stbcnt = 0;

write_sequnlock_irq(&xtime_lock);

txc->time.tv_sec = ts.tv_sec;
@@ -440,6 +542,8 @@ adj_done:
static int __init ntp_tick_adj_setup(char *str)
{
ntp_tick_adj = simple_strtol(str, NULL, 0);
ntp_tick_adj <<= NTP_SCALE_SHIFT;

return 1;
}

191
kernel/time/timecompare.c
Normal file
191
kernel/time/timecompare.c
Normal file
@@ -0,0 +1,191 @@
/*
 * Copyright (C) 2009 Intel Corporation.
 * Author: Patrick Ohly <patrick.ohly@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/timecompare.h>
#include <linux/module.h>
#include <linux/math64.h>

/*
 * fixed point arithmetic scale factor for skew
 *
 * Usually one would measure skew in ppb (parts per billion, 1e9), but
 * using a factor of 2 simplifies the math.
 */
#define TIMECOMPARE_SKEW_RESOLUTION (((s64)1)<<30)

ktime_t timecompare_transform(struct timecompare *sync,
u64 source_tstamp)
{
u64 nsec;

nsec = source_tstamp + sync->offset;
nsec += (s64)(source_tstamp - sync->last_update) * sync->skew /
TIMECOMPARE_SKEW_RESOLUTION;

return ns_to_ktime(nsec);
}
EXPORT_SYMBOL(timecompare_transform);

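In other words, timecompare_transform() evaluates target_time ≈ source_tstamp + offset + (source_tstamp - last_update) * skew / 2^30. A small worked example with made-up numbers: with sync->offset = 1000 ns, sync->skew = 2^30 / 1000 (about +1000 ppm in this fixed-point scale) and a source timestamp taken 1,000,000 ns after last_update, the result is source_tstamp + 1000 + 1000 = source_tstamp + 2000 ns on the target clock.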
int timecompare_offset(struct timecompare *sync,
s64 *offset,
u64 *source_tstamp)
{
u64 start_source = 0, end_source = 0;
struct {
s64 offset;
s64 duration_target;
} buffer[10], sample, *samples;
int counter = 0, i;
int used;
int index;
int num_samples = sync->num_samples;

if (num_samples > sizeof(buffer)/sizeof(buffer[0])) {
samples = kmalloc(sizeof(*samples) * num_samples, GFP_ATOMIC);
if (!samples) {
samples = buffer;
num_samples = sizeof(buffer)/sizeof(buffer[0]);
}
} else {
samples = buffer;
}

/* run until we have enough valid samples, but do not try forever */
i = 0;
counter = 0;
while (1) {
u64 ts;
ktime_t start, end;

start = sync->target();
ts = timecounter_read(sync->source);
end = sync->target();

if (!i)
start_source = ts;

/* ignore negative durations */
sample.duration_target = ktime_to_ns(ktime_sub(end, start));
if (sample.duration_target >= 0) {
/*
 * assume symetric delay to and from source:
 * average target time corresponds to measured
 * source time
 */
sample.offset =
ktime_to_ns(ktime_add(end, start)) / 2 -
ts;

/* simple insertion sort based on duration */
index = counter - 1;
while (index >= 0) {
if (samples[index].duration_target <
sample.duration_target)
break;
samples[index + 1] = samples[index];
index--;
}
samples[index + 1] = sample;
counter++;
}

i++;
if (counter >= num_samples || i >= 100000) {
end_source = ts;
break;
}
}

*source_tstamp = (end_source + start_source) / 2;

/* remove outliers by only using 75% of the samples */
used = counter * 3 / 4;
if (!used)
used = counter;
if (used) {
/* calculate average */
s64 off = 0;
for (index = 0; index < used; index++)
off += samples[index].offset;
*offset = div_s64(off, used);
}

if (samples && samples != buffer)
kfree(samples);

return used;
}
EXPORT_SYMBOL(timecompare_offset);

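To make the trimming concrete (numbers are illustrative): with sync->num_samples = 10 and all ten round trips valid, the insertion sort leaves the samples ordered by duration_target and used = 10 * 3 / 4 = 7, so only the seven fastest (least delay-distorted) offsets are averaged into *offset; the return value is that count, which __timecompare_update() below treats as a success indicator.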
void __timecompare_update(struct timecompare *sync,
u64 source_tstamp)
{
s64 offset;
u64 average_time;

if (!timecompare_offset(sync, &offset, &average_time))
return;

if (!sync->last_update) {
sync->last_update = average_time;
sync->offset = offset;
sync->skew = 0;
} else {
s64 delta_nsec = average_time - sync->last_update;

/* avoid division by negative or small deltas */
if (delta_nsec >= 10000) {
s64 delta_offset_nsec = offset - sync->offset;
s64 skew; /* delta_offset_nsec *
TIMECOMPARE_SKEW_RESOLUTION /
delta_nsec */
u64 divisor;

/* div_s64() is limited to 32 bit divisor */
skew = delta_offset_nsec * TIMECOMPARE_SKEW_RESOLUTION;
divisor = delta_nsec;
while (unlikely(divisor >= ((s64)1) << 32)) {
/* divide both by 2; beware, right shift
   of negative value has undefined
   behavior and can only be used for
   the positive divisor */
skew = div_s64(skew, 2);
divisor >>= 1;
}
skew = div_s64(skew, divisor);

/*
 * Calculate new overall skew as 4/16 the
 * old value and 12/16 the new one. This is
 * a rather arbitrary tradeoff between
 * only using the latest measurement (0/16 and
 * 16/16) and even more weight on past measurements.
 */
#define TIMECOMPARE_NEW_SKEW_PER_16 12
sync->skew =
div_s64((16 - TIMECOMPARE_NEW_SKEW_PER_16) *
sync->skew +
TIMECOMPARE_NEW_SKEW_PER_16 * skew,
16);
sync->last_update = average_time;
sync->offset = offset;
}
}
}
EXPORT_SYMBOL(__timecompare_update);
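A minimal usage sketch of the new API, roughly how a network driver might wire it up (the my_clock timecounter, the get_system_ktime() helper and the include choices are assumptions for illustration, not part of this patch):

#include <linux/timecompare.h>
#include <linux/clocksource.h>          /* struct timecounter, timecounter_read() */
#include <linux/hrtimer.h>              /* ktime_get_real() */

static struct timecounter my_clock;     /* hardware clock, initialised elsewhere */

/* target clock read-out: here simply system time */
static ktime_t get_system_ktime(void)
{
        return ktime_get_real();
}

static struct timecompare my_compare = {
        .source      = &my_clock,
        .target      = get_system_ktime,
        .num_samples = 10,
};

/* called periodically, e.g. from delayed work */
static void my_resync(void)
{
        u64 now = timecounter_read(&my_clock);

        /* re-measure offset and skew against the target clock */
        __timecompare_update(&my_compare, now);
}

/* map a single hardware timestamp onto the target (system) clock */
static ktime_t my_to_ktime(u64 hw_tstamp)
{
        return timecompare_transform(&my_compare, hw_tstamp);
}

With periodic calls to __timecompare_update(), offset and skew track the drift between the hardware clock and system time, and timecompare_transform() can then map individual hardware timestamps cheaply without another round of sampling.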
126
kernel/timer.c
126
kernel/timer.c
@@ -589,11 +589,14 @@ static struct tvec_base *lock_timer_base(struct timer_list *timer,
}
}

int __mod_timer(struct timer_list *timer, unsigned long expires)
static inline int
__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
{
struct tvec_base *base, *new_base;
unsigned long flags;
int ret = 0;
int ret;

ret = 0;

timer_stats_timer_set_start_info(timer);
BUG_ON(!timer->function);
@@ -603,6 +606,9 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)
if (timer_pending(timer)) {
detach_timer(timer, 0);
ret = 1;
} else {
if (pending_only)
goto out_unlock;
}

debug_timer_activate(timer);
@@ -629,12 +635,83 @@ int __mod_timer(struct timer_list *timer, unsigned long expires)

timer->expires = expires;
internal_add_timer(base, timer);

out_unlock:
spin_unlock_irqrestore(&base->lock, flags);

return ret;
}

EXPORT_SYMBOL(__mod_timer);
/**
 * mod_timer_pending - modify a pending timer's timeout
 * @timer: the pending timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer_pending() is the same for pending timers as mod_timer(),
 * but will not re-activate and modify already deleted timers.
 *
 * It is useful for unserialized use of timers.
 */
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
return __mod_timer(timer, expires, true);
}
EXPORT_SYMBOL(mod_timer_pending);

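A short sketch of where the pending-only variant matters (struct my_session and the 30 second timeout are hypothetical, not from the patch):

#include <linux/timer.h>
#include <linux/jiffies.h>

struct my_session {                             /* hypothetical example structure */
        struct timer_list idle_timer;
        /* ... */
};

/*
 * Extend the idle timeout, but only while the timer is still pending:
 * if teardown on another CPU already ran del_timer(), mod_timer_pending()
 * leaves the timer dead instead of re-arming it the way mod_timer() would.
 */
static void session_touch(struct my_session *s)
{
        mod_timer_pending(&s->idle_timer, jiffies + 30 * HZ);
}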
/**
 * mod_timer - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer() is a more efficient way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 *
 * mod_timer(timer, expires) is equivalent to:
 *
 * del_timer(timer); timer->expires = expires; add_timer(timer);
 *
 * Note that if there are multiple unserialized concurrent users of the
 * same timer, then mod_timer() is the only safe way to modify the timeout,
 * since add_timer() cannot modify an already running timer.
 *
 * The function returns whether it has modified a pending timer or not.
 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 * active timer returns 1.)
 */
int mod_timer(struct timer_list *timer, unsigned long expires)
{
/*
 * This is a common optimization triggered by the
 * networking code - if the timer is re-modified
 * to be the same thing then just return:
 */
if (timer->expires == expires && timer_pending(timer))
return 1;

return __mod_timer(timer, expires, false);
}
EXPORT_SYMBOL(mod_timer);

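And a small sketch of the documented return-value and early-return behaviour (the keepalive timer is hypothetical):

#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list keepalive_timer;       /* assumed set up with setup_timer() elsewhere */

static void keepalive_poke(unsigned long next)
{
        /*
         * mod_timer() returns 0 if the timer was inactive (it gets
         * started here) and 1 if it was already pending; re-arming
         * with an unchanged 'next' hits the early return above and
         * does no real work.
         */
        mod_timer(&keepalive_timer, next);
}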
/**
 * add_timer - start a timer
 * @timer: the timer to be added
 *
 * The kernel will do a ->function(->data) callback from the
 * timer interrupt at the ->expires point in the future. The
 * current time is 'jiffies'.
 *
 * The timer's ->expires, ->function (and if the handler uses it, ->data)
 * fields must be set prior calling this function.
 *
 * Timers with an ->expires field in the past will be executed in the next
 * timer tick.
 */
void add_timer(struct timer_list *timer)
{
BUG_ON(timer_pending(timer));
mod_timer(timer, timer->expires);
}
EXPORT_SYMBOL(add_timer);

/**
 * add_timer_on - start a timer on a particular CPU
@@ -666,44 +743,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
spin_unlock_irqrestore(&base->lock, flags);
}

/**
 * mod_timer - modify a timer's timeout
 * @timer: the timer to be modified
 * @expires: new timeout in jiffies
 *
 * mod_timer() is a more efficient way to update the expire field of an
 * active timer (if the timer is inactive it will be activated)
 *
 * mod_timer(timer, expires) is equivalent to:
 *
 * del_timer(timer); timer->expires = expires; add_timer(timer);
 *
 * Note that if there are multiple unserialized concurrent users of the
 * same timer, then mod_timer() is the only safe way to modify the timeout,
 * since add_timer() cannot modify an already running timer.
 *
 * The function returns whether it has modified a pending timer or not.
 * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
 * active timer returns 1.)
 */
int mod_timer(struct timer_list *timer, unsigned long expires)
{
BUG_ON(!timer->function);

timer_stats_timer_set_start_info(timer);
/*
 * This is a common optimization triggered by the
 * networking code - if the timer is re-modified
 * to be the same thing then just return:
 */
if (timer->expires == expires && timer_pending(timer))
return 1;

return __mod_timer(timer, expires);
}

EXPORT_SYMBOL(mod_timer);

/**
 * del_timer - deactive a timer.
 * @timer: the timer to be deactivated
@@ -733,7 +772,6 @@ int del_timer(struct timer_list *timer)

return ret;
}

EXPORT_SYMBOL(del_timer);

#ifdef CONFIG_SMP
@@ -767,7 +805,6 @@ out:

return ret;
}

EXPORT_SYMBOL(try_to_del_timer_sync);

/**
@@ -796,7 +833,6 @@ int del_timer_sync(struct timer_list *timer)
cpu_relax();
}
}

EXPORT_SYMBOL(del_timer_sync);
#endif

@@ -1268,7 +1304,7 @@ signed long __sched schedule_timeout(signed long timeout)
expire = timeout + jiffies;

setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
__mod_timer(&timer, expire);
__mod_timer(&timer, expire, false);
schedule();
del_singleshot_timer_sync(&timer);

@@ -20,7 +20,7 @@

struct user_namespace init_user_ns = {
.kref = {
.refcount = ATOMIC_INIT(1),
.refcount = ATOMIC_INIT(2),
},
.creator = &root_user,
};