Merge branch 'linus' into sched/urgent, to pick up dependencies
Signed-off-by: Ingo Molnar <mingo@kernel.org>
1182 kernel/cpu.c (file diff suppressed because it is too large)
@@ -153,13 +153,11 @@ static int _kdb_bp_install(struct pt_regs *regs, kdb_bp_t *bp)
} else {
kdb_printf("%s: failed to set breakpoint at 0x%lx\n",
__func__, bp->bp_addr);
#ifdef CONFIG_DEBUG_RODATA
if (!bp->bp_type) {
kdb_printf("Software breakpoints are unavailable.\n"
" Change the kernel CONFIG_DEBUG_RODATA=n\n"
" Boot the kernel with rodata=off\n"
" OR use hw breaks: help bph\n");
}
#endif
return 1;
}
return 0;
@@ -3112,17 +3112,6 @@ done:
return rotate;
}

#ifdef CONFIG_NO_HZ_FULL
bool perf_event_can_stop_tick(void)
{
if (atomic_read(&nr_freq_events) ||
__this_cpu_read(perf_throttled_count))
return false;
else
return true;
}
#endif

void perf_event_task_tick(void)
{
struct list_head *head = this_cpu_ptr(&active_ctx_list);
@@ -3133,6 +3122,7 @@ void perf_event_task_tick(void)

__this_cpu_inc(perf_throttled_seq);
throttled = __this_cpu_xchg(perf_throttled_count, 0);
tick_dep_clear_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);

list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
perf_adjust_freq_unthr_context(ctx, throttled);
@@ -3564,6 +3554,28 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
atomic_dec(&per_cpu(perf_cgroup_events, cpu));
}

#ifdef CONFIG_NO_HZ_FULL
static DEFINE_SPINLOCK(nr_freq_lock);
#endif

static void unaccount_freq_event_nohz(void)
{
#ifdef CONFIG_NO_HZ_FULL
spin_lock(&nr_freq_lock);
if (atomic_dec_and_test(&nr_freq_events))
tick_nohz_dep_clear(TICK_DEP_BIT_PERF_EVENTS);
spin_unlock(&nr_freq_lock);
#endif
}

static void unaccount_freq_event(void)
{
if (tick_nohz_full_enabled())
unaccount_freq_event_nohz();
else
atomic_dec(&nr_freq_events);
}

static void unaccount_event(struct perf_event *event)
{
bool dec = false;
@@ -3580,7 +3592,7 @@ static void unaccount_event(struct perf_event *event)
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
atomic_dec(&nr_freq_events);
unaccount_freq_event();
if (event->attr.context_switch) {
dec = true;
atomic_dec(&nr_switch_events);
@@ -6424,9 +6436,9 @@ static int __perf_event_overflow(struct perf_event *event,
if (unlikely(throttle
&& hwc->interrupts >= max_samples_per_tick)) {
__this_cpu_inc(perf_throttled_count);
tick_dep_set_cpu(smp_processor_id(), TICK_DEP_BIT_PERF_EVENTS);
hwc->interrupts = MAX_INTERRUPTS;
perf_log_throttle(event, 0);
tick_nohz_full_kick();
ret = 1;
}
}
@@ -6785,7 +6797,7 @@ static void swevent_hlist_release(struct swevent_htable *swhash)
kfree_rcu(hlist, rcu_head);
}

static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
static void swevent_hlist_put_cpu(int cpu)
{
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);

@@ -6797,15 +6809,15 @@ static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
mutex_unlock(&swhash->hlist_mutex);
}

static void swevent_hlist_put(struct perf_event *event)
static void swevent_hlist_put(void)
{
int cpu;

for_each_possible_cpu(cpu)
swevent_hlist_put_cpu(event, cpu);
swevent_hlist_put_cpu(cpu);
}

static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
static int swevent_hlist_get_cpu(int cpu)
{
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
int err = 0;
@@ -6828,14 +6840,13 @@ exit:
return err;
}

static int swevent_hlist_get(struct perf_event *event)
static int swevent_hlist_get(void)
{
int err;
int cpu, failed_cpu;
int err, cpu, failed_cpu;

get_online_cpus();
for_each_possible_cpu(cpu) {
err = swevent_hlist_get_cpu(event, cpu);
err = swevent_hlist_get_cpu(cpu);
if (err) {
failed_cpu = cpu;
goto fail;
@@ -6848,7 +6859,7 @@ fail:
for_each_possible_cpu(cpu) {
if (cpu == failed_cpu)
break;
swevent_hlist_put_cpu(event, cpu);
swevent_hlist_put_cpu(cpu);
}

put_online_cpus();
@@ -6864,7 +6875,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
WARN_ON(event->parent);

static_key_slow_dec(&perf_swevent_enabled[event_id]);
swevent_hlist_put(event);
swevent_hlist_put();
}

static int perf_swevent_init(struct perf_event *event)
@@ -6895,7 +6906,7 @@ static int perf_swevent_init(struct perf_event *event)
if (!event->parent) {
int err;

err = swevent_hlist_get(event);
err = swevent_hlist_get();
if (err)
return err;

@@ -7816,6 +7827,27 @@ static void account_event_cpu(struct perf_event *event, int cpu)
atomic_inc(&per_cpu(perf_cgroup_events, cpu));
}

/* Freq events need the tick to stay alive (see perf_event_task_tick). */
static void account_freq_event_nohz(void)
{
#ifdef CONFIG_NO_HZ_FULL
/* Lock so we don't race with concurrent unaccount */
spin_lock(&nr_freq_lock);
if (atomic_inc_return(&nr_freq_events) == 1)
tick_nohz_dep_set(TICK_DEP_BIT_PERF_EVENTS);
spin_unlock(&nr_freq_lock);
#endif
}

static void account_freq_event(void)
{
if (tick_nohz_full_enabled())
account_freq_event_nohz();
else
atomic_inc(&nr_freq_events);
}

static void account_event(struct perf_event *event)
{
bool inc = false;
@@ -7831,10 +7863,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_comm_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
if (event->attr.freq) {
if (atomic_inc_return(&nr_freq_events) == 1)
tick_nohz_full_kick_all();
}
if (event->attr.freq)
account_freq_event();
if (event->attr.context_switch) {
atomic_inc(&nr_switch_events);
inc = true;
@@ -8001,6 +8031,9 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
}
}

/* symmetric to unaccount_event() in _free_event() */
account_event(event);

return event;

err_per_task:
@@ -8364,8 +8397,6 @@ SYSCALL_DEFINE5(perf_event_open,
}
}

account_event(event);

/*
* Special case software events and allow them to be part of
* any hardware group.
@@ -8662,8 +8693,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
/* Mark owner so we could distinguish it from user events. */
event->owner = TASK_TOMBSTONE;

account_event(event);

ctx = find_get_context(event->pmu, task, event);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
@@ -9447,6 +9476,7 @@ ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,

return 0;
}
EXPORT_SYMBOL_GPL(perf_event_sysfs_show);

static int __init perf_event_sysfs_init(void)
{
@@ -1178,6 +1178,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
goto free_area;

area->xol_mapping.name = "[uprobes]";
area->xol_mapping.fault = NULL;
area->xol_mapping.pages = area->pages;
area->pages[0] = alloc_page(GFP_HIGHUSER);
if (!area->pages[0])

139 kernel/futex.c
@@ -124,16 +124,16 @@
* futex_wait(futex, val);
*
* waiters++; (a)
* mb(); (A) <-- paired with -.
* |
* lock(hash_bucket(futex)); |
* |
* uval = *futex; |
* | *futex = newval;
* | sys_futex(WAKE, futex);
* | futex_wake(futex);
* |
* `-------> mb(); (B)
* smp_mb(); (A) <-- paired with -.
* |
* lock(hash_bucket(futex)); |
* |
* uval = *futex; |
* | *futex = newval;
* | sys_futex(WAKE, futex);
* | futex_wake(futex);
* |
* `--------> smp_mb(); (B)
* if (uval == val)
* queue();
* unlock(hash_bucket(futex));
@@ -334,7 +334,7 @@ static inline void futex_get_mm(union futex_key *key)
/*
* Ensure futex_get_mm() implies a full barrier such that
* get_futex_key() implies a full barrier. This is relied upon
* as full barrier (B), see the ordering comment above.
* as smp_mb(); (B), see the ordering comment above.
*/
smp_mb__after_atomic();
}
@@ -407,10 +407,10 @@ static void get_futex_key_refs(union futex_key *key)

switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
case FUT_OFF_INODE:
ihold(key->shared.inode); /* implies MB (B) */
ihold(key->shared.inode); /* implies smp_mb(); (B) */
break;
case FUT_OFF_MMSHARED:
futex_get_mm(key); /* implies MB (B) */
futex_get_mm(key); /* implies smp_mb(); (B) */
break;
default:
/*
@@ -418,7 +418,7 @@ static void get_futex_key_refs(union futex_key *key)
* mm, therefore the only purpose of calling get_futex_key_refs
* is because we need the barrier for the lockless waiter check.
*/
smp_mb(); /* explicit MB (B) */
smp_mb(); /* explicit smp_mb(); (B) */
}
}

@@ -497,7 +497,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
if (!fshared) {
key->private.mm = mm;
key->private.address = address;
get_futex_key_refs(key); /* implies MB (B) */
get_futex_key_refs(key); /* implies smp_mb(); (B) */
return 0;
}

@@ -520,7 +520,20 @@ again:
else
err = 0;

lock_page(page);
/*
* The treatment of mapping from this point on is critical. The page
* lock protects many things but in this context the page lock
* stabilizes mapping, prevents inode freeing in the shared
* file-backed region case and guards against movement to swap cache.
*
* Strictly speaking the page lock is not needed in all cases being
* considered here and page lock forces unnecessarily serialization
* From this point on, mapping will be re-verified if necessary and
* page lock will be acquired only if it is unavoidable
*/
page = compound_head(page);
mapping = READ_ONCE(page->mapping);

/*
* If page->mapping is NULL, then it cannot be a PageAnon
* page; but it might be the ZERO_PAGE or in the gate area or
@@ -536,19 +549,31 @@ again:
* shmem_writepage move it from filecache to swapcache beneath us:
* an unlikely race, but we do need to retry for page->mapping.
*/
mapping = compound_head(page)->mapping;
if (!mapping) {
int shmem_swizzled = PageSwapCache(page);
if (unlikely(!mapping)) {
int shmem_swizzled;

/*
* Page lock is required to identify which special case above
* applies. If this is really a shmem page then the page lock
* will prevent unexpected transitions.
*/
lock_page(page);
shmem_swizzled = PageSwapCache(page) || page->mapping;
unlock_page(page);
put_page(page);

if (shmem_swizzled)
goto again;

return -EFAULT;
}

/*
* Private mappings are handled in a simple way.
*
* If the futex key is stored on an anonymous page, then the associated
* object is the mm which is implicitly pinned by the calling process.
*
* NOTE: When userspace waits on a MAP_SHARED mapping, even if
* it's a read-only handle, it's expected that futexes attach to
* the object not the particular process.
@@ -566,16 +591,74 @@ again:
key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
key->private.mm = mm;
key->private.address = address;

get_futex_key_refs(key); /* implies smp_mb(); (B) */

} else {
struct inode *inode;

/*
* The associated futex object in this case is the inode and
* the page->mapping must be traversed. Ordinarily this should
* be stabilised under page lock but it's not strictly
* necessary in this case as we just want to pin the inode, not
* update the radix tree or anything like that.
*
* The RCU read lock is taken as the inode is finally freed
* under RCU. If the mapping still matches expectations then the
* mapping->host can be safely accessed as being a valid inode.
*/
rcu_read_lock();

if (READ_ONCE(page->mapping) != mapping) {
rcu_read_unlock();
put_page(page);

goto again;
}

inode = READ_ONCE(mapping->host);
if (!inode) {
rcu_read_unlock();
put_page(page);

goto again;
}

/*
* Take a reference unless it is about to be freed. Previously
* this reference was taken by ihold under the page lock
* pinning the inode in place so i_lock was unnecessary. The
* only way for this check to fail is if the inode was
* truncated in parallel so warn for now if this happens.
*
* We are not calling into get_futex_key_refs() in file-backed
* cases, therefore a successful atomic_inc return below will
* guarantee that get_futex_key() will still imply smp_mb(); (B).
*/
if (WARN_ON_ONCE(!atomic_inc_not_zero(&inode->i_count))) {
rcu_read_unlock();
put_page(page);

goto again;
}

/* Should be impossible but lets be paranoid for now */
if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
err = -EFAULT;
rcu_read_unlock();
iput(inode);

goto out;
}

key->both.offset |= FUT_OFF_INODE; /* inode-based key */
key->shared.inode = mapping->host;
key->shared.inode = inode;
key->shared.pgoff = basepage_index(page);
rcu_read_unlock();
}

get_futex_key_refs(key); /* implies MB (B) */

out:
unlock_page(page);
put_page(page);
return err;
}
@@ -1864,7 +1947,7 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)

q->lock_ptr = &hb->lock;

spin_lock(&hb->lock); /* implies MB (A) */
spin_lock(&hb->lock); /* implies smp_mb(); (A) */
return hb;
}

@@ -1927,8 +2010,12 @@ static int unqueue_me(struct futex_q *q)

/* In the common case we don't take the spinlock, which is nice. */
retry:
lock_ptr = q->lock_ptr;
barrier();
/*
* q->lock_ptr can change between this read and the following spin_lock.
* Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
* optimizing lock_ptr out of the logic below.
*/
lock_ptr = READ_ONCE(q->lock_ptr);
if (lock_ptr != NULL) {
spin_lock(lock_ptr);
/*
@@ -64,6 +64,10 @@ config IRQ_DOMAIN_HIERARCHY
bool
select IRQ_DOMAIN

# Generic IRQ IPI support
config GENERIC_IRQ_IPI
bool

# Generic MSI interrupt support
config GENERIC_MSI_IRQ
bool
@@ -8,3 +8,4 @@ obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
obj-$(CONFIG_GENERIC_IRQ_MIGRATION) += cpuhotplug.o
obj-$(CONFIG_PM_SLEEP) += pm.o
obj-$(CONFIG_GENERIC_MSI_IRQ) += msi.o
obj-$(CONFIG_GENERIC_IRQ_IPI) += ipi.o
@@ -961,6 +961,7 @@ void irq_chip_mask_parent(struct irq_data *data)
data = data->parent_data;
data->chip->irq_mask(data);
}
EXPORT_SYMBOL_GPL(irq_chip_mask_parent);

/**
* irq_chip_unmask_parent - Unmask the parent interrupt
@@ -971,6 +972,7 @@ void irq_chip_unmask_parent(struct irq_data *data)
data = data->parent_data;
data->chip->irq_unmask(data);
}
EXPORT_SYMBOL_GPL(irq_chip_unmask_parent);

/**
* irq_chip_eoi_parent - Invoke EOI on the parent interrupt
@@ -981,6 +983,7 @@ void irq_chip_eoi_parent(struct irq_data *data)
data = data->parent_data;
data->chip->irq_eoi(data);
}
EXPORT_SYMBOL_GPL(irq_chip_eoi_parent);

/**
* irq_chip_set_affinity_parent - Set affinity on the parent interrupt
@@ -1016,6 +1019,7 @@ int irq_chip_set_type_parent(struct irq_data *data, unsigned int type)

return -ENOSYS;
}
EXPORT_SYMBOL_GPL(irq_chip_set_type_parent);

/**
* irq_chip_retrigger_hierarchy - Retrigger an interrupt in hardware
@@ -136,10 +136,9 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
{
irqreturn_t retval = IRQ_NONE;
unsigned int flags = 0, irq = desc->irq_data.irq;
struct irqaction *action = desc->action;
struct irqaction *action;

/* action might have become NULL since we dropped the lock */
while (action) {
for_each_action_of_desc(desc, action) {
irqreturn_t res;

trace_irq_handler_entry(irq, action);
@@ -173,7 +172,6 @@ irqreturn_t handle_irq_event_percpu(struct irq_desc *desc)
}

retval |= res;
action = action->next;
}

add_interrupt_randomness(irq, flags);
@@ -131,6 +131,9 @@ static inline void chip_bus_sync_unlock(struct irq_desc *desc)
#define IRQ_GET_DESC_CHECK_GLOBAL (_IRQ_DESC_CHECK)
#define IRQ_GET_DESC_CHECK_PERCPU (_IRQ_DESC_CHECK | _IRQ_DESC_PERCPU)

#define for_each_action_of_desc(desc, act) \
for (act = desc->act; act; act = act->next)

struct irq_desc *
__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus,
unsigned int check);
@@ -160,6 +163,8 @@ irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags)
__irq_put_desc_unlock(desc, flags, false);
}

#define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors)

/*
* Manipulation functions for irq_data.state
*/
@@ -188,6 +193,8 @@ static inline bool irqd_has_set(struct irq_data *d, unsigned int mask)
return __irqd_to_state(d) & mask;
}

#undef __irqd_to_state

static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
{
__this_cpu_inc(*desc->kstat_irqs);

326 kernel/irq/ipi.c (new file)
@@ -0,0 +1,326 @@
/*
* linux/kernel/irq/ipi.c
*
* Copyright (C) 2015 Imagination Technologies Ltd
* Author: Qais Yousef <qais.yousef@imgtec.com>
*
* This file contains driver APIs to the IPI subsystem.
*/

#define pr_fmt(fmt) "genirq/ipi: " fmt

#include <linux/irqdomain.h>
#include <linux/irq.h>

/**
* irq_reserve_ipi() - Setup an IPI to destination cpumask
* @domain: IPI domain
* @dest: cpumask of cpus which can receive the IPI
*
* Allocate a virq that can be used to send IPI to any CPU in dest mask.
*
* On success it'll return linux irq number and 0 on failure
*/
unsigned int irq_reserve_ipi(struct irq_domain *domain,
const struct cpumask *dest)
{
unsigned int nr_irqs, offset;
struct irq_data *data;
int virq, i;

if (!domain ||!irq_domain_is_ipi(domain)) {
pr_warn("Reservation on a non IPI domain\n");
return 0;
}

if (!cpumask_subset(dest, cpu_possible_mask)) {
pr_warn("Reservation is not in possible_cpu_mask\n");
return 0;
}

nr_irqs = cpumask_weight(dest);
if (!nr_irqs) {
pr_warn("Reservation for empty destination mask\n");
return 0;
}

if (irq_domain_is_ipi_single(domain)) {
/*
* If the underlying implementation uses a single HW irq on
* all cpus then we only need a single Linux irq number for
* it. We have no restrictions vs. the destination mask. The
* underlying implementation can deal with holes nicely.
*/
nr_irqs = 1;
offset = 0;
} else {
unsigned int next;

/*
* The IPI requires a seperate HW irq on each CPU. We require
* that the destination mask is consecutive. If an
* implementation needs to support holes, it can reserve
* several IPI ranges.
*/
offset = cpumask_first(dest);
/*
* Find a hole and if found look for another set bit after the
* hole. For now we don't support this scenario.
*/
next = cpumask_next_zero(offset, dest);
if (next < nr_cpu_ids)
next = cpumask_next(next, dest);
if (next < nr_cpu_ids) {
pr_warn("Destination mask has holes\n");
return 0;
}
}

virq = irq_domain_alloc_descs(-1, nr_irqs, 0, NUMA_NO_NODE);
if (virq <= 0) {
pr_warn("Can't reserve IPI, failed to alloc descs\n");
return 0;
}

virq = __irq_domain_alloc_irqs(domain, virq, nr_irqs, NUMA_NO_NODE,
(void *) dest, true);

if (virq <= 0) {
pr_warn("Can't reserve IPI, failed to alloc hw irqs\n");
goto free_descs;
}

for (i = 0; i < nr_irqs; i++) {
data = irq_get_irq_data(virq + i);
cpumask_copy(data->common->affinity, dest);
data->common->ipi_offset = offset;
}
return virq;

free_descs:
irq_free_descs(virq, nr_irqs);
return 0;
}

/**
* irq_destroy_ipi() - unreserve an IPI that was previously allocated
* @irq: linux irq number to be destroyed
*
* Return the IPIs allocated with irq_reserve_ipi() to the system destroying
* all virqs associated with them.
*/
void irq_destroy_ipi(unsigned int irq)
{
struct irq_data *data = irq_get_irq_data(irq);
struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL;
struct irq_domain *domain;
unsigned int nr_irqs;

if (!irq || !data || !ipimask)
return;

domain = data->domain;
if (WARN_ON(domain == NULL))
return;

if (!irq_domain_is_ipi(domain)) {
pr_warn("Trying to destroy a non IPI domain!\n");
return;
}

if (irq_domain_is_ipi_per_cpu(domain))
nr_irqs = cpumask_weight(ipimask);
else
nr_irqs = 1;

irq_domain_free_irqs(irq, nr_irqs);
}

/**
* ipi_get_hwirq - Get the hwirq associated with an IPI to a cpu
* @irq: linux irq number
* @cpu: the target cpu
*
* When dealing with coprocessors IPI, we need to inform the coprocessor of
* the hwirq it needs to use to receive and send IPIs.
*
* Returns hwirq value on success and INVALID_HWIRQ on failure.
*/
irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu)
{
struct irq_data *data = irq_get_irq_data(irq);
struct cpumask *ipimask = data ? irq_data_get_affinity_mask(data) : NULL;

if (!data || !ipimask || cpu > nr_cpu_ids)
return INVALID_HWIRQ;

if (!cpumask_test_cpu(cpu, ipimask))
return INVALID_HWIRQ;

/*
* Get the real hardware irq number if the underlying implementation
* uses a seperate irq per cpu. If the underlying implementation uses
* a single hardware irq for all cpus then the IPI send mechanism
* needs to take care of the cpu destinations.
*/
if (irq_domain_is_ipi_per_cpu(data->domain))
data = irq_get_irq_data(irq + cpu - data->common->ipi_offset);

return data ? irqd_to_hwirq(data) : INVALID_HWIRQ;
}
EXPORT_SYMBOL_GPL(ipi_get_hwirq);

static int ipi_send_verify(struct irq_chip *chip, struct irq_data *data,
const struct cpumask *dest, unsigned int cpu)
{
struct cpumask *ipimask = irq_data_get_affinity_mask(data);

if (!chip || !ipimask)
return -EINVAL;

if (!chip->ipi_send_single && !chip->ipi_send_mask)
return -EINVAL;

if (cpu > nr_cpu_ids)
return -EINVAL;

if (dest) {
if (!cpumask_subset(dest, ipimask))
return -EINVAL;
} else {
if (!cpumask_test_cpu(cpu, ipimask))
return -EINVAL;
}
return 0;
}

/**
* __ipi_send_single - send an IPI to a target Linux SMP CPU
* @desc: pointer to irq_desc of the IRQ
* @cpu: destination CPU, must in the destination mask passed to
* irq_reserve_ipi()
*
* This function is for architecture or core code to speed up IPI sending. Not
* usable from driver code.
*
* Returns zero on success and negative error number on failure.
*/
int __ipi_send_single(struct irq_desc *desc, unsigned int cpu)
{
struct irq_data *data = irq_desc_get_irq_data(desc);
struct irq_chip *chip = irq_data_get_irq_chip(data);

#ifdef DEBUG
/*
* Minimise the overhead by omitting the checks for Linux SMP IPIs.
* Since the callers should be arch or core code which is generally
* trusted, only check for errors when debugging.
*/
if (WARN_ON_ONCE(ipi_send_verify(chip, data, NULL, cpu)))
return -EINVAL;
#endif
if (!chip->ipi_send_single) {
chip->ipi_send_mask(data, cpumask_of(cpu));
return 0;
}

/* FIXME: Store this information in irqdata flags */
if (irq_domain_is_ipi_per_cpu(data->domain) &&
cpu != data->common->ipi_offset) {
/* use the correct data for that cpu */
unsigned irq = data->irq + cpu - data->common->ipi_offset;

data = irq_get_irq_data(irq);
}
chip->ipi_send_single(data, cpu);
return 0;
}

/**
* ipi_send_mask - send an IPI to target Linux SMP CPU(s)
* @desc: pointer to irq_desc of the IRQ
* @dest: dest CPU(s), must be a subset of the mask passed to
* irq_reserve_ipi()
*
* This function is for architecture or core code to speed up IPI sending. Not
* usable from driver code.
*
* Returns zero on success and negative error number on failure.
*/
int __ipi_send_mask(struct irq_desc *desc, const struct cpumask *dest)
{
struct irq_data *data = irq_desc_get_irq_data(desc);
struct irq_chip *chip = irq_data_get_irq_chip(data);
unsigned int cpu;

#ifdef DEBUG
/*
* Minimise the overhead by omitting the checks for Linux SMP IPIs.
* Since the callers should be arch or core code which is generally
* trusted, only check for errors when debugging.
*/
if (WARN_ON_ONCE(ipi_send_verify(chip, data, dest, 0)))
return -EINVAL;
#endif
if (chip->ipi_send_mask) {
chip->ipi_send_mask(data, dest);
return 0;
}

if (irq_domain_is_ipi_per_cpu(data->domain)) {
unsigned int base = data->irq;

for_each_cpu(cpu, dest) {
unsigned irq = base + cpu - data->common->ipi_offset;

data = irq_get_irq_data(irq);
chip->ipi_send_single(data, cpu);
}
} else {
for_each_cpu(cpu, dest)
chip->ipi_send_single(data, cpu);
}
return 0;
}

/**
* ipi_send_single - Send an IPI to a single CPU
* @virq: linux irq number from irq_reserve_ipi()
* @cpu: destination CPU, must in the destination mask passed to
* irq_reserve_ipi()
*
* Returns zero on success and negative error number on failure.
*/
int ipi_send_single(unsigned int virq, unsigned int cpu)
{
struct irq_desc *desc = irq_to_desc(virq);
struct irq_data *data = desc ? irq_desc_get_irq_data(desc) : NULL;
struct irq_chip *chip = data ? irq_data_get_irq_chip(data) : NULL;

if (WARN_ON_ONCE(ipi_send_verify(chip, data, NULL, cpu)))
return -EINVAL;

return __ipi_send_single(desc, cpu);
}
EXPORT_SYMBOL_GPL(ipi_send_single);

/**
* ipi_send_mask - Send an IPI to target CPU(s)
* @virq: linux irq number from irq_reserve_ipi()
* @dest: dest CPU(s), must be a subset of the mask passed to
* irq_reserve_ipi()
*
* Returns zero on success and negative error number on failure.
*/
int ipi_send_mask(unsigned int virq, const struct cpumask *dest)
{
struct irq_desc *desc = irq_to_desc(virq);
struct irq_data *data = desc ? irq_desc_get_irq_data(desc) : NULL;
struct irq_chip *chip = data ? irq_data_get_irq_chip(data) : NULL;

if (WARN_ON_ONCE(ipi_send_verify(chip, data, dest, 0)))
return -EINVAL;

return __ipi_send_mask(desc, dest);
}
EXPORT_SYMBOL_GPL(ipi_send_mask);
@@ -24,10 +24,27 @@
static struct lock_class_key irq_desc_lock_class;

#if defined(CONFIG_SMP)
static int __init irq_affinity_setup(char *str)
{
zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
cpulist_parse(str, irq_default_affinity);
/*
* Set at least the boot cpu. We don't want to end up with
* bugreports caused by random comandline masks
*/
cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
return 1;
}
__setup("irqaffinity=", irq_affinity_setup);

static void __init init_irq_default_affinity(void)
{
alloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
cpumask_setall(irq_default_affinity);
#ifdef CONFIG_CPUMASK_OFFSTACK
if (!irq_default_affinity)
zalloc_cpumask_var(&irq_default_affinity, GFP_NOWAIT);
#endif
if (cpumask_empty(irq_default_affinity))
cpumask_setall(irq_default_affinity);
}
#else
static void __init init_irq_default_affinity(void)
@@ -23,8 +23,6 @@ static DEFINE_MUTEX(irq_domain_mutex);
static DEFINE_MUTEX(revmap_trees_mutex);
static struct irq_domain *irq_default_domain;

static int irq_domain_alloc_descs(int virq, unsigned int nr_irqs,
irq_hw_number_t hwirq, int node);
static void irq_domain_check_hierarchy(struct irq_domain *domain);

struct irqchip_fwid {
@@ -840,8 +838,8 @@ const struct irq_domain_ops irq_domain_simple_ops = {
};
EXPORT_SYMBOL_GPL(irq_domain_simple_ops);

static int irq_domain_alloc_descs(int virq, unsigned int cnt,
irq_hw_number_t hwirq, int node)
int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq,
int node)
{
unsigned int hint;

@@ -895,6 +893,7 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,

return domain;
}
EXPORT_SYMBOL_GPL(irq_domain_create_hierarchy);

static void irq_domain_insert_irq(int virq)
{
@@ -1045,6 +1044,7 @@ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq,

return 0;
}
EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip);

/**
* irq_domain_set_info - Set the complete data for a @virq in @domain
@@ -1078,6 +1078,7 @@ void irq_domain_reset_irq_data(struct irq_data *irq_data)
irq_data->chip = &no_irq_chip;
irq_data->chip_data = NULL;
}
EXPORT_SYMBOL_GPL(irq_domain_reset_irq_data);

/**
* irq_domain_free_irqs_common - Clear irq_data and free the parent
@@ -1275,6 +1276,7 @@ int irq_domain_alloc_irqs_parent(struct irq_domain *domain,
nr_irqs, arg);
return -ENOSYS;
}
EXPORT_SYMBOL_GPL(irq_domain_alloc_irqs_parent);

/**
* irq_domain_free_irqs_parent - Free interrupts from parent domain
@@ -1292,6 +1294,7 @@ void irq_domain_free_irqs_parent(struct irq_domain *domain,
irq_domain_free_irqs_recursive(domain->parent, irq_base,
nr_irqs);
}
EXPORT_SYMBOL_GPL(irq_domain_free_irqs_parent);

/**
* irq_domain_activate_irq - Call domain_ops->activate recursively to activate
@@ -144,13 +144,11 @@ int irq_can_set_affinity(unsigned int irq)
*/
void irq_set_thread_affinity(struct irq_desc *desc)
{
struct irqaction *action = desc->action;
struct irqaction *action;

while (action) {
for_each_action_of_desc(desc, action)
if (action->thread)
set_bit(IRQTF_AFFINITY, &action->thread_flags);
action = action->next;
}
}

#ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -994,7 +992,7 @@ void irq_wake_thread(unsigned int irq, void *dev_id)
return;

raw_spin_lock_irqsave(&desc->lock, flags);
for (action = desc->action; action; action = action->next) {
for_each_action_of_desc(desc, action) {
if (action->dev_id == dev_id) {
if (action->thread)
__irq_wake_thread(desc, action);
@@ -291,7 +291,7 @@ static int name_unique(unsigned int irq, struct irqaction *new_action)
int ret = 1;

raw_spin_lock_irqsave(&desc->lock, flags);
for (action = desc->action ; action; action = action->next) {
for_each_action_of_desc(desc, action) {
if ((action != new_action) && action->name &&
!strcmp(new_action->name, action->name)) {
ret = 0;
@@ -211,14 +211,12 @@ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
* desc->lock here. See synchronize_irq().
*/
raw_spin_lock_irqsave(&desc->lock, flags);
action = desc->action;
while (action) {
for_each_action_of_desc(desc, action) {
printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler);
if (action->thread_fn)
printk(KERN_CONT " threaded [<%p>] %pf",
action->thread_fn, action->thread_fn);
printk(KERN_CONT "\n");
action = action->next;
}
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
@@ -66,13 +66,15 @@ struct resource crashk_res = {
.name = "Crash kernel",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
.desc = IORES_DESC_CRASH_KERNEL
};
struct resource crashk_low_res = {
.name = "Crash kernel",
.start = 0,
.end = 0,
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
.desc = IORES_DESC_CRASH_KERNEL
};

int kexec_should_crash(struct task_struct *p)
@@ -959,7 +961,7 @@ int crash_shrink_memory(unsigned long new_size)

ram_res->start = end;
ram_res->end = crashk_res.end;
ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
ram_res->name = "System RAM";

crashk_res.end = end - 1;
@@ -524,10 +524,10 @@ int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,

/* Walk the RAM ranges and allocate a suitable range for the buffer */
if (image->type == KEXEC_TYPE_CRASH)
ret = walk_iomem_res("Crash kernel",
IORESOURCE_MEM | IORESOURCE_BUSY,
crashk_res.start, crashk_res.end, kbuf,
locate_mem_hole_callback);
ret = walk_iomem_res_desc(crashk_res.desc,
IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
crashk_res.start, crashk_res.end, kbuf,
locate_mem_hole_callback);
else
ret = walk_system_ram_res(0, -1, kbuf,
locate_mem_hole_callback);
@@ -47,12 +47,12 @@
* of times)
*/

#include <linux/latencytop.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/spinlock.h>
#include <linux/proc_fs.h>
#include <linux/latencytop.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/list.h>
@@ -289,4 +289,16 @@ static int __init init_lstats_procfs(void)
proc_create("latency_stats", 0644, NULL, &lstats_fops);
return 0;
}

int sysctl_latencytop(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
int err;

err = proc_dointvec(table, write, buffer, lenp, ppos);
if (latencytop_enabled)
force_schedstat_enabled();

return err;
}
device_initcall(init_lstats_procfs);
@@ -123,8 +123,6 @@ static inline int debug_locks_off_graph_unlock(void)
return ret;
}

static int lockdep_initialized;

unsigned long nr_list_entries;
static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];

@@ -433,19 +431,6 @@ unsigned int nr_process_chains;
unsigned int max_lockdep_depth;

#ifdef CONFIG_DEBUG_LOCKDEP
/*
* We cannot printk in early bootup code. Not even early_printk()
* might work. So we mark any initialization errors and printk
* about it later on, in lockdep_info().
*/
static int lockdep_init_error;
static const char *lock_init_error;
static unsigned long lockdep_init_trace_data[20];
static struct stack_trace lockdep_init_trace = {
.max_entries = ARRAY_SIZE(lockdep_init_trace_data),
.entries = lockdep_init_trace_data,
};

/*
* Various lockdep statistics:
*/
@@ -669,20 +654,6 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
struct hlist_head *hash_head;
struct lock_class *class;

#ifdef CONFIG_DEBUG_LOCKDEP
/*
* If the architecture calls into lockdep before initializing
* the hashes then we'll warn about it later. (we cannot printk
* right now)
*/
if (unlikely(!lockdep_initialized)) {
lockdep_init();
lockdep_init_error = 1;
lock_init_error = lock->name;
save_stack_trace(&lockdep_init_trace);
}
#endif

if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) {
debug_locks_off();
printk(KERN_ERR
@@ -2010,6 +1981,53 @@ struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i)
return lock_classes + chain_hlocks[chain->base + i];
}

/*
* Returns the index of the first held_lock of the current chain
*/
static inline int get_first_held_lock(struct task_struct *curr,
struct held_lock *hlock)
{
int i;
struct held_lock *hlock_curr;

for (i = curr->lockdep_depth - 1; i >= 0; i--) {
hlock_curr = curr->held_locks + i;
if (hlock_curr->irq_context != hlock->irq_context)
break;

}

return ++i;
}

/*
* Checks whether the chain and the current held locks are consistent
* in depth and also in content. If they are not it most likely means
* that there was a collision during the calculation of the chain_key.
* Returns: 0 not passed, 1 passed
*/
static int check_no_collision(struct task_struct *curr,
struct held_lock *hlock,
struct lock_chain *chain)
{
#ifdef CONFIG_DEBUG_LOCKDEP
int i, j, id;

i = get_first_held_lock(curr, hlock);

if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1)))
return 0;

for (j = 0; j < chain->depth - 1; j++, i++) {
id = curr->held_locks[i].class_idx - 1;

if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id))
return 0;
}
#endif
return 1;
}

/*
* Look up a dependency chain. If the key is not present yet then
* add it and return 1 - in this case the new dependency chain is
@@ -2023,7 +2041,6 @@ static inline int lookup_chain_cache(struct task_struct *curr,
struct lock_class *class = hlock_class(hlock);
struct hlist_head *hash_head = chainhashentry(chain_key);
struct lock_chain *chain;
struct held_lock *hlock_curr;
int i, j;

/*
@@ -2041,6 +2058,9 @@ static inline int lookup_chain_cache(struct task_struct *curr,
if (chain->chain_key == chain_key) {
cache_hit:
debug_atomic_inc(chain_lookup_hits);
if (!check_no_collision(curr, hlock, chain))
return 0;

if (very_verbose(class))
printk("\nhash chain already cached, key: "
"%016Lx tail class: [%p] %s\n",
@@ -2078,13 +2098,7 @@ cache_hit:
chain = lock_chains + nr_lock_chains++;
chain->chain_key = chain_key;
chain->irq_context = hlock->irq_context;
/* Find the first held_lock of current chain */
for (i = curr->lockdep_depth - 1; i >= 0; i--) {
hlock_curr = curr->held_locks + i;
if (hlock_curr->irq_context != hlock->irq_context)
break;
}
i++;
i = get_first_held_lock(curr, hlock);
chain->depth = curr->lockdep_depth + 1 - i;
if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) {
chain->base = nr_chain_hlocks;
@@ -2172,7 +2186,7 @@ static void check_chain_key(struct task_struct *curr)
{
#ifdef CONFIG_DEBUG_LOCKDEP
struct held_lock *hlock, *prev_hlock = NULL;
unsigned int i, id;
unsigned int i;
u64 chain_key = 0;

for (i = 0; i < curr->lockdep_depth; i++) {
@@ -2189,17 +2203,16 @@ static void check_chain_key(struct task_struct *curr)
(unsigned long long)hlock->prev_chain_key);
return;
}
id = hlock->class_idx - 1;
/*
* Whoops ran out of static storage again?
*/
if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
if (DEBUG_LOCKS_WARN_ON(hlock->class_idx > MAX_LOCKDEP_KEYS))
return;

if (prev_hlock && (prev_hlock->irq_context !=
hlock->irq_context))
chain_key = 0;
chain_key = iterate_chain_key(chain_key, id);
chain_key = iterate_chain_key(chain_key, hlock->class_idx);
prev_hlock = hlock;
}
if (chain_key != curr->curr_chain_key) {
@@ -3077,7 +3090,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
struct task_struct *curr = current;
struct lock_class *class = NULL;
struct held_lock *hlock;
unsigned int depth, id;
unsigned int depth;
int chain_head = 0;
int class_idx;
u64 chain_key;
@@ -3180,11 +3193,10 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
* The 'key ID' is what is the most compact key value to drive
* the hash, not class->key.
*/
id = class - lock_classes;
/*
* Whoops, we did it again.. ran straight out of our static allocation.
*/
if (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
if (DEBUG_LOCKS_WARN_ON(class_idx > MAX_LOCKDEP_KEYS))
return 0;

chain_key = curr->curr_chain_key;
@@ -3202,7 +3214,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
chain_key = 0;
chain_head = 1;
}
chain_key = iterate_chain_key(chain_key, id);
chain_key = iterate_chain_key(chain_key, class_idx);

if (nest_lock && !__lock_is_held(nest_lock))
return print_lock_nested_lock_not_held(curr, hlock, ip);
@@ -4013,28 +4025,6 @@ out_restore:
raw_local_irq_restore(flags);
}

void lockdep_init(void)
{
int i;

/*
* Some architectures have their own start_kernel()
* code which calls lockdep_init(), while we also
* call lockdep_init() from the start_kernel() itself,
* and we want to initialize the hashes only once:
*/
if (lockdep_initialized)
return;

for (i = 0; i < CLASSHASH_SIZE; i++)
INIT_HLIST_HEAD(classhash_table + i);

for (i = 0; i < CHAINHASH_SIZE; i++)
INIT_HLIST_HEAD(chainhash_table + i);

lockdep_initialized = 1;
}

void __init lockdep_info(void)
{
printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n");
@@ -4061,14 +4051,6 @@ void __init lockdep_info(void)

printk(" per task-struct memory footprint: %lu bytes\n",
sizeof(struct held_lock) * MAX_LOCK_DEPTH);

#ifdef CONFIG_DEBUG_LOCKDEP
if (lockdep_init_error) {
printk("WARNING: lockdep init error: lock '%s' was acquired before lockdep_init().\n", lock_init_error);
printk("Call stack leading to lockdep invocation was:\n");
print_stack_trace(&lockdep_init_trace, 0);
}
#endif
}

static void
@@ -67,7 +67,13 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
node->locked = 0;
node->next = NULL;

prev = xchg_acquire(lock, node);
/*
* We rely on the full barrier with global transitivity implied by the
* below xchg() to order the initialization stores above against any
* observation of @node. And to provide the ACQUIRE ordering associated
* with a LOCK primitive.
*/
prev = xchg(lock, node);
if (likely(prev == NULL)) {
/*
* Lock acquired, don't need to set node->locked to 1. Threads
@@ -716,6 +716,7 @@ static inline void
__mutex_unlock_common_slowpath(struct mutex *lock, int nested)
{
unsigned long flags;
WAKE_Q(wake_q);

/*
* As a performance measurement, release the lock before doing other
@@ -743,11 +744,11 @@ __mutex_unlock_common_slowpath(struct mutex *lock, int nested)
struct mutex_waiter, list);

debug_mutex_wake_waiter(lock, waiter);

wake_up_process(waiter->task);
wake_q_add(&wake_q, waiter->task);
}

spin_unlock_mutex(&lock->wait_lock, flags);
wake_up_q(&wake_q);
}

/*
@@ -358,8 +358,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
* sequentiality; this is because not all clear_pending_set_locked()
* implementations imply full barriers.
*/
while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK)
cpu_relax();
smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK));

/*
* take ownership and clear the pending bit.
@@ -435,7 +434,7 @@ queue:
*
* The PV pv_wait_head_or_lock function, if active, will acquire
* the lock and return a non-zero value. So we have to skip the
* smp_load_acquire() call. As the next PV queue head hasn't been
* smp_cond_acquire() call. As the next PV queue head hasn't been
* designated yet, there is no way for the locked value to become
* _Q_SLOW_VAL. So both the set_locked() and the
* atomic_cmpxchg_relaxed() calls will be safe.
@@ -466,7 +465,7 @@ locked:
break;
}
/*
* The smp_load_acquire() call above has provided the necessary
* The smp_cond_acquire() call above has provided the necessary
* acquire semantics required for locking. At most two
* iterations of this loop may be ran.
*/
@@ -54,6 +54,11 @@ struct pv_node {
u8 state;
};

/*
* Include queued spinlock statistics code
*/
#include "qspinlock_stat.h"

/*
* By replacing the regular queued_spin_trylock() with the function below,
* it will be called once when a lock waiter enter the PV slowpath before
@@ -65,9 +70,11 @@ struct pv_node {
static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
{
struct __qspinlock *l = (void *)lock;
int ret = !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
(cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);

return !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
(cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
qstat_inc(qstat_pv_lock_stealing, ret);
return ret;
}

/*
@@ -137,11 +144,6 @@ static __always_inline int trylock_clear_pending(struct qspinlock *lock)
}
#endif /* _Q_PENDING_BITS == 8 */

/*
* Include queued spinlock statistics code
*/
#include "qspinlock_stat.h"

/*
* Lock and MCS node addresses hash table for fast lookup
*
@@ -398,6 +400,11 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
if (READ_ONCE(pn->state) == vcpu_hashed)
lp = (struct qspinlock **)1;

/*
* Tracking # of slowpath locking operations
*/
qstat_inc(qstat_pv_lock_slowpath, true);

for (;; waitcnt++) {
/*
* Set correct vCPU state to be used by queue node wait-early
@@ -22,6 +22,7 @@
* pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake
* pv_latency_kick - average latency (ns) of vCPU kick operation
* pv_latency_wake - average latency (ns) from vCPU kick to wakeup
* pv_lock_slowpath - # of locking operations via the slowpath
* pv_lock_stealing - # of lock stealing operations
* pv_spurious_wakeup - # of spurious wakeups
* pv_wait_again - # of vCPU wait's that happened after a vCPU kick
@@ -45,6 +46,7 @@ enum qlock_stats {
qstat_pv_kick_wake,
qstat_pv_latency_kick,
qstat_pv_latency_wake,
qstat_pv_lock_slowpath,
qstat_pv_lock_stealing,
qstat_pv_spurious_wakeup,
qstat_pv_wait_again,
@@ -70,6 +72,7 @@ static const char * const qstat_names[qstat_num + 1] = {
[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
[qstat_pv_latency_kick] = "pv_latency_kick",
[qstat_pv_latency_wake] = "pv_latency_wake",
[qstat_pv_lock_slowpath] = "pv_lock_slowpath",
[qstat_pv_lock_stealing] = "pv_lock_stealing",
[qstat_pv_wait_again] = "pv_wait_again",
[qstat_pv_wait_early] = "pv_wait_early",
@@ -279,19 +282,6 @@ static inline void __pv_wait(u8 *ptr, u8 val)
#define pv_kick(c) __pv_kick(c)
#define pv_wait(p, v) __pv_wait(p, v)

/*
* PV unfair trylock count tracking function
*/
static inline int qstat_spin_steal_lock(struct qspinlock *lock)
{
int ret = pv_queued_spin_steal_lock(lock);

qstat_inc(qstat_pv_lock_stealing, ret);
return ret;
}
#undef queued_spin_trylock
#define queued_spin_trylock(l) qstat_spin_steal_lock(l)

#else /* CONFIG_QUEUED_LOCK_STAT */

static inline void qstat_inc(enum qlock_stats stat, bool cond) { }
@@ -29,10 +29,10 @@ __weak void __iomem *ioremap_cache(resource_size_t offset, unsigned long size)

static void *try_ram_remap(resource_size_t offset, size_t size)
{
struct page *page = pfn_to_page(offset >> PAGE_SHIFT);
unsigned long pfn = PHYS_PFN(offset);

/* In the simple case just return the existing linear address */
if (!PageHighMem(page))
if (pfn_valid(pfn) && !PageHighMem(pfn_to_page(pfn)))
return __va(offset);
return NULL; /* fallback to ioremap_cache */
}
@@ -47,7 +47,7 @@ static void *try_ram_remap(resource_size_t offset, size_t size)
* being mapped does not have i/o side effects and the __iomem
* annotation is not applicable.
*
* MEMREMAP_WB - matches the default mapping for "System RAM" on
* MEMREMAP_WB - matches the default mapping for System RAM on
* the architecture. This is usually a read-allocate write-back cache.
* Morever, if MEMREMAP_WB is specified and the requested remap region is RAM
* memremap() will bypass establishing a new mapping and instead return
@@ -56,11 +56,12 @@ static void *try_ram_remap(resource_size_t offset, size_t size)
* MEMREMAP_WT - establish a mapping whereby writes either bypass the
* cache or are written through to memory and never exist in a
* cache-dirty state with respect to program visibility. Attempts to
* map "System RAM" with this mapping type will fail.
* map System RAM with this mapping type will fail.
*/
void *memremap(resource_size_t offset, size_t size, unsigned long flags)
{
int is_ram = region_intersects(offset, size, "System RAM");
int is_ram = region_intersects(offset, size,
IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
void *addr = NULL;

if (is_ram == REGION_MIXED) {
@@ -76,7 +77,7 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags)
* MEMREMAP_WB is special in that it can be satisifed
* from the direct map. Some archs depend on the
* capability of memremap() to autodetect cases where
* the requested range is potentially in "System RAM"
* the requested range is potentially in System RAM.
*/
if (is_ram == REGION_INTERSECTS)
addr = try_ram_remap(offset, size);
@@ -88,7 +89,7 @@ void *memremap(resource_size_t offset, size_t size, unsigned long flags)
* If we don't have a mapping yet and more request flags are
* pending then we will be attempting to establish a new virtual
* address mapping. Enforce that this mapping is not aliasing
* "System RAM"
* System RAM.
*/
if (!addr && is_ram == REGION_INTERSECTS && flags) {
WARN_ONCE(1, "memremap attempted on ram %pa size: %#lx\n",
@@ -270,13 +271,17 @@ struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
void *devm_memremap_pages(struct device *dev, struct resource *res,
struct percpu_ref *ref, struct vmem_altmap *altmap)
{
int is_ram = region_intersects(res->start, resource_size(res),
"System RAM");
resource_size_t key, align_start, align_size, align_end;
struct dev_pagemap *pgmap;
struct page_map *page_map;
int error, nid, is_ram;
unsigned long pfn;
int error, nid;

align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
- align_start;
is_ram = region_intersects(align_start, align_size,
IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);

if (is_ram == REGION_MIXED) {
WARN_ONCE(1, "%s attempted on mixed region %pr\n",
@@ -314,8 +319,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,

mutex_lock(&pgmap_lock);
error = 0;
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(resource_size(res), SECTION_SIZE);
align_end = align_start + align_size - 1;
for (key = align_start; key <= align_end; key += SECTION_SIZE) {
struct dev_pagemap *dup;
@@ -351,8 +354,13 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
for_each_device_pfn(pfn, page_map) {
struct page *page = pfn_to_page(pfn);

/* ZONE_DEVICE pages must never appear on a slab lru */
list_force_poison(&page->lru);
/*
* ZONE_DEVICE pages union ->lru with a ->pgmap back
* pointer. It is a bug if a ZONE_DEVICE page is ever
* freed or placed on a driver-private list. Seed the
* storage with LIST_POISON* values.
*/
list_del(&page->lru);
page->pgmap = pgmap;
}
devres_add(dev, page_map);
@@ -59,6 +59,7 @@ int profile_setup(char *str)

if (!strncmp(str, sleepstr, strlen(sleepstr))) {
#ifdef CONFIG_SCHEDSTATS
force_schedstat_enabled();
prof_on = SLEEP_PROFILING;
if (str[strlen(sleepstr)] == ',')
str += strlen(sleepstr) + 1;
@@ -932,12 +932,14 @@ rcu_torture_writer(void *arg)
int nsynctypes = 0;

VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
pr_alert("%s" TORTURE_FLAG
" Grace periods expedited from boot/sysfs for %s,\n",
torture_type, cur_ops->name);
pr_alert("%s" TORTURE_FLAG
" Testing of dynamic grace-period expediting diabled.\n",
torture_type);
if (!can_expedite) {
pr_alert("%s" TORTURE_FLAG
" Grace periods expedited from boot/sysfs for %s,\n",
torture_type, cur_ops->name);
pr_alert("%s" TORTURE_FLAG
" Disabled dynamic grace-period expediting.\n",
torture_type);
}

/* Initialize synctype[] array. If none set, take default. */
if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1)
@@ -23,7 +23,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/seq_file.h>
|
||||
|
||||
@@ -122,18 +122,7 @@ free_out:
|
||||
debugfs_remove_recursive(rcudir);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void __exit rcutiny_trace_cleanup(void)
|
||||
{
|
||||
debugfs_remove_recursive(rcudir);
|
||||
}
|
||||
|
||||
module_init(rcutiny_trace_init);
|
||||
module_exit(rcutiny_trace_cleanup);
|
||||
|
||||
MODULE_AUTHOR("Paul E. McKenney");
|
||||
MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation");
|
||||
MODULE_LICENSE("GPL");
|
||||
device_initcall(rcutiny_trace_init);
|
||||
|
||||
static void check_cpu_stall(struct rcu_ctrlblk *rcp)
|
||||
{
|
||||
|
@@ -108,7 +108,6 @@ RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
|
||||
RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
|
||||
|
||||
static struct rcu_state *const rcu_state_p;
|
||||
static struct rcu_data __percpu *const rcu_data_p;
|
||||
LIST_HEAD(rcu_struct_flavors);
|
||||
|
||||
/* Dump rcu_node combining tree at boot to verify correct setup. */
|
||||
@@ -1083,13 +1082,12 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
|
||||
rcu_sysidle_check_cpu(rdp, isidle, maxj);
|
||||
if ((rdp->dynticks_snap & 0x1) == 0) {
|
||||
trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
|
||||
return 1;
|
||||
} else {
|
||||
if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
|
||||
rdp->mynode->gpnum))
|
||||
WRITE_ONCE(rdp->gpwrap, true);
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1173,15 +1171,16 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
|
||||
smp_mb(); /* ->cond_resched_completed before *rcrmp. */
|
||||
WRITE_ONCE(*rcrmp,
|
||||
READ_ONCE(*rcrmp) + rdp->rsp->flavor_mask);
|
||||
resched_cpu(rdp->cpu); /* Force CPU into scheduler. */
|
||||
rdp->rsp->jiffies_resched += 5; /* Enable beating. */
|
||||
} else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
|
||||
/* Time to beat on that CPU again! */
|
||||
resched_cpu(rdp->cpu); /* Force CPU into scheduler. */
|
||||
rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
|
||||
}
|
||||
rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
|
||||
}
|
||||
|
||||
/* And if it has been a really long time, kick the CPU as well. */
|
||||
if (ULONG_CMP_GE(jiffies,
|
||||
rdp->rsp->gp_start + 2 * jiffies_till_sched_qs) ||
|
||||
ULONG_CMP_GE(jiffies, rdp->rsp->gp_start + jiffies_till_sched_qs))
|
||||
resched_cpu(rdp->cpu); /* Force CPU into scheduler. */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1246,7 +1245,7 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
|
||||
if (rnp->qsmask & (1UL << cpu))
|
||||
dump_cpu_task(rnp->grplo + cpu);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1266,12 +1265,12 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
delta = jiffies - READ_ONCE(rsp->jiffies_stall);
|
||||
if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
return;
|
||||
}
|
||||
WRITE_ONCE(rsp->jiffies_stall,
|
||||
jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
|
||||
/*
|
||||
* OK, time to rat on our buddy...
|
||||
@@ -1292,7 +1291,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
|
||||
ndetected++;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
|
||||
print_cpu_stall_info_end();
|
||||
@@ -1357,7 +1356,7 @@ static void print_cpu_stall(struct rcu_state *rsp)
|
||||
if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall)))
|
||||
WRITE_ONCE(rsp->jiffies_stall,
|
||||
jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
|
||||
/*
|
||||
* Attempt to revive the RCU machinery by forcing a context switch.
|
||||
@@ -1595,7 +1594,7 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
|
||||
}
|
||||
unlock_out:
|
||||
if (rnp != rnp_root)
|
||||
raw_spin_unlock(&rnp_root->lock);
|
||||
raw_spin_unlock_rcu_node(rnp_root);
|
||||
out:
|
||||
if (c_out != NULL)
|
||||
*c_out = c;
|
||||
@@ -1614,7 +1613,6 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
|
||||
int needmore;
|
||||
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
|
||||
|
||||
rcu_nocb_gp_cleanup(rsp, rnp);
|
||||
rnp->need_future_gp[c & 0x1] = 0;
|
||||
needmore = rnp->need_future_gp[(c + 1) & 0x1];
|
||||
trace_rcu_future_gp(rnp, rdp, c,
|
||||
@@ -1635,7 +1633,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
|
||||
!READ_ONCE(rsp->gp_flags) ||
|
||||
!rsp->gp_kthread)
|
||||
return;
|
||||
wake_up(&rsp->gp_wq);
|
||||
swake_up(&rsp->gp_wq);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1815,7 +1813,7 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
return;
|
||||
}
|
||||
needwake = __note_gp_changes(rsp, rnp, rdp);
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
if (needwake)
|
||||
rcu_gp_kthread_wake(rsp);
|
||||
}
|
||||
@@ -1840,7 +1838,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
|
||||
raw_spin_lock_irq_rcu_node(rnp);
|
||||
if (!READ_ONCE(rsp->gp_flags)) {
|
||||
/* Spurious wakeup, tell caller to go back to sleep. */
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
raw_spin_unlock_irq_rcu_node(rnp);
|
||||
return false;
|
||||
}
|
||||
WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */
|
||||
@@ -1850,7 +1848,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
|
||||
* Grace period already in progress, don't start another.
|
||||
* Not supposed to be able to happen.
|
||||
*/
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
raw_spin_unlock_irq_rcu_node(rnp);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1859,7 +1857,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
|
||||
/* Record GP times before starting GP, hence smp_store_release(). */
|
||||
smp_store_release(&rsp->gpnum, rsp->gpnum + 1);
|
||||
trace_rcu_grace_period(rsp->name, rsp->gpnum, TPS("start"));
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
raw_spin_unlock_irq_rcu_node(rnp);
|
||||
|
||||
/*
|
||||
* Apply per-leaf buffered online and offline operations to the
|
||||
@@ -1873,7 +1871,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
|
||||
if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
|
||||
!rnp->wait_blkd_tasks) {
|
||||
/* Nothing to do on this leaf rcu_node structure. */
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
raw_spin_unlock_irq_rcu_node(rnp);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1907,7 +1905,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
|
||||
rcu_cleanup_dead_rnp(rnp);
|
||||
}
|
||||
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
raw_spin_unlock_irq_rcu_node(rnp);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1938,7 +1936,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
|
||||
trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
|
||||
rnp->level, rnp->grplo,
|
||||
rnp->grphi, rnp->qsmask);
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
raw_spin_unlock_irq_rcu_node(rnp);
|
||||
cond_resched_rcu_qs();
|
||||
WRITE_ONCE(rsp->gp_activity, jiffies);
|
||||
}
|
||||
@@ -1996,7 +1994,7 @@ static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time)
|
||||
raw_spin_lock_irq_rcu_node(rnp);
|
||||
WRITE_ONCE(rsp->gp_flags,
|
||||
READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS);
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
raw_spin_unlock_irq_rcu_node(rnp);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2010,6 +2008,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
|
||||
int nocb = 0;
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_node *rnp = rcu_get_root(rsp);
|
||||
struct swait_queue_head *sq;
|
||||
|
||||
WRITE_ONCE(rsp->gp_activity, jiffies);
|
||||
raw_spin_lock_irq_rcu_node(rnp);
|
||||
@@ -2025,7 +2024,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
|
||||
* safe for us to drop the lock in order to mark the grace
|
||||
* period as completed in all of the rcu_node structures.
|
||||
*/
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
raw_spin_unlock_irq_rcu_node(rnp);
|
||||
|
||||
/*
|
||||
* Propagate new ->completed value to rcu_node structures so
|
||||
@@ -2046,7 +2045,9 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
|
||||
needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
|
||||
/* smp_mb() provided by prior unlock-lock pair. */
|
||||
nocb += rcu_future_gp_cleanup(rsp, rnp);
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
sq = rcu_nocb_gp_get(rnp);
|
||||
raw_spin_unlock_irq_rcu_node(rnp);
|
||||
rcu_nocb_gp_cleanup(sq);
|
||||
cond_resched_rcu_qs();
|
||||
WRITE_ONCE(rsp->gp_activity, jiffies);
|
||||
rcu_gp_slow(rsp, gp_cleanup_delay);
|
||||
@@ -2068,7 +2069,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
|
||||
READ_ONCE(rsp->gpnum),
|
||||
TPS("newreq"));
|
||||
}
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
raw_spin_unlock_irq_rcu_node(rnp);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2092,7 +2093,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
|
||||
READ_ONCE(rsp->gpnum),
|
||||
TPS("reqwait"));
|
||||
rsp->gp_state = RCU_GP_WAIT_GPS;
|
||||
wait_event_interruptible(rsp->gp_wq,
|
||||
swait_event_interruptible(rsp->gp_wq,
|
||||
READ_ONCE(rsp->gp_flags) &
|
||||
RCU_GP_FLAG_INIT);
|
||||
rsp->gp_state = RCU_GP_DONE_GPS;
|
||||
@@ -2122,7 +2123,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
|
||||
READ_ONCE(rsp->gpnum),
|
||||
TPS("fqswait"));
|
||||
rsp->gp_state = RCU_GP_WAIT_FQS;
|
||||
ret = wait_event_interruptible_timeout(rsp->gp_wq,
|
||||
ret = swait_event_interruptible_timeout(rsp->gp_wq,
|
||||
rcu_gp_fqs_check_wake(rsp, &gf), j);
|
||||
rsp->gp_state = RCU_GP_DOING_FQS;
|
||||
/* Locking provides needed memory barriers. */
|
||||
@@ -2234,19 +2235,21 @@ static bool rcu_start_gp(struct rcu_state *rsp)
|
||||
}
|
||||
|
||||
/*
|
||||
* Report a full set of quiescent states to the specified rcu_state
|
||||
* data structure. This involves cleaning up after the prior grace
|
||||
* period and letting rcu_start_gp() start up the next grace period
|
||||
* if one is needed. Note that the caller must hold rnp->lock, which
|
||||
* is released before return.
|
||||
* Report a full set of quiescent states to the specified rcu_state data
|
||||
* structure. Invoke rcu_gp_kthread_wake() to awaken the grace-period
|
||||
* kthread if another grace period is required. Whether we wake
|
||||
* the grace-period kthread or it awakens itself for the next round
|
||||
* of quiescent-state forcing, that kthread will clean up after the
|
||||
* just-completed grace period. Note that the caller must hold rnp->lock,
|
||||
* which is released before return.
|
||||
*/
|
||||
static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
|
||||
__releases(rcu_get_root(rsp)->lock)
|
||||
{
|
||||
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
|
||||
WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
|
||||
raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
|
||||
rcu_gp_kthread_wake(rsp);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
|
||||
swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2275,7 +2278,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
|
||||
* Our bit has already been cleared, or the
|
||||
* relevant grace period is already over, so done.
|
||||
*/
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
return;
|
||||
}
|
||||
WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
|
||||
@@ -2287,7 +2290,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
|
||||
if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
|
||||
|
||||
/* Other bits still set at this level, so done. */
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
return;
|
||||
}
|
||||
mask = rnp->grpmask;
|
||||
@@ -2297,7 +2300,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
|
||||
|
||||
break;
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
rnp_c = rnp;
|
||||
rnp = rnp->parent;
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
@@ -2329,7 +2332,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
|
||||
|
||||
if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p ||
|
||||
rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
return; /* Still need more quiescent states! */
|
||||
}
|
||||
|
||||
@@ -2346,19 +2349,14 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
|
||||
/* Report up the rest of the hierarchy, tracking current ->gpnum. */
|
||||
gps = rnp->gpnum;
|
||||
mask = rnp->grpmask;
|
||||
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
|
||||
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
|
||||
raw_spin_lock_rcu_node(rnp_p); /* irqs already disabled. */
|
||||
rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Record a quiescent state for the specified CPU to that CPU's rcu_data
|
||||
* structure. This must be either called from the specified CPU, or
|
||||
* called when the specified CPU is known to be offline (and when it is
|
||||
* also known that no other CPU is concurrently trying to help the offline
|
||||
* CPU). The lastcomp argument is used to make sure we are still in the
|
||||
* grace period of interest. We don't want to end the current grace period
|
||||
* based on quiescent states detected in an earlier grace period!
|
||||
* structure. This must be called from the specified CPU.
|
||||
*/
|
||||
static void
|
||||
rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
@@ -2383,14 +2381,14 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
*/
|
||||
rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */
|
||||
rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_qs_ctr);
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
return;
|
||||
}
|
||||
mask = rdp->grpmask;
|
||||
if ((rnp->qsmask & mask) == 0) {
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
} else {
|
||||
rdp->core_needs_qs = 0;
|
||||
rdp->core_needs_qs = false;
|
||||
|
||||
/*
|
||||
* This GP can't end until cpu checks in, so all of our
|
||||
@@ -2599,35 +2597,14 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
|
||||
rnp->qsmaskinit &= ~mask;
|
||||
rnp->qsmask &= ~mask;
|
||||
if (rnp->qsmaskinit) {
|
||||
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
|
||||
raw_spin_unlock_rcu_node(rnp);
|
||||
/* irqs remain disabled. */
|
||||
return;
|
||||
}
|
||||
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
|
||||
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The CPU is exiting the idle loop into the arch_cpu_idle_dead()
|
||||
* function. We now remove it from the rcu_node tree's ->qsmaskinit
|
||||
* bit masks.
|
||||
*/
|
||||
static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long mask;
|
||||
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
|
||||
|
||||
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
|
||||
return;
|
||||
|
||||
/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
|
||||
mask = rdp->grpmask;
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
|
||||
rnp->qsmaskinitnext &= ~mask;
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* The CPU has been completely removed, and some other CPU is reporting
|
||||
* this fact from process context. Do the remainder of the cleanup,
|
||||
@@ -2859,7 +2836,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
|
||||
rcu_report_qs_rnp(mask, rsp, rnp, rnp->gpnum, flags);
|
||||
} else {
|
||||
/* Nothing to do here, so just drop the lock. */
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2895,12 +2872,12 @@ static void force_quiescent_state(struct rcu_state *rsp)
|
||||
raw_spin_unlock(&rnp_old->fqslock);
|
||||
if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
|
||||
rsp->n_force_qs_lh++;
|
||||
raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
|
||||
return; /* Someone beat us to it. */
|
||||
}
|
||||
WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
|
||||
raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
|
||||
rcu_gp_kthread_wake(rsp);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
|
||||
swake_up(&rsp->gp_wq); /* Memory barrier implied by swake_up() path. */
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2925,7 +2902,7 @@ __rcu_process_callbacks(struct rcu_state *rsp)
|
||||
if (cpu_needs_another_gp(rsp, rdp)) {
|
||||
raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */
|
||||
needwake = rcu_start_gp(rsp);
|
||||
raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
|
||||
if (needwake)
|
||||
rcu_gp_kthread_wake(rsp);
|
||||
} else {
|
||||
@@ -3016,7 +2993,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
|
||||
|
||||
raw_spin_lock_rcu_node(rnp_root);
|
||||
needwake = rcu_start_gp(rsp);
|
||||
raw_spin_unlock(&rnp_root->lock);
|
||||
raw_spin_unlock_rcu_node(rnp_root);
|
||||
if (needwake)
|
||||
rcu_gp_kthread_wake(rsp);
|
||||
} else {
|
||||
@@ -3436,14 +3413,14 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
|
||||
rcu_for_each_leaf_node(rsp, rnp) {
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
if (rnp->expmaskinit == rnp->expmaskinitnext) {
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
continue; /* No new CPUs, nothing to do. */
|
||||
}
|
||||
|
||||
/* Update this node's mask, track old value for propagation. */
|
||||
oldmask = rnp->expmaskinit;
|
||||
rnp->expmaskinit = rnp->expmaskinitnext;
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
|
||||
/* If was already nonzero, nothing to propagate. */
|
||||
if (oldmask)
|
||||
@@ -3458,7 +3435,7 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
|
||||
if (rnp_up->expmaskinit)
|
||||
done = true;
|
||||
rnp_up->expmaskinit |= mask;
|
||||
raw_spin_unlock_irqrestore(&rnp_up->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp_up, flags);
|
||||
if (done)
|
||||
break;
|
||||
mask = rnp_up->grpmask;
|
||||
@@ -3481,7 +3458,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
WARN_ON_ONCE(rnp->expmask);
|
||||
rnp->expmask = rnp->expmaskinit;
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3522,19 +3499,19 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
|
||||
if (!rnp->expmask)
|
||||
rcu_initiate_boost(rnp, flags);
|
||||
else
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
break;
|
||||
}
|
||||
if (rnp->parent == NULL) {
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
if (wake) {
|
||||
smp_mb(); /* EGP done before wake_up(). */
|
||||
wake_up(&rsp->expedited_wq);
|
||||
swake_up(&rsp->expedited_wq);
|
||||
}
|
||||
break;
|
||||
}
|
||||
mask = rnp->grpmask;
|
||||
raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
|
||||
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled */
|
||||
rnp = rnp->parent;
|
||||
raw_spin_lock_rcu_node(rnp); /* irqs already disabled */
|
||||
WARN_ON_ONCE(!(rnp->expmask & mask));
|
||||
@@ -3569,7 +3546,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
|
||||
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
if (!(rnp->expmask & mask)) {
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
return;
|
||||
}
|
||||
rnp->expmask &= ~mask;
|
||||
@@ -3730,7 +3707,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
|
||||
*/
|
||||
if (rcu_preempt_has_tasks(rnp))
|
||||
rnp->exp_tasks = rnp->blkd_tasks.next;
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
|
||||
/* IPI the remaining CPUs for expedited quiescent state. */
|
||||
mask = 1;
|
||||
@@ -3747,7 +3724,7 @@ retry_ipi:
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
if (cpu_online(cpu) &&
|
||||
(rnp->expmask & mask)) {
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
schedule_timeout_uninterruptible(1);
|
||||
if (cpu_online(cpu) &&
|
||||
(rnp->expmask & mask))
|
||||
@@ -3756,7 +3733,7 @@ retry_ipi:
|
||||
}
|
||||
if (!(rnp->expmask & mask))
|
||||
mask_ofl_ipi &= ~mask;
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
/* Report quiescent states for those that went offline. */
|
||||
mask_ofl_test |= mask_ofl_ipi;
|
||||
@@ -3780,7 +3757,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
|
||||
jiffies_start = jiffies;
|
||||
|
||||
for (;;) {
|
||||
ret = wait_event_interruptible_timeout(
|
||||
ret = swait_event_timeout(
|
||||
rsp->expedited_wq,
|
||||
sync_rcu_preempt_exp_done(rnp_root),
|
||||
jiffies_stall);
|
||||
@@ -3788,7 +3765,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
|
||||
return;
|
||||
if (ret < 0) {
|
||||
/* Hit a signal, disable CPU stall warnings. */
|
||||
wait_event(rsp->expedited_wq,
|
||||
swait_event(rsp->expedited_wq,
|
||||
sync_rcu_preempt_exp_done(rnp_root));
|
||||
return;
|
||||
}
|
||||
@@ -4163,7 +4140,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
|
||||
return;
|
||||
raw_spin_lock_rcu_node(rnp); /* Interrupts already disabled. */
|
||||
rnp->qsmaskinit |= mask;
|
||||
raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
|
||||
raw_spin_unlock_rcu_node(rnp); /* Interrupts remain disabled. */
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4187,7 +4164,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
|
||||
rdp->rsp = rsp;
|
||||
mutex_init(&rdp->exp_funnel_mutex);
|
||||
rcu_boot_init_nocb_percpu_data(rdp);
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -4215,7 +4192,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
|
||||
rcu_sysidle_init_percpu_data(rdp->dynticks);
|
||||
atomic_set(&rdp->dynticks->dynticks,
|
||||
(atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
|
||||
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
|
||||
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
|
||||
|
||||
/*
|
||||
* Add CPU to leaf rcu_node pending-online bitmask. Any needed
|
||||
@@ -4236,7 +4213,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
|
||||
rdp->rcu_qs_ctr_snap = per_cpu(rcu_qs_ctr, cpu);
|
||||
rdp->core_needs_qs = false;
|
||||
trace_rcu_grace_period(rsp->name, rdp->gpnum, TPS("cpuonl"));
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
|
||||
static void rcu_prepare_cpu(int cpu)
|
||||
@@ -4247,6 +4224,46 @@ static void rcu_prepare_cpu(int cpu)
|
||||
rcu_init_percpu_data(cpu, rsp);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
/*
|
||||
* The CPU is exiting the idle loop into the arch_cpu_idle_dead()
|
||||
* function. We now remove it from the rcu_node tree's ->qsmaskinit
|
||||
* bit masks.
|
||||
* The CPU is exiting the idle loop into the arch_cpu_idle_dead()
|
||||
* function. We now remove it from the rcu_node tree's ->qsmaskinit
|
||||
* bit masks.
|
||||
*/
|
||||
static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp)
|
||||
{
|
||||
unsigned long flags;
|
||||
unsigned long mask;
|
||||
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
|
||||
|
||||
if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
|
||||
return;
|
||||
|
||||
/* Remove outgoing CPU from mask in the leaf rcu_node structure. */
|
||||
mask = rdp->grpmask;
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
|
||||
rnp->qsmaskinitnext &= ~mask;
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
|
||||
void rcu_report_dead(unsigned int cpu)
|
||||
{
|
||||
struct rcu_state *rsp;
|
||||
|
||||
/* QS for any half-done expedited RCU-sched GP. */
|
||||
preempt_disable();
|
||||
rcu_report_exp_rdp(&rcu_sched_state,
|
||||
this_cpu_ptr(rcu_sched_state.rda), true);
|
||||
preempt_enable();
|
||||
for_each_rcu_flavor(rsp)
|
||||
rcu_cleanup_dying_idle_cpu(cpu, rsp);
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Handle CPU online/offline notification events.
|
||||
*/
|
||||
@@ -4278,17 +4295,6 @@ int rcu_cpu_notify(struct notifier_block *self,
|
||||
for_each_rcu_flavor(rsp)
|
||||
rcu_cleanup_dying_cpu(rsp);
|
||||
break;
|
||||
case CPU_DYING_IDLE:
|
||||
/* QS for any half-done expedited RCU-sched GP. */
|
||||
preempt_disable();
|
||||
rcu_report_exp_rdp(&rcu_sched_state,
|
||||
this_cpu_ptr(rcu_sched_state.rda), true);
|
||||
preempt_enable();
|
||||
|
||||
for_each_rcu_flavor(rsp) {
|
||||
rcu_cleanup_dying_idle_cpu(cpu, rsp);
|
||||
}
|
||||
break;
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
case CPU_UP_CANCELED:
|
||||
@@ -4358,7 +4364,7 @@ static int __init rcu_spawn_gp_kthread(void)
|
||||
sp.sched_priority = kthread_prio;
|
||||
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
wake_up_process(t);
|
||||
}
|
||||
rcu_spawn_nocb_kthreads();
|
||||
@@ -4449,8 +4455,8 @@ static void __init rcu_init_one(struct rcu_state *rsp)
|
||||
cpustride *= levelspread[i];
|
||||
rnp = rsp->level[i];
|
||||
for (j = 0; j < levelcnt[i]; j++, rnp++) {
|
||||
raw_spin_lock_init(&rnp->lock);
|
||||
lockdep_set_class_and_name(&rnp->lock,
|
||||
raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock));
|
||||
lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock),
|
||||
&rcu_node_class[i], buf[i]);
|
||||
raw_spin_lock_init(&rnp->fqslock);
|
||||
lockdep_set_class_and_name(&rnp->fqslock,
|
||||
@@ -4482,8 +4488,8 @@ static void __init rcu_init_one(struct rcu_state *rsp)
|
||||
}
|
||||
}
|
||||
|
||||
init_waitqueue_head(&rsp->gp_wq);
|
||||
init_waitqueue_head(&rsp->expedited_wq);
|
||||
init_swait_queue_head(&rsp->gp_wq);
|
||||
init_swait_queue_head(&rsp->expedited_wq);
|
||||
rnp = rsp->level[rcu_num_lvls - 1];
|
||||
for_each_possible_cpu(i) {
|
||||
while (i > rnp->grphi)
|
||||
|
@@ -27,6 +27,7 @@
|
||||
#include <linux/threads.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/seqlock.h>
|
||||
#include <linux/swait.h>
|
||||
#include <linux/stop_machine.h>
|
||||
|
||||
/*
|
||||
@@ -149,8 +150,9 @@ struct rcu_dynticks {
|
||||
* Definition for node within the RCU grace-period-detection hierarchy.
|
||||
*/
|
||||
struct rcu_node {
|
||||
raw_spinlock_t lock; /* Root rcu_node's lock protects some */
|
||||
/* rcu_state fields as well as following. */
|
||||
raw_spinlock_t __private lock; /* Root rcu_node's lock protects */
|
||||
/* some rcu_state fields as well as */
|
||||
/* following. */
|
||||
unsigned long gpnum; /* Current grace period for this node. */
|
||||
/* This will either be equal to or one */
|
||||
/* behind the root rcu_node's gpnum. */
|
||||
@@ -243,7 +245,7 @@ struct rcu_node {
|
||||
/* Refused to boost: not sure why, though. */
|
||||
/* This can happen due to race conditions. */
|
||||
#ifdef CONFIG_RCU_NOCB_CPU
|
||||
wait_queue_head_t nocb_gp_wq[2];
|
||||
struct swait_queue_head nocb_gp_wq[2];
|
||||
/* Place for rcu_nocb_kthread() to wait GP. */
|
||||
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
|
||||
int need_future_gp[2];
|
||||
@@ -399,7 +401,7 @@ struct rcu_data {
|
||||
atomic_long_t nocb_q_count_lazy; /* invocation (all stages). */
|
||||
struct rcu_head *nocb_follower_head; /* CBs ready to invoke. */
|
||||
struct rcu_head **nocb_follower_tail;
|
||||
wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
|
||||
struct swait_queue_head nocb_wq; /* For nocb kthreads to sleep on. */
|
||||
struct task_struct *nocb_kthread;
|
||||
int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
|
||||
|
||||
@@ -478,7 +480,7 @@ struct rcu_state {
|
||||
unsigned long gpnum; /* Current gp number. */
|
||||
unsigned long completed; /* # of last completed gp. */
|
||||
struct task_struct *gp_kthread; /* Task for grace periods. */
|
||||
wait_queue_head_t gp_wq; /* Where GP task waits. */
|
||||
struct swait_queue_head gp_wq; /* Where GP task waits. */
|
||||
short gp_flags; /* Commands for GP task. */
|
||||
short gp_state; /* GP kthread sleep state. */
|
||||
|
||||
@@ -506,7 +508,7 @@ struct rcu_state {
|
||||
unsigned long expedited_sequence; /* Take a ticket. */
|
||||
atomic_long_t expedited_normal; /* # fallbacks to normal. */
|
||||
atomic_t expedited_need_qs; /* # CPUs left to check in. */
|
||||
wait_queue_head_t expedited_wq; /* Wait for check-ins. */
|
||||
struct swait_queue_head expedited_wq; /* Wait for check-ins. */
|
||||
int ncpus_snap; /* # CPUs seen last time. */
|
||||
|
||||
unsigned long jiffies_force_qs; /* Time at which to invoke */
|
||||
@@ -621,7 +623,8 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp);
|
||||
static void increment_cpu_stall_ticks(void);
|
||||
static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
|
||||
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
|
||||
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
|
||||
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
|
||||
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
|
||||
static void rcu_init_one_nocb(struct rcu_node *rnp);
|
||||
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
|
||||
bool lazy, unsigned long flags);
|
||||
@@ -680,7 +683,7 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
#endif /* #else #ifdef CONFIG_PPC */

/*
* Wrappers for the rcu_node::lock acquire.
* Wrappers for the rcu_node::lock acquire and release.
*
* Because the rcu_nodes form a tree, the tree traversal locking will observe
* different lock values, this in turn means that an UNLOCK of one level
@@ -689,29 +692,48 @@ static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
*
* In order to restore full ordering between tree levels, augment the regular
* lock acquire functions with smp_mb__after_unlock_lock().
*
* As ->lock of struct rcu_node is a __private field, one should use
* these wrappers rather than directly call raw_spin_{lock,unlock}* on ->lock.
*/
static inline void raw_spin_lock_rcu_node(struct rcu_node *rnp)
{
raw_spin_lock(&rnp->lock);
raw_spin_lock(&ACCESS_PRIVATE(rnp, lock));
smp_mb__after_unlock_lock();
}

static inline void raw_spin_unlock_rcu_node(struct rcu_node *rnp)
{
raw_spin_unlock(&ACCESS_PRIVATE(rnp, lock));
}

static inline void raw_spin_lock_irq_rcu_node(struct rcu_node *rnp)
{
raw_spin_lock_irq(&rnp->lock);
raw_spin_lock_irq(&ACCESS_PRIVATE(rnp, lock));
smp_mb__after_unlock_lock();
}

#define raw_spin_lock_irqsave_rcu_node(rnp, flags) \
do { \
typecheck(unsigned long, flags); \
raw_spin_lock_irqsave(&(rnp)->lock, flags); \
smp_mb__after_unlock_lock(); \
static inline void raw_spin_unlock_irq_rcu_node(struct rcu_node *rnp)
{
raw_spin_unlock_irq(&ACCESS_PRIVATE(rnp, lock));
}

#define raw_spin_lock_irqsave_rcu_node(rnp, flags) \
do { \
typecheck(unsigned long, flags); \
raw_spin_lock_irqsave(&ACCESS_PRIVATE(rnp, lock), flags); \
smp_mb__after_unlock_lock(); \
} while (0)

#define raw_spin_unlock_irqrestore_rcu_node(rnp, flags) \
do { \
typecheck(unsigned long, flags); \
raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(rnp, lock), flags); \
} while (0)

static inline bool raw_spin_trylock_rcu_node(struct rcu_node *rnp)
{
bool locked = raw_spin_trylock(&rnp->lock);
bool locked = raw_spin_trylock(&ACCESS_PRIVATE(rnp, lock));

if (locked)
smp_mb__after_unlock_lock();
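As a reading aid only (not in the patch), this is the calling pattern the conversions throughout this commit follow when they switch from raw_spin_lock_irqsave(&rnp->lock, ...) to the wrappers above; the function name is invented, the wrappers and the ->qsmask field are real.

/* Hypothetical example of the wrapper-based rcu_node locking pattern. */
static void example_check_node(struct rcu_node *rnp)
{
	unsigned long flags;

	/* The wrapper takes the __private ->lock and supplies the
	 * smp_mb__after_unlock_lock() that callers used to add by hand. */
	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	if (!rnp->qsmask) {
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
		return;
	}
	/* ... report quiescent states while holding the node lock ... */
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}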
@@ -235,7 +235,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
|
||||
rnp->gp_tasks = &t->rcu_node_entry;
|
||||
if (!rnp->exp_tasks && (blkd_state & RCU_EXP_BLKD))
|
||||
rnp->exp_tasks = &t->rcu_node_entry;
|
||||
raw_spin_unlock(&rnp->lock); /* rrupts remain disabled. */
|
||||
raw_spin_unlock_rcu_node(rnp); /* interrupts remain disabled. */
|
||||
|
||||
/*
|
||||
* Report the quiescent state for the expedited GP. This expedited
|
||||
@@ -489,7 +489,7 @@ void rcu_read_unlock_special(struct task_struct *t)
|
||||
!!rnp->gp_tasks);
|
||||
rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags);
|
||||
} else {
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
|
||||
/* Unboost if we were boosted. */
|
||||
@@ -518,14 +518,14 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
|
||||
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
if (!rcu_preempt_blocked_readers_cgp(rnp)) {
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
return;
|
||||
}
|
||||
t = list_entry(rnp->gp_tasks->prev,
|
||||
struct task_struct, rcu_node_entry);
|
||||
list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry)
|
||||
sched_show_task(t);
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -807,7 +807,6 @@ void exit_rcu(void)
|
||||
#else /* #ifdef CONFIG_PREEMPT_RCU */
|
||||
|
||||
static struct rcu_state *const rcu_state_p = &rcu_sched_state;
|
||||
static struct rcu_data __percpu *const rcu_data_p = &rcu_sched_data;
|
||||
|
||||
/*
|
||||
* Tell them what RCU they are running.
|
||||
@@ -991,7 +990,7 @@ static int rcu_boost(struct rcu_node *rnp)
|
||||
* might exit their RCU read-side critical sections on their own.
|
||||
*/
|
||||
if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL) {
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1028,7 +1027,7 @@ static int rcu_boost(struct rcu_node *rnp)
|
||||
*/
|
||||
t = container_of(tb, struct task_struct, rcu_node_entry);
|
||||
rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
/* Lock only for side effect: boosts task t's priority. */
|
||||
rt_mutex_lock(&rnp->boost_mtx);
|
||||
rt_mutex_unlock(&rnp->boost_mtx); /* Then keep lockdep happy. */
|
||||
@@ -1088,7 +1087,7 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
|
||||
|
||||
if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
|
||||
rnp->n_balk_exp_gp_tasks++;
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
return;
|
||||
}
|
||||
if (rnp->exp_tasks != NULL ||
|
||||
@@ -1098,13 +1097,13 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
|
||||
ULONG_CMP_GE(jiffies, rnp->boost_time))) {
|
||||
if (rnp->exp_tasks == NULL)
|
||||
rnp->boost_tasks = rnp->gp_tasks;
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
t = rnp->boost_kthread_task;
|
||||
if (t)
|
||||
rcu_wake_cond(t, rnp->boost_kthread_status);
|
||||
} else {
|
||||
rcu_initiate_boost_trace(rnp);
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1172,7 +1171,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
|
||||
return PTR_ERR(t);
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
rnp->boost_kthread_task = t;
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
sp.sched_priority = kthread_prio;
|
||||
sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
|
||||
wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
|
||||
@@ -1308,7 +1307,7 @@ static void rcu_prepare_kthreads(int cpu)
|
||||
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
|
||||
__releases(rnp->lock)
|
||||
{
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
}
|
||||
|
||||
static void invoke_rcu_callbacks_kthread(void)
|
||||
@@ -1559,7 +1558,7 @@ static void rcu_prepare_for_idle(void)
|
||||
rnp = rdp->mynode;
|
||||
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
|
||||
needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
|
||||
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
|
||||
raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
|
||||
if (needwake)
|
||||
rcu_gp_kthread_wake(rsp);
|
||||
}
|
||||
@@ -1811,9 +1810,9 @@ early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
|
||||
* Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
|
||||
* grace period.
|
||||
*/
|
||||
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
|
||||
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
|
||||
{
|
||||
wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
|
||||
swake_up_all(sq);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1829,10 +1828,15 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
|
||||
rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
|
||||
}
|
||||
|
||||
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
|
||||
{
|
||||
return &rnp->nocb_gp_wq[rnp->completed & 0x1];
|
||||
}
|
||||
|
||||
static void rcu_init_one_nocb(struct rcu_node *rnp)
|
||||
{
|
||||
init_waitqueue_head(&rnp->nocb_gp_wq[0]);
|
||||
init_waitqueue_head(&rnp->nocb_gp_wq[1]);
|
||||
init_swait_queue_head(&rnp->nocb_gp_wq[0]);
|
||||
init_swait_queue_head(&rnp->nocb_gp_wq[1]);
|
||||
}
|
||||
|
||||
#ifndef CONFIG_RCU_NOCB_CPU_ALL
|
||||
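For orientation only: the hunks above move the no-CBs grace-period waits from wait_queue_head_t to the new simple waitqueues. Below is a minimal sketch of the swait pattern they rely on, using only calls that appear in this commit; the queue and condition names are placeholders, not code from the patch.

/* Hypothetical swait usage sketch; not part of the patch. */
#include <linux/swait.h>

static DECLARE_SWAIT_QUEUE_HEAD(example_wq);
static bool example_cond;

static int example_waiter(void)
{
	/* Sleep until example_cond becomes true or a signal arrives. */
	return swait_event_interruptible(example_wq, READ_ONCE(example_cond));
}

static void example_waker(void)
{
	WRITE_ONCE(example_cond, true);
	swake_up(&example_wq);		/* wakes at most one waiter */
}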
@@ -1857,7 +1861,7 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
|
||||
if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
|
||||
/* Prior smp_mb__after_atomic() orders against prior enqueue. */
|
||||
WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
|
||||
wake_up(&rdp_leader->nocb_wq);
|
||||
swake_up(&rdp_leader->nocb_wq);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2059,7 +2063,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
|
||||
|
||||
raw_spin_lock_irqsave_rcu_node(rnp, flags);
|
||||
needwake = rcu_start_future_gp(rnp, rdp, &c);
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
|
||||
if (needwake)
|
||||
rcu_gp_kthread_wake(rdp->rsp);
|
||||
|
||||
@@ -2069,7 +2073,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
|
||||
*/
|
||||
trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
|
||||
for (;;) {
|
||||
wait_event_interruptible(
|
||||
swait_event_interruptible(
|
||||
rnp->nocb_gp_wq[c & 0x1],
|
||||
(d = ULONG_CMP_GE(READ_ONCE(rnp->completed), c)));
|
||||
if (likely(d))
|
||||
@@ -2097,7 +2101,7 @@ wait_again:
|
||||
/* Wait for callbacks to appear. */
|
||||
if (!rcu_nocb_poll) {
|
||||
trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, "Sleep");
|
||||
wait_event_interruptible(my_rdp->nocb_wq,
|
||||
swait_event_interruptible(my_rdp->nocb_wq,
|
||||
!READ_ONCE(my_rdp->nocb_leader_sleep));
|
||||
/* Memory barrier handled by smp_mb() calls below and repoll. */
|
||||
} else if (firsttime) {
|
||||
@@ -2172,7 +2176,7 @@ wait_again:
|
||||
* List was empty, wake up the follower.
|
||||
* Memory barriers supplied by atomic_long_add().
|
||||
*/
|
||||
wake_up(&rdp->nocb_wq);
|
||||
swake_up(&rdp->nocb_wq);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2193,7 +2197,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
|
||||
if (!rcu_nocb_poll) {
|
||||
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
|
||||
"FollowerSleep");
|
||||
wait_event_interruptible(rdp->nocb_wq,
|
||||
swait_event_interruptible(rdp->nocb_wq,
|
||||
READ_ONCE(rdp->nocb_follower_head));
|
||||
} else if (firsttime) {
|
||||
/* Don't drown trace log with "Poll"! */
|
||||
@@ -2352,7 +2356,7 @@ void __init rcu_init_nohz(void)
|
||||
static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
|
||||
{
|
||||
rdp->nocb_tail = &rdp->nocb_head;
|
||||
init_waitqueue_head(&rdp->nocb_wq);
|
||||
init_swait_queue_head(&rdp->nocb_wq);
|
||||
rdp->nocb_follower_tail = &rdp->nocb_follower_head;
|
||||
}
|
||||
|
||||
@@ -2502,7 +2506,7 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
|
||||
return false;
|
||||
}
|
||||
|
||||
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
|
||||
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -2510,6 +2514,11 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
|
||||
{
|
||||
}
|
||||
|
||||
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void rcu_init_one_nocb(struct rcu_node *rnp)
|
||||
{
|
||||
}
|
||||
|
@@ -128,6 +128,7 @@ bool rcu_gp_is_normal(void)
|
||||
{
|
||||
return READ_ONCE(rcu_normal);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_gp_is_normal);
|
||||
|
||||
static atomic_t rcu_expedited_nesting =
|
||||
ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);
|
||||
|
@@ -333,13 +333,13 @@ int release_resource(struct resource *old)
EXPORT_SYMBOL(release_resource);

/*
* Finds the lowest iomem reosurce exists with-in [res->start.res->end)
* the caller must specify res->start, res->end, res->flags and "name".
* If found, returns 0, res is overwritten, if not found, returns -1.
* This walks through whole tree and not just first level children
* until and unless first_level_children_only is true.
* Finds the lowest iomem resource existing within [res->start.res->end).
* The caller must specify res->start, res->end, res->flags, and optionally
* desc. If found, returns 0, res is overwritten, if not found, returns -1.
* This function walks the whole tree and not just first level children until
* and unless first_level_children_only is true.
*/
static int find_next_iomem_res(struct resource *res, char *name,
static int find_next_iomem_res(struct resource *res, unsigned long desc,
bool first_level_children_only)
{
resource_size_t start, end;
@@ -358,9 +358,9 @@ static int find_next_iomem_res(struct resource *res, char *name,
read_lock(&resource_lock);

for (p = iomem_resource.child; p; p = next_resource(p, sibling_only)) {
if (p->flags != res->flags)
if ((p->flags & res->flags) != res->flags)
continue;
if (name && strcmp(p->name, name))
if ((desc != IORES_DESC_NONE) && (desc != p->desc))
continue;
if (p->start > end) {
p = NULL;
@@ -385,15 +385,18 @@ static int find_next_iomem_res(struct resource *res, char *name,
* Walks through iomem resources and calls func() with matching resource
* ranges. This walks through whole tree and not just first level children.
* All the memory ranges which overlap start,end and also match flags and
* name are valid candidates.
* desc are valid candidates.
*
* @name: name of resource
* @flags: resource flags
* @desc: I/O resource descriptor. Use IORES_DESC_NONE to skip @desc check.
* @flags: I/O resource flags
* @start: start addr
* @end: end addr
*
* NOTE: For a new descriptor search, define a new IORES_DESC in
* <linux/ioport.h> and set it in 'desc' of a target resource entry.
*/
int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end,
void *arg, int (*func)(u64, u64, void *))
int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
u64 end, void *arg, int (*func)(u64, u64, void *))
{
struct resource res;
u64 orig_end;
@@ -403,23 +406,27 @@ int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end,
res.end = end;
res.flags = flags;
orig_end = res.end;

while ((res.start < res.end) &&
(!find_next_iomem_res(&res, name, false))) {
(!find_next_iomem_res(&res, desc, false))) {

ret = (*func)(res.start, res.end, arg);
if (ret)
break;

res.start = res.end + 1;
res.end = orig_end;
}

return ret;
}
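Purely as illustration (not from the patch), here is a hypothetical user of the walk_iomem_res_desc() interface introduced above; the callback and the summing helper are invented, while the descriptor and flag values come from <linux/ioport.h> and match what walk_system_ram_res() passes internally.

/* Hypothetical caller of walk_iomem_res_desc(); not part of the patch. */
#include <linux/ioport.h>

static int example_count_range(u64 start, u64 end, void *arg)
{
	u64 *total = arg;

	*total += end - start + 1;
	return 0;		/* returning non-zero stops the walk */
}

static u64 example_sum_busy_ram(u64 start, u64 end)
{
	u64 total = 0;

	walk_iomem_res_desc(IORES_DESC_NONE,
			    IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
			    start, end, &total, example_count_range);
	return total;
}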
/*
* This function calls callback against all memory range of "System RAM"
* which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
* Now, this function is only for "System RAM". This function deals with
* full ranges and not pfn. If resources are not pfn aligned, dealing
* with pfn can truncate ranges.
* This function calls the @func callback against all memory ranges of type
* System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOURCE_BUSY.
* Now, this function is only for System RAM, it deals with full ranges and
* not PFNs. If resources are not PFN-aligned, dealing with PFNs can truncate
* ranges.
*/
int walk_system_ram_res(u64 start, u64 end, void *arg,
int (*func)(u64, u64, void *))
@@ -430,10 +437,10 @@ int walk_system_ram_res(u64 start, u64 end, void *arg,

res.start = start;
res.end = end;
res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
orig_end = res.end;
while ((res.start < res.end) &&
(!find_next_iomem_res(&res, "System RAM", true))) {
(!find_next_iomem_res(&res, IORES_DESC_NONE, true))) {
ret = (*func)(res.start, res.end, arg);
if (ret)
break;
@@ -446,9 +453,9 @@ int walk_system_ram_res(u64 start, u64 end, void *arg,
#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)

/*
* This function calls callback against all memory range of "System RAM"
* which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
* Now, this function is only for "System RAM".
* This function calls the @func callback against all memory ranges of type
* System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOURCE_BUSY.
* It is to be used only for System RAM.
*/
int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
void *arg, int (*func)(unsigned long, unsigned long, void *))
@@ -460,10 +467,10 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,

res.start = (u64) start_pfn << PAGE_SHIFT;
res.end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
orig_end = res.end;
while ((res.start < res.end) &&
(find_next_iomem_res(&res, "System RAM", true) >= 0)) {
(find_next_iomem_res(&res, IORES_DESC_NONE, true) >= 0)) {
pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
end_pfn = (res.end + 1) >> PAGE_SHIFT;
if (end_pfn > pfn)
@@ -484,7 +491,7 @@ static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg)
}
/*
* This generic page_is_ram() returns true if specified address is
* registered as "System RAM" in iomem_resource list.
* registered as System RAM in iomem_resource list.
*/
int __weak page_is_ram(unsigned long pfn)
{
@@ -496,30 +503,34 @@ EXPORT_SYMBOL_GPL(page_is_ram);
* region_intersects() - determine intersection of region with known resources
* @start: region start address
* @size: size of region
* @name: name of resource (in iomem_resource)
* @flags: flags of resource (in iomem_resource)
* @desc: descriptor of resource (in iomem_resource) or IORES_DESC_NONE
*
* Check if the specified region partially overlaps or fully eclipses a
* resource identified by @name. Return REGION_DISJOINT if the region
* does not overlap @name, return REGION_MIXED if the region overlaps
* @type and another resource, and return REGION_INTERSECTS if the
* region overlaps @type and no other defined resource. Note, that
* REGION_INTERSECTS is also returned in the case when the specified
* region overlaps RAM and undefined memory holes.
* resource identified by @flags and @desc (optional with IORES_DESC_NONE).
* Return REGION_DISJOINT if the region does not overlap @flags/@desc,
* return REGION_MIXED if the region overlaps @flags/@desc and another
* resource, and return REGION_INTERSECTS if the region overlaps @flags/@desc
* and no other defined resource. Note that REGION_INTERSECTS is also
* returned in the case when the specified region overlaps RAM and undefined
* memory holes.
*
* region_intersects() is used by memory remapping functions to ensure
* the user is not remapping RAM and is a vast speed up over walking
* through the resource table page by page.
*/
int region_intersects(resource_size_t start, size_t size, const char *name)
int region_intersects(resource_size_t start, size_t size, unsigned long flags,
unsigned long desc)
{
unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
resource_size_t end = start + size - 1;
int type = 0; int other = 0;
struct resource *p;

read_lock(&resource_lock);
for (p = iomem_resource.child; p ; p = p->sibling) {
bool is_type = strcmp(p->name, name) == 0 && p->flags == flags;
bool is_type = (((p->flags & flags) == flags) &&
((desc == IORES_DESC_NONE) ||
(desc == p->desc)));

if (start >= p->start && start <= p->end)
is_type ? type++ : other++;
@@ -538,6 +549,7 @@ int region_intersects(resource_size_t start, size_t size, const char *name)

return REGION_DISJOINT;
}
EXPORT_SYMBOL_GPL(region_intersects);
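One last illustrative sketch (not part of the patch): checking a candidate range with the new region_intersects() signature, mirroring the memremap() call earlier in this diff; the wrapper name and its use of the return value are invented.

/* Hypothetical use of the new region_intersects() signature. */
#include <linux/ioport.h>
#include <linux/mm.h>

static bool example_range_is_ram(resource_size_t start, size_t size)
{
	int ret = region_intersects(start, size,
				    IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);

	/* REGION_MIXED would mean the range straddles RAM and non-RAM. */
	return ret == REGION_INTERSECTS;
}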
void __weak arch_remove_reservations(struct resource *avail)
|
||||
{
|
||||
@@ -948,6 +960,7 @@ static void __init __reserve_region_with_split(struct resource *root,
|
||||
res->start = start;
|
||||
res->end = end;
|
||||
res->flags = IORESOURCE_BUSY;
|
||||
res->desc = IORES_DESC_NONE;
|
||||
|
||||
while (1) {
|
||||
|
||||
@@ -982,6 +995,7 @@ static void __init __reserve_region_with_split(struct resource *root,
|
||||
next_res->start = conflict->end + 1;
|
||||
next_res->end = end;
|
||||
next_res->flags = IORESOURCE_BUSY;
|
||||
next_res->desc = IORES_DESC_NONE;
|
||||
}
|
||||
} else {
|
||||
res->start = conflict->end + 1;
|
||||
@@ -1071,8 +1085,9 @@ struct resource * __request_region(struct resource *parent,
|
||||
res->name = name;
|
||||
res->start = start;
|
||||
res->end = start + n - 1;
|
||||
res->flags = resource_type(parent);
|
||||
res->flags = resource_type(parent) | resource_ext_type(parent);
|
||||
res->flags |= IORESOURCE_BUSY | flags;
|
||||
res->desc = IORES_DESC_NONE;
|
||||
|
||||
write_lock(&resource_lock);
|
||||
|
||||
@@ -1238,6 +1253,7 @@ int release_mem_region_adjustable(struct resource *parent,
|
||||
new_res->start = end + 1;
|
||||
new_res->end = res->end;
|
||||
new_res->flags = res->flags;
|
||||
new_res->desc = res->desc;
|
||||
new_res->parent = res->parent;
|
||||
new_res->sibling = res->sibling;
|
||||
new_res->child = NULL;
|
||||
@@ -1413,6 +1429,7 @@ static int __init reserve_setup(char *str)
|
||||
res->start = io_start;
|
||||
res->end = io_start + io_num - 1;
|
||||
res->flags = IORESOURCE_BUSY;
|
||||
res->desc = IORES_DESC_NONE;
|
||||
res->child = NULL;
|
||||
if (request_resource(res->start >= 0x10000 ? &iomem_resource : &ioport_resource, res) == 0)
|
||||
reserved = x+1;
|
||||
|
@@ -13,7 +13,7 @@ endif
|
||||
|
||||
obj-y += core.o loadavg.o clock.o cputime.o
|
||||
obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
|
||||
obj-y += wait.o completion.o idle.o
|
||||
obj-y += wait.o swait.o completion.o idle.o
|
||||
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
|
||||
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
|
||||
obj-$(CONFIG_SCHEDSTATS) += stats.o
|
||||
|
@@ -61,6 +61,7 @@
|
||||
#include <linux/static_key.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/tick.h>
|
||||
|
||||
/*
|
||||
* Scheduler clock - returns current time in nanosec units.
|
||||
@@ -89,6 +90,8 @@ static void __set_sched_clock_stable(void)
|
||||
{
|
||||
if (!sched_clock_stable())
|
||||
static_key_slow_inc(&__sched_clock_stable);
|
||||
|
||||
tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
|
||||
}
|
||||
|
||||
void set_sched_clock_stable(void)
|
||||
@@ -108,6 +111,8 @@ static void __clear_sched_clock_stable(struct work_struct *work)
|
||||
/* XXX worry about clock continuity */
|
||||
if (sched_clock_stable())
|
||||
static_key_slow_dec(&__sched_clock_stable);
|
||||
|
||||
tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
|
||||
}
|
||||
|
||||
static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
|
||||
|
@@ -26,6 +26,7 @@
|
||||
* Thomas Gleixner, Mike Kravetz
|
||||
*/
|
||||
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/nmi.h>
|
||||
@@ -66,12 +67,10 @@
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/init_task.h>
|
||||
#include <linux/binfmts.h>
|
||||
#include <linux/context_tracking.h>
|
||||
#include <linux/compiler.h>
|
||||
|
||||
@@ -124,138 +123,6 @@ const_debug unsigned int sysctl_sched_features =
|
||||
|
||||
#undef SCHED_FEAT
|
||||
|
||||
#ifdef CONFIG_SCHED_DEBUG
|
||||
#define SCHED_FEAT(name, enabled) \
|
||||
#name ,
|
||||
|
||||
static const char * const sched_feat_names[] = {
|
||||
#include "features.h"
|
||||
};
|
||||
|
||||
#undef SCHED_FEAT
|
||||
|
||||
static int sched_feat_show(struct seq_file *m, void *v)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < __SCHED_FEAT_NR; i++) {
|
||||
if (!(sysctl_sched_features & (1UL << i)))
|
||||
seq_puts(m, "NO_");
|
||||
seq_printf(m, "%s ", sched_feat_names[i]);
|
||||
}
|
||||
seq_puts(m, "\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef HAVE_JUMP_LABEL
|
||||
|
||||
#define jump_label_key__true STATIC_KEY_INIT_TRUE
|
||||
#define jump_label_key__false STATIC_KEY_INIT_FALSE
|
||||
|
||||
#define SCHED_FEAT(name, enabled) \
|
||||
jump_label_key__##enabled ,
|
||||
|
||||
struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
|
||||
#include "features.h"
|
||||
};
|
||||
|
||||
#undef SCHED_FEAT
|
||||
|
||||
static void sched_feat_disable(int i)
|
||||
{
|
||||
static_key_disable(&sched_feat_keys[i]);
|
||||
}
|
||||
|
||||
static void sched_feat_enable(int i)
|
||||
{
|
||||
static_key_enable(&sched_feat_keys[i]);
|
||||
}
|
||||
#else
|
||||
static void sched_feat_disable(int i) { };
|
||||
static void sched_feat_enable(int i) { };
|
||||
#endif /* HAVE_JUMP_LABEL */
|
||||
|
||||
static int sched_feat_set(char *cmp)
|
||||
{
|
||||
int i;
|
||||
int neg = 0;
|
||||
|
||||
if (strncmp(cmp, "NO_", 3) == 0) {
|
||||
neg = 1;
|
||||
cmp += 3;
|
||||
}
|
||||
|
||||
for (i = 0; i < __SCHED_FEAT_NR; i++) {
|
||||
if (strcmp(cmp, sched_feat_names[i]) == 0) {
|
||||
if (neg) {
|
||||
sysctl_sched_features &= ~(1UL << i);
|
||||
sched_feat_disable(i);
|
||||
} else {
|
||||
sysctl_sched_features |= (1UL << i);
|
||||
sched_feat_enable(i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
sched_feat_write(struct file *filp, const char __user *ubuf,
|
||||
size_t cnt, loff_t *ppos)
|
||||
{
|
||||
char buf[64];
|
||||
char *cmp;
|
||||
int i;
|
||||
struct inode *inode;
|
||||
|
||||
if (cnt > 63)
|
||||
cnt = 63;
|
||||
|
||||
if (copy_from_user(&buf, ubuf, cnt))
|
||||
return -EFAULT;
|
||||
|
||||
buf[cnt] = 0;
|
||||
cmp = strstrip(buf);
|
||||
|
||||
/* Ensure the static_key remains in a consistent state */
|
||||
inode = file_inode(filp);
|
||||
inode_lock(inode);
|
||||
i = sched_feat_set(cmp);
|
||||
inode_unlock(inode);
|
||||
if (i == __SCHED_FEAT_NR)
|
||||
return -EINVAL;
|
||||
|
||||
*ppos += cnt;
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static int sched_feat_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return single_open(filp, sched_feat_show, NULL);
|
||||
}
|
||||
|
||||
static const struct file_operations sched_feat_fops = {
|
||||
.open = sched_feat_open,
|
||||
.write = sched_feat_write,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
static __init int sched_init_debug(void)
|
||||
{
|
||||
debugfs_create_file("sched_features", 0644, NULL, NULL,
|
||||
&sched_feat_fops);
|
||||
|
||||
return 0;
|
||||
}
|
||||
late_initcall(sched_init_debug);
|
||||
#endif /* CONFIG_SCHED_DEBUG */
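
As a usage note for the debugfs file handled above, a userspace helper could toggle a feature by writing its name, or the name prefixed with "NO_", exactly as sched_feat_write() parses it. This is an illustrative sketch only; the debugfs mount point and the feature name are assumptions.

#include <stdio.h>

/* Hypothetical userspace helper; assumes debugfs is mounted at
 * /sys/kernel/debug and 'name' matches an entry from features.h. */
static int sched_feat_toggle(const char *name, int enable)
{
	FILE *f = fopen("/sys/kernel/debug/sched_features", "w");

	if (!f)
		return -1;
	fprintf(f, "%s%s", enable ? "" : "NO_", name);
	return fclose(f);
}

int main(void)
{
	return sched_feat_toggle("HRTICK", 0);	/* writes "NO_HRTICK" */
}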
|
||||
|
||||
/*
|
||||
* Number of tasks to iterate in a single balance run.
|
||||
* Limited because this is done with IRQs disabled.
|
||||
@@ -453,20 +320,6 @@ static inline void init_hrtick(void)
|
||||
}
|
||||
#endif /* CONFIG_SCHED_HRTICK */
|
||||
|
||||
/*
|
||||
* cmpxchg based fetch_or, macro so it works for different integer types
|
||||
*/
|
||||
#define fetch_or(ptr, val) \
|
||||
({ typeof(*(ptr)) __old, __val = *(ptr); \
|
||||
for (;;) { \
|
||||
__old = cmpxchg((ptr), __val, __val | (val)); \
|
||||
if (__old == __val) \
|
||||
break; \
|
||||
__val = __old; \
|
||||
} \
|
||||
__old; \
|
||||
})
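
The macro above (being removed from this file) is a compare-and-swap loop that ORs in a value and returns the previous contents. A userspace C11 equivalent, shown only to illustrate the semantics:

#include <stdatomic.h>
#include <stdio.h>

/* Illustration only: fetch_or built on a CAS loop, like the kernel macro. */
static unsigned int fetch_or_u32(_Atomic unsigned int *ptr, unsigned int val)
{
	unsigned int old = atomic_load(ptr);

	while (!atomic_compare_exchange_weak(ptr, &old, old | val))
		;			/* a failed CAS reloads 'old' for us */
	return old;			/* previous value, as the macro returns */
}

int main(void)
{
	_Atomic unsigned int word = 0x1;
	unsigned int old = fetch_or_u32(&word, 0x4);

	printf("old=0x%x new=0x%x\n", old, atomic_load(&word));	/* 0x1, 0x5 */
	return 0;
}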
|
||||
|
||||
#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
|
||||
/*
|
||||
* Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
|
||||
@@ -715,31 +568,36 @@ static inline bool got_nohz_idle_kick(void)
|
||||
#endif /* CONFIG_NO_HZ_COMMON */
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
bool sched_can_stop_tick(void)
|
||||
bool sched_can_stop_tick(struct rq *rq)
|
||||
{
|
||||
int fifo_nr_running;
|
||||
|
||||
/* Deadline tasks, even if single, need the tick */
|
||||
if (rq->dl.dl_nr_running)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* FIFO realtime policy runs the highest priority task. Other runnable
|
||||
* tasks are of a lower priority. The scheduler tick does nothing.
|
||||
* FIFO realtime policy runs the highest priority task (after DEADLINE).
|
||||
* Other runnable tasks are of a lower priority. The scheduler tick
|
||||
* isn't needed.
|
||||
*/
|
||||
if (current->policy == SCHED_FIFO)
|
||||
fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
|
||||
if (fifo_nr_running)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Round-robin realtime tasks time slice with other tasks at the same
|
||||
* realtime priority. Is this task the only one at this priority?
|
||||
* realtime priority.
|
||||
*/
|
||||
if (current->policy == SCHED_RR) {
|
||||
struct sched_rt_entity *rt_se = ¤t->rt;
|
||||
|
||||
return list_is_singular(&rt_se->run_list);
|
||||
if (rq->rt.rr_nr_running) {
|
||||
if (rq->rt.rr_nr_running == 1)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* More than one running task need preemption.
|
||||
* nr_running update is assumed to be visible
|
||||
* after IPI is sent from wakers.
|
||||
*/
|
||||
if (this_rq()->nr_running > 1)
|
||||
/* Normal multitasking need periodic preemption checks */
|
||||
if (rq->cfs.nr_running > 1)
|
||||
return false;
|
||||
|
||||
return true;
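
To make the tick-stop decision above easier to follow, here is a plain userspace mirror of the same checks; the struct below is an illustrative stand-in, not the kernel's rq layout.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative mirror of the rq counters used above (not the kernel types). */
struct rq_counts {
	unsigned int dl_nr_running;	/* SCHED_DEADLINE tasks */
	unsigned int rt_nr_running;	/* all realtime tasks   */
	unsigned int rr_nr_running;	/* SCHED_RR subset      */
	unsigned int cfs_nr_running;	/* normal (CFS) tasks   */
};

static bool can_stop_tick(const struct rq_counts *rq)
{
	if (rq->dl_nr_running)				/* deadline always needs the tick */
		return false;
	if (rq->rt_nr_running - rq->rr_nr_running)	/* any FIFO task wins outright    */
		return true;
	if (rq->rr_nr_running)				/* RR: only a lone task stops it  */
		return rq->rr_nr_running == 1;
	return rq->cfs_nr_running <= 1;			/* CFS: >1 task needs preemption  */
}

int main(void)
{
	struct rq_counts rq = { .rt_nr_running = 2, .rr_nr_running = 2 };

	printf("%d\n", can_stop_tick(&rq));	/* 0: two RR tasks still time-slice */
	return 0;
}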
|
||||
@@ -2093,7 +1951,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
|
||||
ttwu_queue(p, cpu);
|
||||
stat:
|
||||
ttwu_stat(p, cpu, wake_flags);
|
||||
if (schedstat_enabled())
|
||||
ttwu_stat(p, cpu, wake_flags);
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
|
||||
@@ -2141,7 +2000,8 @@ static void try_to_wake_up_local(struct task_struct *p)
|
||||
ttwu_activate(rq, p, ENQUEUE_WAKEUP);
|
||||
|
||||
ttwu_do_wakeup(rq, p, 0);
|
||||
ttwu_stat(p, smp_processor_id(), 0);
|
||||
if (schedstat_enabled())
|
||||
ttwu_stat(p, smp_processor_id(), 0);
|
||||
out:
|
||||
raw_spin_unlock(&p->pi_lock);
|
||||
}
|
||||
@@ -2183,7 +2043,6 @@ void __dl_clear_params(struct task_struct *p)
|
||||
dl_se->dl_bw = 0;
|
||||
|
||||
dl_se->dl_throttled = 0;
|
||||
dl_se->dl_new = 1;
|
||||
dl_se->dl_yielded = 0;
|
||||
}
|
||||
|
||||
@@ -2210,6 +2069,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
/* Even if schedstat is disabled, there should not be garbage */
|
||||
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
|
||||
#endif
|
||||
|
||||
@@ -2218,6 +2078,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
|
||||
__dl_clear_params(p);
|
||||
|
||||
INIT_LIST_HEAD(&p->rt.run_list);
|
||||
p->rt.timeout = 0;
|
||||
p->rt.time_slice = sched_rr_timeslice;
|
||||
p->rt.on_rq = 0;
|
||||
p->rt.on_list = 0;
|
||||
|
||||
#ifdef CONFIG_PREEMPT_NOTIFIERS
|
||||
INIT_HLIST_HEAD(&p->preempt_notifiers);
|
||||
@@ -2281,6 +2145,69 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
|
||||
#endif
|
||||
#endif
|
||||
|
||||
DEFINE_STATIC_KEY_FALSE(sched_schedstats);
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
static void set_schedstats(bool enabled)
|
||||
{
|
||||
if (enabled)
|
||||
static_branch_enable(&sched_schedstats);
|
||||
else
|
||||
static_branch_disable(&sched_schedstats);
|
||||
}
|
||||
|
||||
void force_schedstat_enabled(void)
|
||||
{
|
||||
if (!schedstat_enabled()) {
|
||||
pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n");
|
||||
static_branch_enable(&sched_schedstats);
|
||||
}
|
||||
}
|
||||
|
||||
static int __init setup_schedstats(char *str)
|
||||
{
|
||||
int ret = 0;
|
||||
if (!str)
|
||||
goto out;
|
||||
|
||||
if (!strcmp(str, "enable")) {
|
||||
set_schedstats(true);
|
||||
ret = 1;
|
||||
} else if (!strcmp(str, "disable")) {
|
||||
set_schedstats(false);
|
||||
ret = 1;
|
||||
}
|
||||
out:
|
||||
if (!ret)
|
||||
pr_warn("Unable to parse schedstats=\n");
|
||||
|
||||
return ret;
|
||||
}
|
||||
__setup("schedstats=", setup_schedstats);
|
||||
|
||||
#ifdef CONFIG_PROC_SYSCTL
|
||||
int sysctl_schedstats(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
struct ctl_table t;
|
||||
int err;
|
||||
int state = static_branch_likely(&sched_schedstats);
|
||||
|
||||
if (write && !capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
t = *table;
|
||||
t.data = &state;
|
||||
err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
|
||||
if (err < 0)
|
||||
return err;
|
||||
if (write)
|
||||
set_schedstats(state);
|
||||
return err;
|
||||
}
|
||||
#endif
|
||||
#endif
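
A short userspace sketch of how the new runtime switch might be flipped; the /proc path is inferred from the sysctl table name above and should be treated as an assumption.

#include <stdio.h>

/* Hypothetical helper: toggle schedstats at run time via the sysctl added
 * above (kernel.sched_schedstats, assumed to appear under /proc/sys). */
static int set_sched_schedstats(int enable)
{
	FILE *f = fopen("/proc/sys/kernel/sched_schedstats", "w");

	if (!f)
		return -1;
	fprintf(f, "%d\n", !!enable);
	return fclose(f);
}

int main(void)
{
	return set_sched_schedstats(1);
}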
|
||||
|
||||
/*
|
||||
* fork()/clone()-time setup:
|
||||
*/
|
||||
@@ -3010,16 +2937,6 @@ u64 scheduler_tick_max_deferment(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
notrace unsigned long get_parent_ip(unsigned long addr)
|
||||
{
|
||||
if (in_lock_functions(addr)) {
|
||||
addr = CALLER_ADDR2;
|
||||
if (in_lock_functions(addr))
|
||||
addr = CALLER_ADDR3;
|
||||
}
|
||||
return addr;
|
||||
}
|
||||
|
||||
#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
|
||||
defined(CONFIG_PREEMPT_TRACER))
|
||||
|
||||
@@ -3041,7 +2958,7 @@ void preempt_count_add(int val)
|
||||
PREEMPT_MASK - 10);
|
||||
#endif
|
||||
if (preempt_count() == val) {
|
||||
unsigned long ip = get_parent_ip(CALLER_ADDR1);
|
||||
unsigned long ip = get_lock_parent_ip();
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
current->preempt_disable_ip = ip;
|
||||
#endif
|
||||
@@ -3068,7 +2985,7 @@ void preempt_count_sub(int val)
|
||||
#endif
|
||||
|
||||
if (preempt_count() == val)
|
||||
trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
|
||||
trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
|
||||
__preempt_count_sub(val);
|
||||
}
|
||||
EXPORT_SYMBOL(preempt_count_sub);
|
||||
@@ -3280,7 +3197,6 @@ static void __sched notrace __schedule(bool preempt)
|
||||
|
||||
trace_sched_switch(preempt, prev, next);
|
||||
rq = context_switch(rq, prev, next); /* unlocks the rq */
|
||||
cpu = cpu_of(rq);
|
||||
} else {
|
||||
lockdep_unpin_lock(&rq->lock);
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
@@ -3466,7 +3382,7 @@ EXPORT_SYMBOL(default_wake_function);
|
||||
*/
|
||||
void rt_mutex_setprio(struct task_struct *p, int prio)
|
||||
{
|
||||
int oldprio, queued, running, enqueue_flag = ENQUEUE_RESTORE;
|
||||
int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
|
||||
struct rq *rq;
|
||||
const struct sched_class *prev_class;
|
||||
|
||||
@@ -3494,11 +3410,15 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
|
||||
|
||||
trace_sched_pi_setprio(p, prio);
|
||||
oldprio = p->prio;
|
||||
|
||||
if (oldprio == prio)
|
||||
queue_flag &= ~DEQUEUE_MOVE;
|
||||
|
||||
prev_class = p->sched_class;
|
||||
queued = task_on_rq_queued(p);
|
||||
running = task_current(rq, p);
|
||||
if (queued)
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE);
|
||||
dequeue_task(rq, p, queue_flag);
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
|
||||
@@ -3516,7 +3436,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
|
||||
if (!dl_prio(p->normal_prio) ||
|
||||
(pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
|
||||
p->dl.dl_boosted = 1;
|
||||
enqueue_flag |= ENQUEUE_REPLENISH;
|
||||
queue_flag |= ENQUEUE_REPLENISH;
|
||||
} else
|
||||
p->dl.dl_boosted = 0;
|
||||
p->sched_class = &dl_sched_class;
|
||||
@@ -3524,7 +3444,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
|
||||
if (dl_prio(oldprio))
|
||||
p->dl.dl_boosted = 0;
|
||||
if (oldprio < prio)
|
||||
enqueue_flag |= ENQUEUE_HEAD;
|
||||
queue_flag |= ENQUEUE_HEAD;
|
||||
p->sched_class = &rt_sched_class;
|
||||
} else {
|
||||
if (dl_prio(oldprio))
|
||||
@@ -3539,7 +3459,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
|
||||
if (running)
|
||||
p->sched_class->set_curr_task(rq);
|
||||
if (queued)
|
||||
enqueue_task(rq, p, enqueue_flag);
|
||||
enqueue_task(rq, p, queue_flag);
|
||||
|
||||
check_class_changed(rq, p, prev_class, oldprio);
|
||||
out_unlock:
|
||||
@@ -3895,6 +3815,7 @@ static int __sched_setscheduler(struct task_struct *p,
|
||||
const struct sched_class *prev_class;
|
||||
struct rq *rq;
|
||||
int reset_on_fork;
|
||||
int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
|
||||
|
||||
/* may grab non-irq protected spin_locks */
|
||||
BUG_ON(in_interrupt());
|
||||
@@ -4077,17 +3998,14 @@ change:
|
||||
* itself.
|
||||
*/
|
||||
new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
|
||||
if (new_effective_prio == oldprio) {
|
||||
__setscheduler_params(p, attr);
|
||||
task_rq_unlock(rq, p, &flags);
|
||||
return 0;
|
||||
}
|
||||
if (new_effective_prio == oldprio)
|
||||
queue_flags &= ~DEQUEUE_MOVE;
|
||||
}
|
||||
|
||||
queued = task_on_rq_queued(p);
|
||||
running = task_current(rq, p);
|
||||
if (queued)
|
||||
dequeue_task(rq, p, DEQUEUE_SAVE);
|
||||
dequeue_task(rq, p, queue_flags);
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
|
||||
@@ -4097,15 +4015,14 @@ change:
|
||||
if (running)
|
||||
p->sched_class->set_curr_task(rq);
|
||||
if (queued) {
|
||||
int enqueue_flags = ENQUEUE_RESTORE;
|
||||
/*
|
||||
* We enqueue to tail when the priority of a task is
|
||||
* increased (user space view).
|
||||
*/
|
||||
if (oldprio <= p->prio)
|
||||
enqueue_flags |= ENQUEUE_HEAD;
|
||||
if (oldprio < p->prio)
|
||||
queue_flags |= ENQUEUE_HEAD;
|
||||
|
||||
enqueue_task(rq, p, enqueue_flags);
|
||||
enqueue_task(rq, p, queue_flags);
|
||||
}
|
||||
|
||||
check_class_changed(rq, p, prev_class, oldprio);
|
||||
@@ -5096,6 +5013,8 @@ void init_idle(struct task_struct *idle, int cpu)
|
||||
idle->state = TASK_RUNNING;
|
||||
idle->se.exec_start = sched_clock();
|
||||
|
||||
kasan_unpoison_task_stack(idle);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* It's possible that init_idle() gets called multiple times on a task,

|
||||
@@ -5405,183 +5324,6 @@ static void migrate_tasks(struct rq *dead_rq)
|
||||
}
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
||||
|
||||
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
|
||||
|
||||
static struct ctl_table sd_ctl_dir[] = {
|
||||
{
|
||||
.procname = "sched_domain",
|
||||
.mode = 0555,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static struct ctl_table sd_ctl_root[] = {
|
||||
{
|
||||
.procname = "kernel",
|
||||
.mode = 0555,
|
||||
.child = sd_ctl_dir,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static struct ctl_table *sd_alloc_ctl_entry(int n)
|
||||
{
|
||||
struct ctl_table *entry =
|
||||
kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
static void sd_free_ctl_entry(struct ctl_table **tablep)
|
||||
{
|
||||
struct ctl_table *entry;
|
||||
|
||||
/*
|
||||
* In the intermediate directories, both the child directory and
|
||||
* procname are dynamically allocated and could fail but the mode
|
||||
* will always be set. In the lowest directory the names are
|
||||
* static strings and all have proc handlers.
|
||||
*/
|
||||
for (entry = *tablep; entry->mode; entry++) {
|
||||
if (entry->child)
|
||||
sd_free_ctl_entry(&entry->child);
|
||||
if (entry->proc_handler == NULL)
|
||||
kfree(entry->procname);
|
||||
}
|
||||
|
||||
kfree(*tablep);
|
||||
*tablep = NULL;
|
||||
}
|
||||
|
||||
static int min_load_idx = 0;
|
||||
static int max_load_idx = CPU_LOAD_IDX_MAX-1;
|
||||
|
||||
static void
|
||||
set_table_entry(struct ctl_table *entry,
|
||||
const char *procname, void *data, int maxlen,
|
||||
umode_t mode, proc_handler *proc_handler,
|
||||
bool load_idx)
|
||||
{
|
||||
entry->procname = procname;
|
||||
entry->data = data;
|
||||
entry->maxlen = maxlen;
|
||||
entry->mode = mode;
|
||||
entry->proc_handler = proc_handler;
|
||||
|
||||
if (load_idx) {
|
||||
entry->extra1 = &min_load_idx;
|
||||
entry->extra2 = &max_load_idx;
|
||||
}
|
||||
}
|
||||
|
||||
static struct ctl_table *
|
||||
sd_alloc_ctl_domain_table(struct sched_domain *sd)
|
||||
{
|
||||
struct ctl_table *table = sd_alloc_ctl_entry(14);
|
||||
|
||||
if (table == NULL)
|
||||
return NULL;
|
||||
|
||||
set_table_entry(&table[0], "min_interval", &sd->min_interval,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[1], "max_interval", &sd->max_interval,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[9], "cache_nice_tries",
|
||||
&sd->cache_nice_tries,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[10], "flags", &sd->flags,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[11], "max_newidle_lb_cost",
|
||||
&sd->max_newidle_lb_cost,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[12], "name", sd->name,
|
||||
CORENAME_MAX_SIZE, 0444, proc_dostring, false);
|
||||
/* &table[13] is terminator */
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
|
||||
{
|
||||
struct ctl_table *entry, *table;
|
||||
struct sched_domain *sd;
|
||||
int domain_num = 0, i;
|
||||
char buf[32];
|
||||
|
||||
for_each_domain(cpu, sd)
|
||||
domain_num++;
|
||||
entry = table = sd_alloc_ctl_entry(domain_num + 1);
|
||||
if (table == NULL)
|
||||
return NULL;
|
||||
|
||||
i = 0;
|
||||
for_each_domain(cpu, sd) {
|
||||
snprintf(buf, 32, "domain%d", i);
|
||||
entry->procname = kstrdup(buf, GFP_KERNEL);
|
||||
entry->mode = 0555;
|
||||
entry->child = sd_alloc_ctl_domain_table(sd);
|
||||
entry++;
|
||||
i++;
|
||||
}
|
||||
return table;
|
||||
}
|
||||
|
||||
static struct ctl_table_header *sd_sysctl_header;
|
||||
static void register_sched_domain_sysctl(void)
|
||||
{
|
||||
int i, cpu_num = num_possible_cpus();
|
||||
struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
|
||||
char buf[32];
|
||||
|
||||
WARN_ON(sd_ctl_dir[0].child);
|
||||
sd_ctl_dir[0].child = entry;
|
||||
|
||||
if (entry == NULL)
|
||||
return;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
snprintf(buf, 32, "cpu%d", i);
|
||||
entry->procname = kstrdup(buf, GFP_KERNEL);
|
||||
entry->mode = 0555;
|
||||
entry->child = sd_alloc_ctl_cpu_table(i);
|
||||
entry++;
|
||||
}
|
||||
|
||||
WARN_ON(sd_sysctl_header);
|
||||
sd_sysctl_header = register_sysctl_table(sd_ctl_root);
|
||||
}
|
||||
|
||||
/* may be called multiple times per register */
|
||||
static void unregister_sched_domain_sysctl(void)
|
||||
{
|
||||
unregister_sysctl_table(sd_sysctl_header);
|
||||
sd_sysctl_header = NULL;
|
||||
if (sd_ctl_dir[0].child)
|
||||
sd_free_ctl_entry(&sd_ctl_dir[0].child);
|
||||
}
|
||||
#else
|
||||
static void register_sched_domain_sysctl(void)
|
||||
{
|
||||
}
|
||||
static void unregister_sched_domain_sysctl(void)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_SCHED_DEBUG && CONFIG_SYSCTL */
|
||||
|
||||
static void set_rq_online(struct rq *rq)
|
||||
{
|
||||
if (!rq->online) {
|
||||
@@ -5693,16 +5435,6 @@ static int sched_cpu_active(struct notifier_block *nfb,
|
||||
set_cpu_rq_start_time();
|
||||
return NOTIFY_OK;
|
||||
|
||||
case CPU_ONLINE:
|
||||
/*
|
||||
* At this point a starting CPU has marked itself as online via
|
||||
* set_cpu_online(). But it might not yet have marked itself
|
||||
* as active, which is essential from here on.
|
||||
*/
|
||||
set_cpu_active(cpu, true);
|
||||
stop_machine_unpark(cpu);
|
||||
return NOTIFY_OK;
|
||||
|
||||
case CPU_DOWN_FAILED:
|
||||
set_cpu_active(cpu, true);
|
||||
return NOTIFY_OK;
|
||||
@@ -6174,11 +5906,16 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
|
||||
/* Setup the mask of cpus configured for isolated domains */
|
||||
static int __init isolated_cpu_setup(char *str)
|
||||
{
|
||||
int ret;
|
||||
|
||||
alloc_bootmem_cpumask_var(&cpu_isolated_map);
|
||||
cpulist_parse(str, cpu_isolated_map);
|
||||
ret = cpulist_parse(str, cpu_isolated_map);
|
||||
if (ret) {
|
||||
pr_err("sched: Error, all isolcpus= values must be between 0 and %d\n", nr_cpu_ids);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("isolcpus=", isolated_cpu_setup);
|
||||
|
||||
struct s_data {
|
||||
@@ -7889,7 +7626,7 @@ void sched_move_task(struct task_struct *tsk)
|
||||
queued = task_on_rq_queued(tsk);
|
||||
|
||||
if (queued)
|
||||
dequeue_task(rq, tsk, DEQUEUE_SAVE);
|
||||
dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
|
||||
if (unlikely(running))
|
||||
put_prev_task(rq, tsk);
|
||||
|
||||
@@ -7913,7 +7650,7 @@ void sched_move_task(struct task_struct *tsk)
|
||||
if (unlikely(running))
|
||||
tsk->sched_class->set_curr_task(rq);
|
||||
if (queued)
|
||||
enqueue_task(rq, tsk, ENQUEUE_RESTORE);
|
||||
enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
|
||||
|
||||
task_rq_unlock(rq, tsk, &flags);
|
||||
}
|
||||
|
@@ -262,21 +262,21 @@ static __always_inline bool steal_account_process_tick(void)
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
if (static_key_false(¶virt_steal_enabled)) {
|
||||
u64 steal;
|
||||
cputime_t steal_ct;
|
||||
unsigned long steal_jiffies;
|
||||
|
||||
steal = paravirt_steal_clock(smp_processor_id());
|
||||
steal -= this_rq()->prev_steal_time;
|
||||
|
||||
/*
|
||||
* cputime_t may be less precise than nsecs (eg: if it's
|
||||
* based on jiffies). Lets cast the result to cputime
|
||||
* steal is in nsecs but our caller is expecting steal
|
||||
* time in jiffies. Lets cast the result to jiffies
|
||||
* granularity and account the rest on the next rounds.
|
||||
*/
|
||||
steal_ct = nsecs_to_cputime(steal);
|
||||
this_rq()->prev_steal_time += cputime_to_nsecs(steal_ct);
|
||||
steal_jiffies = nsecs_to_jiffies(steal);
|
||||
this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies);
|
||||
|
||||
account_steal_time(steal_ct);
|
||||
return steal_ct;
|
||||
account_steal_time(jiffies_to_cputime(steal_jiffies));
|
||||
return steal_jiffies;
|
||||
}
|
||||
#endif
|
||||
return false;
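
The conversion above truncates the steal delta to whole jiffies and leaves the sub-jiffy remainder for the next round. A standalone arithmetic sketch (the HZ value is an assumption for the example):

#include <stdio.h>

#define HZ		250ULL			/* assumed tick rate */
#define NSEC_PER_SEC	1000000000ULL

int main(void)
{
	unsigned long long prev_steal_time = 0;			/* ns already accounted */
	unsigned long long steal = 7 * NSEC_PER_SEC / 1000;	/* 7 ms stolen          */

	unsigned long long steal_jiffies = steal * HZ / NSEC_PER_SEC;	/* 1 jiffy @ 250 Hz */
	prev_steal_time += steal_jiffies * NSEC_PER_SEC / HZ;		/* 4 ms accounted   */

	printf("accounted %llu jiffies, %llu ns carried to the next round\n",
	       steal_jiffies, steal - steal_jiffies * NSEC_PER_SEC / HZ);
	return 0;
}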
|
||||
@@ -668,26 +668,25 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
|
||||
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
|
||||
|
||||
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
|
||||
static unsigned long long vtime_delta(struct task_struct *tsk)
|
||||
static cputime_t vtime_delta(struct task_struct *tsk)
|
||||
{
|
||||
unsigned long long clock;
|
||||
unsigned long now = READ_ONCE(jiffies);
|
||||
|
||||
clock = local_clock();
|
||||
if (clock < tsk->vtime_snap)
|
||||
if (time_before(now, (unsigned long)tsk->vtime_snap))
|
||||
return 0;
|
||||
|
||||
return clock - tsk->vtime_snap;
|
||||
return jiffies_to_cputime(now - tsk->vtime_snap);
|
||||
}
|
||||
|
||||
static cputime_t get_vtime_delta(struct task_struct *tsk)
|
||||
{
|
||||
unsigned long long delta = vtime_delta(tsk);
|
||||
unsigned long now = READ_ONCE(jiffies);
|
||||
unsigned long delta = now - tsk->vtime_snap;
|
||||
|
||||
WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
|
||||
tsk->vtime_snap += delta;
|
||||
tsk->vtime_snap = now;
|
||||
|
||||
/* CHECKME: always safe to convert nsecs to cputime? */
|
||||
return nsecs_to_cputime(delta);
|
||||
return jiffies_to_cputime(delta);
|
||||
}
|
||||
|
||||
static void __vtime_account_system(struct task_struct *tsk)
|
||||
@@ -699,6 +698,9 @@ static void __vtime_account_system(struct task_struct *tsk)
|
||||
|
||||
void vtime_account_system(struct task_struct *tsk)
|
||||
{
|
||||
if (!vtime_delta(tsk))
|
||||
return;
|
||||
|
||||
write_seqcount_begin(&tsk->vtime_seqcount);
|
||||
__vtime_account_system(tsk);
|
||||
write_seqcount_end(&tsk->vtime_seqcount);
|
||||
@@ -707,7 +709,8 @@ void vtime_account_system(struct task_struct *tsk)
|
||||
void vtime_gen_account_irq_exit(struct task_struct *tsk)
|
||||
{
|
||||
write_seqcount_begin(&tsk->vtime_seqcount);
|
||||
__vtime_account_system(tsk);
|
||||
if (vtime_delta(tsk))
|
||||
__vtime_account_system(tsk);
|
||||
if (context_tracking_in_user())
|
||||
tsk->vtime_snap_whence = VTIME_USER;
|
||||
write_seqcount_end(&tsk->vtime_seqcount);
|
||||
@@ -718,16 +721,19 @@ void vtime_account_user(struct task_struct *tsk)
|
||||
cputime_t delta_cpu;
|
||||
|
||||
write_seqcount_begin(&tsk->vtime_seqcount);
|
||||
delta_cpu = get_vtime_delta(tsk);
|
||||
tsk->vtime_snap_whence = VTIME_SYS;
|
||||
account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
|
||||
if (vtime_delta(tsk)) {
|
||||
delta_cpu = get_vtime_delta(tsk);
|
||||
account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
|
||||
}
|
||||
write_seqcount_end(&tsk->vtime_seqcount);
|
||||
}
|
||||
|
||||
void vtime_user_enter(struct task_struct *tsk)
|
||||
{
|
||||
write_seqcount_begin(&tsk->vtime_seqcount);
|
||||
__vtime_account_system(tsk);
|
||||
if (vtime_delta(tsk))
|
||||
__vtime_account_system(tsk);
|
||||
tsk->vtime_snap_whence = VTIME_USER;
|
||||
write_seqcount_end(&tsk->vtime_seqcount);
|
||||
}
|
||||
@@ -742,7 +748,8 @@ void vtime_guest_enter(struct task_struct *tsk)
|
||||
* that can thus safely catch up with a tickless delta.
|
||||
*/
|
||||
write_seqcount_begin(&tsk->vtime_seqcount);
|
||||
__vtime_account_system(tsk);
|
||||
if (vtime_delta(tsk))
|
||||
__vtime_account_system(tsk);
|
||||
current->flags |= PF_VCPU;
|
||||
write_seqcount_end(&tsk->vtime_seqcount);
|
||||
}
|
||||
@@ -772,7 +779,7 @@ void arch_vtime_task_switch(struct task_struct *prev)
|
||||
|
||||
write_seqcount_begin(¤t->vtime_seqcount);
|
||||
current->vtime_snap_whence = VTIME_SYS;
|
||||
current->vtime_snap = sched_clock_cpu(smp_processor_id());
|
||||
current->vtime_snap = jiffies;
|
||||
write_seqcount_end(¤t->vtime_seqcount);
|
||||
}
|
||||
|
||||
@@ -783,7 +790,7 @@ void vtime_init_idle(struct task_struct *t, int cpu)
|
||||
local_irq_save(flags);
|
||||
write_seqcount_begin(&t->vtime_seqcount);
|
||||
t->vtime_snap_whence = VTIME_SYS;
|
||||
t->vtime_snap = sched_clock_cpu(cpu);
|
||||
t->vtime_snap = jiffies;
|
||||
write_seqcount_end(&t->vtime_seqcount);
|
||||
local_irq_restore(flags);
|
||||
}
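
Since vtime snapshots are now plain jiffies values, comparisons go through time_before(), which stays correct across counter wrap-around. A small userspace illustration of that trick on a 32-bit counter:

#include <stdbool.h>
#include <stdio.h>

/* Illustration of the wrap-safe comparison used above, on a 32-bit counter. */
static bool time_before_u32(unsigned int a, unsigned int b)
{
	return (int)(a - b) < 0;	/* signed difference handles the wrap */
}

int main(void)
{
	unsigned int snap = 0xfffffff0u;	/* snapshot just before wrap */
	unsigned int now  = 0x00000010u;	/* current value after wrap  */

	printf("%d\n", time_before_u32(now, snap));	/* 0: 'now' is later */
	return 0;
}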
|
||||
|
@@ -352,7 +352,15 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
|
||||
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
|
||||
struct rq *rq = rq_of_dl_rq(dl_rq);
|
||||
|
||||
WARN_ON(!dl_se->dl_new || dl_se->dl_throttled);
|
||||
WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));
|
||||
|
||||
/*
|
||||
* We are racing with the deadline timer. So, do nothing because
|
||||
* the deadline timer handler will take care of properly recharging
|
||||
* the runtime and postponing the deadline
|
||||
*/
|
||||
if (dl_se->dl_throttled)
|
||||
return;
|
||||
|
||||
/*
|
||||
* We use the regular wall clock time to set deadlines in the
|
||||
@@ -361,7 +369,6 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
|
||||
*/
|
||||
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
|
||||
dl_se->runtime = pi_se->dl_runtime;
|
||||
dl_se->dl_new = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -503,15 +510,6 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
|
||||
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
|
||||
struct rq *rq = rq_of_dl_rq(dl_rq);
|
||||
|
||||
/*
|
||||
* The arrival of a new instance needs special treatment, i.e.,
|
||||
* the actual scheduling parameters have to be "renewed".
|
||||
*/
|
||||
if (dl_se->dl_new) {
|
||||
setup_new_dl_entity(dl_se, pi_se);
|
||||
return;
|
||||
}
|
||||
|
||||
if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
|
||||
dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
|
||||
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
|
||||
@@ -607,16 +605,6 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is possible if switched_from_dl() raced against a running
|
||||
* callback that took the above !dl_task() path and we've since then
|
||||
* switched back into SCHED_DEADLINE.
|
||||
*
|
||||
* There's nothing to do except drop our task reference.
|
||||
*/
|
||||
if (dl_se->dl_new)
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
* The task might have been boosted by someone else and might be in the
|
||||
* boosting/deboosting path, it's not throttled.
|
||||
@@ -925,7 +913,7 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se,
|
||||
* parameters of the task might need updating. Otherwise,
|
||||
* we want a replenishment of its runtime.
|
||||
*/
|
||||
if (dl_se->dl_new || flags & ENQUEUE_WAKEUP)
|
||||
if (flags & ENQUEUE_WAKEUP)
|
||||
update_dl_entity(dl_se, pi_se);
|
||||
else if (flags & ENQUEUE_REPLENISH)
|
||||
replenish_dl_entity(dl_se, pi_se);
|
||||
@@ -1726,6 +1714,9 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
|
||||
*/
|
||||
static void switched_to_dl(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
if (dl_time_before(p->dl.deadline, rq_clock(rq)))
|
||||
setup_new_dl_entity(&p->dl, &p->dl);
|
||||
|
||||
if (task_on_rq_queued(p) && rq->curr != p) {
|
||||
#ifdef CONFIG_SMP
|
||||
if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
|
||||
@@ -1772,8 +1763,7 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
|
||||
*/
|
||||
resched_curr(rq);
|
||||
#endif /* CONFIG_SMP */
|
||||
} else
|
||||
switched_to_dl(rq, p);
|
||||
}
|
||||
}
|
||||
|
||||
const struct sched_class dl_sched_class = {
|
||||
|
@@ -16,6 +16,7 @@
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/debugfs.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
@@ -58,6 +59,309 @@ static unsigned long nsec_low(unsigned long long nsec)
|
||||
|
||||
#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
|
||||
|
||||
#define SCHED_FEAT(name, enabled) \
|
||||
#name ,
|
||||
|
||||
static const char * const sched_feat_names[] = {
|
||||
#include "features.h"
|
||||
};
|
||||
|
||||
#undef SCHED_FEAT
|
||||
|
||||
static int sched_feat_show(struct seq_file *m, void *v)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < __SCHED_FEAT_NR; i++) {
|
||||
if (!(sysctl_sched_features & (1UL << i)))
|
||||
seq_puts(m, "NO_");
|
||||
seq_printf(m, "%s ", sched_feat_names[i]);
|
||||
}
|
||||
seq_puts(m, "\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef HAVE_JUMP_LABEL
|
||||
|
||||
#define jump_label_key__true STATIC_KEY_INIT_TRUE
|
||||
#define jump_label_key__false STATIC_KEY_INIT_FALSE
|
||||
|
||||
#define SCHED_FEAT(name, enabled) \
|
||||
jump_label_key__##enabled ,
|
||||
|
||||
struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
|
||||
#include "features.h"
|
||||
};
|
||||
|
||||
#undef SCHED_FEAT
|
||||
|
||||
static void sched_feat_disable(int i)
|
||||
{
|
||||
static_key_disable(&sched_feat_keys[i]);
|
||||
}
|
||||
|
||||
static void sched_feat_enable(int i)
|
||||
{
|
||||
static_key_enable(&sched_feat_keys[i]);
|
||||
}
|
||||
#else
|
||||
static void sched_feat_disable(int i) { };
|
||||
static void sched_feat_enable(int i) { };
|
||||
#endif /* HAVE_JUMP_LABEL */
|
||||
|
||||
static int sched_feat_set(char *cmp)
|
||||
{
|
||||
int i;
|
||||
int neg = 0;
|
||||
|
||||
if (strncmp(cmp, "NO_", 3) == 0) {
|
||||
neg = 1;
|
||||
cmp += 3;
|
||||
}
|
||||
|
||||
for (i = 0; i < __SCHED_FEAT_NR; i++) {
|
||||
if (strcmp(cmp, sched_feat_names[i]) == 0) {
|
||||
if (neg) {
|
||||
sysctl_sched_features &= ~(1UL << i);
|
||||
sched_feat_disable(i);
|
||||
} else {
|
||||
sysctl_sched_features |= (1UL << i);
|
||||
sched_feat_enable(i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
sched_feat_write(struct file *filp, const char __user *ubuf,
|
||||
size_t cnt, loff_t *ppos)
|
||||
{
|
||||
char buf[64];
|
||||
char *cmp;
|
||||
int i;
|
||||
struct inode *inode;
|
||||
|
||||
if (cnt > 63)
|
||||
cnt = 63;
|
||||
|
||||
if (copy_from_user(&buf, ubuf, cnt))
|
||||
return -EFAULT;
|
||||
|
||||
buf[cnt] = 0;
|
||||
cmp = strstrip(buf);
|
||||
|
||||
/* Ensure the static_key remains in a consistent state */
|
||||
inode = file_inode(filp);
|
||||
inode_lock(inode);
|
||||
i = sched_feat_set(cmp);
|
||||
inode_unlock(inode);
|
||||
if (i == __SCHED_FEAT_NR)
|
||||
return -EINVAL;
|
||||
|
||||
*ppos += cnt;
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static int sched_feat_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return single_open(filp, sched_feat_show, NULL);
|
||||
}
|
||||
|
||||
static const struct file_operations sched_feat_fops = {
|
||||
.open = sched_feat_open,
|
||||
.write = sched_feat_write,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
static __init int sched_init_debug(void)
|
||||
{
|
||||
debugfs_create_file("sched_features", 0644, NULL, NULL,
|
||||
&sched_feat_fops);
|
||||
|
||||
return 0;
|
||||
}
|
||||
late_initcall(sched_init_debug);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
|
||||
static struct ctl_table sd_ctl_dir[] = {
|
||||
{
|
||||
.procname = "sched_domain",
|
||||
.mode = 0555,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static struct ctl_table sd_ctl_root[] = {
|
||||
{
|
||||
.procname = "kernel",
|
||||
.mode = 0555,
|
||||
.child = sd_ctl_dir,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static struct ctl_table *sd_alloc_ctl_entry(int n)
|
||||
{
|
||||
struct ctl_table *entry =
|
||||
kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
static void sd_free_ctl_entry(struct ctl_table **tablep)
|
||||
{
|
||||
struct ctl_table *entry;
|
||||
|
||||
/*
|
||||
* In the intermediate directories, both the child directory and
|
||||
* procname are dynamically allocated and could fail but the mode
|
||||
* will always be set. In the lowest directory the names are
|
||||
* static strings and all have proc handlers.
|
||||
*/
|
||||
for (entry = *tablep; entry->mode; entry++) {
|
||||
if (entry->child)
|
||||
sd_free_ctl_entry(&entry->child);
|
||||
if (entry->proc_handler == NULL)
|
||||
kfree(entry->procname);
|
||||
}
|
||||
|
||||
kfree(*tablep);
|
||||
*tablep = NULL;
|
||||
}
|
||||
|
||||
static int min_load_idx = 0;
|
||||
static int max_load_idx = CPU_LOAD_IDX_MAX-1;
|
||||
|
||||
static void
|
||||
set_table_entry(struct ctl_table *entry,
|
||||
const char *procname, void *data, int maxlen,
|
||||
umode_t mode, proc_handler *proc_handler,
|
||||
bool load_idx)
|
||||
{
|
||||
entry->procname = procname;
|
||||
entry->data = data;
|
||||
entry->maxlen = maxlen;
|
||||
entry->mode = mode;
|
||||
entry->proc_handler = proc_handler;
|
||||
|
||||
if (load_idx) {
|
||||
entry->extra1 = &min_load_idx;
|
||||
entry->extra2 = &max_load_idx;
|
||||
}
|
||||
}
|
||||
|
||||
static struct ctl_table *
|
||||
sd_alloc_ctl_domain_table(struct sched_domain *sd)
|
||||
{
|
||||
struct ctl_table *table = sd_alloc_ctl_entry(14);
|
||||
|
||||
if (table == NULL)
|
||||
return NULL;
|
||||
|
||||
set_table_entry(&table[0], "min_interval", &sd->min_interval,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[1], "max_interval", &sd->max_interval,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, true);
|
||||
set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[9], "cache_nice_tries",
|
||||
&sd->cache_nice_tries,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[10], "flags", &sd->flags,
|
||||
sizeof(int), 0644, proc_dointvec_minmax, false);
|
||||
set_table_entry(&table[11], "max_newidle_lb_cost",
|
||||
&sd->max_newidle_lb_cost,
|
||||
sizeof(long), 0644, proc_doulongvec_minmax, false);
|
||||
set_table_entry(&table[12], "name", sd->name,
|
||||
CORENAME_MAX_SIZE, 0444, proc_dostring, false);
|
||||
/* &table[13] is terminator */
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
|
||||
{
|
||||
struct ctl_table *entry, *table;
|
||||
struct sched_domain *sd;
|
||||
int domain_num = 0, i;
|
||||
char buf[32];
|
||||
|
||||
for_each_domain(cpu, sd)
|
||||
domain_num++;
|
||||
entry = table = sd_alloc_ctl_entry(domain_num + 1);
|
||||
if (table == NULL)
|
||||
return NULL;
|
||||
|
||||
i = 0;
|
||||
for_each_domain(cpu, sd) {
|
||||
snprintf(buf, 32, "domain%d", i);
|
||||
entry->procname = kstrdup(buf, GFP_KERNEL);
|
||||
entry->mode = 0555;
|
||||
entry->child = sd_alloc_ctl_domain_table(sd);
|
||||
entry++;
|
||||
i++;
|
||||
}
|
||||
return table;
|
||||
}
|
||||
|
||||
static struct ctl_table_header *sd_sysctl_header;
|
||||
void register_sched_domain_sysctl(void)
|
||||
{
|
||||
int i, cpu_num = num_possible_cpus();
|
||||
struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
|
||||
char buf[32];
|
||||
|
||||
WARN_ON(sd_ctl_dir[0].child);
|
||||
sd_ctl_dir[0].child = entry;
|
||||
|
||||
if (entry == NULL)
|
||||
return;
|
||||
|
||||
for_each_possible_cpu(i) {
|
||||
snprintf(buf, 32, "cpu%d", i);
|
||||
entry->procname = kstrdup(buf, GFP_KERNEL);
|
||||
entry->mode = 0555;
|
||||
entry->child = sd_alloc_ctl_cpu_table(i);
|
||||
entry++;
|
||||
}
|
||||
|
||||
WARN_ON(sd_sysctl_header);
|
||||
sd_sysctl_header = register_sysctl_table(sd_ctl_root);
|
||||
}
|
||||
|
||||
/* may be called multiple times per register */
|
||||
void unregister_sched_domain_sysctl(void)
|
||||
{
|
||||
unregister_sysctl_table(sd_sysctl_header);
|
||||
sd_sysctl_header = NULL;
|
||||
if (sd_ctl_dir[0].child)
|
||||
sd_free_ctl_entry(&sd_ctl_dir[0].child);
|
||||
}
|
||||
#endif /* CONFIG_SYSCTL */
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
|
||||
{
|
||||
@@ -75,16 +379,18 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
|
||||
PN(se->vruntime);
|
||||
PN(se->sum_exec_runtime);
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
PN(se->statistics.wait_start);
|
||||
PN(se->statistics.sleep_start);
|
||||
PN(se->statistics.block_start);
|
||||
PN(se->statistics.sleep_max);
|
||||
PN(se->statistics.block_max);
|
||||
PN(se->statistics.exec_max);
|
||||
PN(se->statistics.slice_max);
|
||||
PN(se->statistics.wait_max);
|
||||
PN(se->statistics.wait_sum);
|
||||
P(se->statistics.wait_count);
|
||||
if (schedstat_enabled()) {
|
||||
PN(se->statistics.wait_start);
|
||||
PN(se->statistics.sleep_start);
|
||||
PN(se->statistics.block_start);
|
||||
PN(se->statistics.sleep_max);
|
||||
PN(se->statistics.block_max);
|
||||
PN(se->statistics.exec_max);
|
||||
PN(se->statistics.slice_max);
|
||||
PN(se->statistics.wait_max);
|
||||
PN(se->statistics.wait_sum);
|
||||
P(se->statistics.wait_count);
|
||||
}
|
||||
#endif
|
||||
P(se->load.weight);
|
||||
#ifdef CONFIG_SMP
|
||||
@@ -122,10 +428,12 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
|
||||
(long long)(p->nvcsw + p->nivcsw),
|
||||
p->prio);
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
|
||||
SPLIT_NS(p->se.statistics.wait_sum),
|
||||
SPLIT_NS(p->se.sum_exec_runtime),
|
||||
SPLIT_NS(p->se.statistics.sum_sleep_runtime));
|
||||
if (schedstat_enabled()) {
|
||||
SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
|
||||
SPLIT_NS(p->se.statistics.wait_sum),
|
||||
SPLIT_NS(p->se.sum_exec_runtime),
|
||||
SPLIT_NS(p->se.statistics.sum_sleep_runtime));
|
||||
}
|
||||
#else
|
||||
SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
|
||||
0LL, 0L,
|
||||
@@ -258,8 +566,17 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
|
||||
|
||||
void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
|
||||
{
|
||||
struct dl_bw *dl_bw;
|
||||
|
||||
SEQ_printf(m, "\ndl_rq[%d]:\n", cpu);
|
||||
SEQ_printf(m, " .%-30s: %ld\n", "dl_nr_running", dl_rq->dl_nr_running);
|
||||
#ifdef CONFIG_SMP
|
||||
dl_bw = &cpu_rq(cpu)->rd->dl_bw;
|
||||
#else
|
||||
dl_bw = &dl_rq->dl_bw;
|
||||
#endif
|
||||
SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
|
||||
SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);
|
||||
}
|
||||
|
||||
extern __read_mostly int sched_clock_running;
|
||||
@@ -313,17 +630,18 @@ do { \
|
||||
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
|
||||
#define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n);
|
||||
|
||||
P(yld_count);
|
||||
|
||||
P(sched_count);
|
||||
P(sched_goidle);
|
||||
#ifdef CONFIG_SMP
|
||||
P64(avg_idle);
|
||||
P64(max_idle_balance_cost);
|
||||
#endif
|
||||
|
||||
P(ttwu_count);
|
||||
P(ttwu_local);
|
||||
if (schedstat_enabled()) {
|
||||
P(yld_count);
|
||||
P(sched_count);
|
||||
P(sched_goidle);
|
||||
P(ttwu_count);
|
||||
P(ttwu_local);
|
||||
}
|
||||
|
||||
#undef P
|
||||
#undef P64
|
||||
@@ -569,38 +887,39 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
|
||||
nr_switches = p->nvcsw + p->nivcsw;
|
||||
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
PN(se.statistics.sum_sleep_runtime);
|
||||
PN(se.statistics.wait_start);
|
||||
PN(se.statistics.sleep_start);
|
||||
PN(se.statistics.block_start);
|
||||
PN(se.statistics.sleep_max);
|
||||
PN(se.statistics.block_max);
|
||||
PN(se.statistics.exec_max);
|
||||
PN(se.statistics.slice_max);
|
||||
PN(se.statistics.wait_max);
|
||||
PN(se.statistics.wait_sum);
|
||||
P(se.statistics.wait_count);
|
||||
PN(se.statistics.iowait_sum);
|
||||
P(se.statistics.iowait_count);
|
||||
P(se.nr_migrations);
|
||||
P(se.statistics.nr_migrations_cold);
|
||||
P(se.statistics.nr_failed_migrations_affine);
|
||||
P(se.statistics.nr_failed_migrations_running);
|
||||
P(se.statistics.nr_failed_migrations_hot);
|
||||
P(se.statistics.nr_forced_migrations);
|
||||
P(se.statistics.nr_wakeups);
|
||||
P(se.statistics.nr_wakeups_sync);
|
||||
P(se.statistics.nr_wakeups_migrate);
|
||||
P(se.statistics.nr_wakeups_local);
|
||||
P(se.statistics.nr_wakeups_remote);
|
||||
P(se.statistics.nr_wakeups_affine);
|
||||
P(se.statistics.nr_wakeups_affine_attempts);
|
||||
P(se.statistics.nr_wakeups_passive);
|
||||
P(se.statistics.nr_wakeups_idle);
|
||||
|
||||
{
|
||||
if (schedstat_enabled()) {
|
||||
u64 avg_atom, avg_per_cpu;
|
||||
|
||||
PN(se.statistics.sum_sleep_runtime);
|
||||
PN(se.statistics.wait_start);
|
||||
PN(se.statistics.sleep_start);
|
||||
PN(se.statistics.block_start);
|
||||
PN(se.statistics.sleep_max);
|
||||
PN(se.statistics.block_max);
|
||||
PN(se.statistics.exec_max);
|
||||
PN(se.statistics.slice_max);
|
||||
PN(se.statistics.wait_max);
|
||||
PN(se.statistics.wait_sum);
|
||||
P(se.statistics.wait_count);
|
||||
PN(se.statistics.iowait_sum);
|
||||
P(se.statistics.iowait_count);
|
||||
P(se.statistics.nr_migrations_cold);
|
||||
P(se.statistics.nr_failed_migrations_affine);
|
||||
P(se.statistics.nr_failed_migrations_running);
|
||||
P(se.statistics.nr_failed_migrations_hot);
|
||||
P(se.statistics.nr_forced_migrations);
|
||||
P(se.statistics.nr_wakeups);
|
||||
P(se.statistics.nr_wakeups_sync);
|
||||
P(se.statistics.nr_wakeups_migrate);
|
||||
P(se.statistics.nr_wakeups_local);
|
||||
P(se.statistics.nr_wakeups_remote);
|
||||
P(se.statistics.nr_wakeups_affine);
|
||||
P(se.statistics.nr_wakeups_affine_attempts);
|
||||
P(se.statistics.nr_wakeups_passive);
|
||||
P(se.statistics.nr_wakeups_idle);
|
||||
|
||||
avg_atom = p->se.sum_exec_runtime;
|
||||
if (nr_switches)
|
||||
avg_atom = div64_ul(avg_atom, nr_switches);
|
||||
|
@@ -20,8 +20,8 @@
|
||||
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
|
||||
*/
|
||||
|
||||
#include <linux/latencytop.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/latencytop.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/cpuidle.h>
|
||||
#include <linux/slab.h>
|
||||
@@ -755,7 +755,9 @@ static void
|
||||
update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
struct task_struct *p;
|
||||
u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
|
||||
u64 delta;
|
||||
|
||||
delta = rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start;
|
||||
|
||||
if (entity_is_task(se)) {
|
||||
p = task_of(se);
|
||||
@@ -776,22 +778,12 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
se->statistics.wait_sum += delta;
|
||||
se->statistics.wait_start = 0;
|
||||
}
|
||||
#else
|
||||
static inline void
|
||||
update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Task is being enqueued - update stats:
|
||||
*/
|
||||
static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
static inline void
|
||||
update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
/*
|
||||
* Are we enqueueing a waiting task? (for current tasks
|
||||
@@ -802,7 +794,7 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
}
|
||||
|
||||
static inline void
|
||||
update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
{
|
||||
/*
|
||||
* Mark the end of the wait period if dequeueing a
|
||||
@@ -810,7 +802,40 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
*/
|
||||
if (se != cfs_rq->curr)
|
||||
update_stats_wait_end(cfs_rq, se);
|
||||
|
||||
if (flags & DEQUEUE_SLEEP) {
|
||||
if (entity_is_task(se)) {
|
||||
struct task_struct *tsk = task_of(se);
|
||||
|
||||
if (tsk->state & TASK_INTERRUPTIBLE)
|
||||
se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
|
||||
if (tsk->state & TASK_UNINTERRUPTIBLE)
|
||||
se->statistics.block_start = rq_clock(rq_of(cfs_rq));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
static inline void
|
||||
update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We are picking a new current task - update its stats:
|
||||
@@ -907,10 +932,11 @@ struct numa_group {
|
||||
spinlock_t lock; /* nr_tasks, tasks */
|
||||
int nr_tasks;
|
||||
pid_t gid;
|
||||
int active_nodes;
|
||||
|
||||
struct rcu_head rcu;
|
||||
nodemask_t active_nodes;
|
||||
unsigned long total_faults;
|
||||
unsigned long max_faults_cpu;
|
||||
/*
|
||||
* Faults_cpu is used to decide whether memory should move
|
||||
* towards the CPU. As a consequence, these stats are weighted
|
||||
@@ -969,6 +995,18 @@ static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
|
||||
group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 1)];
|
||||
}
|
||||
|
||||
/*
|
||||
* A node triggering more than 1/3 as many NUMA faults as the maximum is
|
||||
* considered part of a numa group's pseudo-interleaving set. Migrations
|
||||
* between these nodes are slowed down, to allow things to settle down.
|
||||
*/
|
||||
#define ACTIVE_NODE_FRACTION 3
|
||||
|
||||
static bool numa_is_active_node(int nid, struct numa_group *ng)
|
||||
{
|
||||
return group_faults_cpu(ng, nid) * ACTIVE_NODE_FRACTION > ng->max_faults_cpu;
|
||||
}
|
||||
|
||||
/* Handle placement on systems where not all nodes are directly connected. */
|
||||
static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
|
||||
int maxdist, bool task)
|
||||
@@ -1118,27 +1156,23 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Do not migrate if the destination is not a node that
|
||||
* is actively used by this numa group.
|
||||
* Destination node is much more heavily used than the source
|
||||
* node? Allow migration.
|
||||
*/
|
||||
if (!node_isset(dst_nid, ng->active_nodes))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Source is a node that is not actively used by this
|
||||
* numa group, while the destination is. Migrate.
|
||||
*/
|
||||
if (!node_isset(src_nid, ng->active_nodes))
|
||||
if (group_faults_cpu(ng, dst_nid) > group_faults_cpu(ng, src_nid) *
|
||||
ACTIVE_NODE_FRACTION)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Both source and destination are nodes in active
|
||||
* use by this numa group. Maximize memory bandwidth
|
||||
* by migrating from more heavily used groups, to less
|
||||
* heavily used ones, spreading the load around.
|
||||
* Use a 1/4 hysteresis to avoid spurious page movement.
|
||||
* Distribute memory according to CPU & memory use on each node,
|
||||
* with 3/4 hysteresis to avoid unnecessary memory migrations:
|
||||
*
|
||||
 *	faults_cpu(dst)   3   faults_cpu(src)
 *	--------------- * - > ---------------
 *	faults_mem(dst)   4   faults_mem(src)
*/
|
||||
return group_faults(p, dst_nid) < (group_faults(p, src_nid) * 3 / 4);
|
||||
return group_faults_cpu(ng, dst_nid) * group_faults(p, src_nid) * 3 >
|
||||
group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
|
||||
}
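
The cross-multiplied comparison above is the integer form of the 3/4 rule in the comment. A tiny standalone check with made-up fault counts:

#include <stdbool.h>
#include <stdio.h>

/* Illustration of the 3/4 hysteresis above, with made-up fault counts. */
static bool prefer_dst(unsigned long cpu_dst, unsigned long mem_dst,
		       unsigned long cpu_src, unsigned long mem_src)
{
	/* faults_cpu(dst)/faults_mem(dst) * 3/4 > faults_cpu(src)/faults_mem(src),
	 * cross-multiplied so everything stays in integer arithmetic. */
	return cpu_dst * mem_src * 3 > cpu_src * mem_dst * 4;
}

int main(void)
{
	printf("%d\n", prefer_dst(40, 10, 20, 10));	/* 1: dst clearly hotter    */
	printf("%d\n", prefer_dst(25, 10, 20, 10));	/* 0: inside the hysteresis */
	return 0;
}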
|
||||
|
||||
static unsigned long weighted_cpuload(const int cpu);
|
||||
@@ -1484,7 +1518,7 @@ static int task_numa_migrate(struct task_struct *p)
|
||||
|
||||
.best_task = NULL,
|
||||
.best_imp = 0,
|
||||
.best_cpu = -1
|
||||
.best_cpu = -1,
|
||||
};
|
||||
struct sched_domain *sd;
|
||||
unsigned long taskweight, groupweight;
|
||||
@@ -1536,8 +1570,7 @@ static int task_numa_migrate(struct task_struct *p)
|
||||
* multiple NUMA nodes; in order to better consolidate the group,
|
||||
* we need to check other locations.
|
||||
*/
|
||||
if (env.best_cpu == -1 || (p->numa_group &&
|
||||
nodes_weight(p->numa_group->active_nodes) > 1)) {
|
||||
if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) {
|
||||
for_each_online_node(nid) {
|
||||
if (nid == env.src_nid || nid == p->numa_preferred_nid)
|
||||
continue;
|
||||
@@ -1572,12 +1605,14 @@ static int task_numa_migrate(struct task_struct *p)
|
||||
* trying for a better one later. Do not set the preferred node here.
|
||||
*/
|
||||
if (p->numa_group) {
|
||||
struct numa_group *ng = p->numa_group;
|
||||
|
||||
if (env.best_cpu == -1)
|
||||
nid = env.src_nid;
|
||||
else
|
||||
nid = env.dst_nid;
|
||||
|
||||
if (node_isset(nid, p->numa_group->active_nodes))
|
||||
if (ng->active_nodes > 1 && numa_is_active_node(env.dst_nid, ng))
|
||||
sched_setnuma(p, env.dst_nid);
|
||||
}
|
||||
|
||||
@@ -1627,20 +1662,15 @@ static void numa_migrate_preferred(struct task_struct *p)
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the nodes on which the workload is actively running. We do this by
|
||||
* Find out how many nodes the workload is actively running on. Do this by
|
||||
* tracking the nodes from which NUMA hinting faults are triggered. This can
|
||||
* be different from the set of nodes where the workload's memory is currently
|
||||
* located.
|
||||
*
|
||||
* The bitmask is used to make smarter decisions on when to do NUMA page
|
||||
* migrations, To prevent flip-flopping, and excessive page migrations, nodes
|
||||
* are added when they cause over 6/16 of the maximum number of faults, but
|
||||
* only removed when they drop below 3/16.
|
||||
*/
|
||||
static void update_numa_active_node_mask(struct numa_group *numa_group)
|
||||
static void numa_group_count_active_nodes(struct numa_group *numa_group)
|
||||
{
|
||||
unsigned long faults, max_faults = 0;
|
||||
int nid;
|
||||
int nid, active_nodes = 0;
|
||||
|
||||
for_each_online_node(nid) {
|
||||
faults = group_faults_cpu(numa_group, nid);
|
||||
@@ -1650,12 +1680,12 @@ static void update_numa_active_node_mask(struct numa_group *numa_group)
|
||||
|
||||
for_each_online_node(nid) {
|
||||
faults = group_faults_cpu(numa_group, nid);
|
||||
if (!node_isset(nid, numa_group->active_nodes)) {
|
||||
if (faults > max_faults * 6 / 16)
|
||||
node_set(nid, numa_group->active_nodes);
|
||||
} else if (faults < max_faults * 3 / 16)
|
||||
node_clear(nid, numa_group->active_nodes);
|
||||
if (faults * ACTIVE_NODE_FRACTION > max_faults)
|
||||
active_nodes++;
|
||||
}
|
||||
|
||||
numa_group->max_faults_cpu = max_faults;
|
||||
numa_group->active_nodes = active_nodes;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1946,7 +1976,7 @@ static void task_numa_placement(struct task_struct *p)
|
||||
update_task_scan_period(p, fault_types[0], fault_types[1]);
|
||||
|
||||
if (p->numa_group) {
|
||||
update_numa_active_node_mask(p->numa_group);
|
||||
numa_group_count_active_nodes(p->numa_group);
|
||||
spin_unlock_irq(group_lock);
|
||||
max_nid = preferred_group_nid(p, max_group_nid);
|
||||
}
|
||||
@@ -1990,14 +2020,14 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
|
||||
return;
|
||||
|
||||
atomic_set(&grp->refcount, 1);
|
||||
grp->active_nodes = 1;
|
||||
grp->max_faults_cpu = 0;
|
||||
spin_lock_init(&grp->lock);
|
||||
grp->gid = p->pid;
|
||||
/* Second half of the array tracks nids where faults happen */
|
||||
grp->faults_cpu = grp->faults + NR_NUMA_HINT_FAULT_TYPES *
|
||||
nr_node_ids;
|
||||
|
||||
node_set(task_node(current), grp->active_nodes);
|
||||
|
||||
for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
|
||||
grp->faults[i] = p->numa_faults[i];
|
||||
|
||||
@@ -2111,6 +2141,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
|
||||
bool migrated = flags & TNF_MIGRATED;
|
||||
int cpu_node = task_node(current);
|
||||
int local = !!(flags & TNF_FAULT_LOCAL);
|
||||
struct numa_group *ng;
|
||||
int priv;
|
||||
|
||||
if (!static_branch_likely(&sched_numa_balancing))
|
||||
@@ -2151,9 +2182,10 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
|
||||
* actively using should be counted as local. This allows the
|
||||
* scan rate to slow down when a workload has settled down.
|
||||
*/
|
||||
if (!priv && !local && p->numa_group &&
|
||||
node_isset(cpu_node, p->numa_group->active_nodes) &&
|
||||
node_isset(mem_node, p->numa_group->active_nodes))
|
||||
ng = p->numa_group;
|
||||
if (!priv && !local && ng && ng->active_nodes > 1 &&
|
||||
numa_is_active_node(cpu_node, ng) &&
|
||||
numa_is_active_node(mem_node, ng))
|
||||
local = 1;
|
||||
|
||||
task_numa_placement(p);
|
||||
@@ -3102,6 +3134,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
|
||||
|
||||
static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
|
||||
|
||||
static inline void check_schedstat_required(void)
|
||||
{
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
if (schedstat_enabled())
|
||||
return;
|
||||
|
||||
/* Force schedstat enabled if a dependent tracepoint is active */
|
||||
if (trace_sched_stat_wait_enabled() ||
|
||||
trace_sched_stat_sleep_enabled() ||
|
||||
trace_sched_stat_iowait_enabled() ||
|
||||
trace_sched_stat_blocked_enabled() ||
|
||||
trace_sched_stat_runtime_enabled()) {
|
||||
pr_warn_once("Scheduler tracepoints stat_sleep, stat_iowait, "
|
||||
"stat_blocked and stat_runtime require the "
|
||||
"kernel parameter schedstats=enabled or "
|
||||
"kernel.sched_schedstats=1\n");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
{
|
||||
@@ -3122,11 +3174,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
|
||||
if (flags & ENQUEUE_WAKEUP) {
|
||||
place_entity(cfs_rq, se, 0);
|
||||
enqueue_sleeper(cfs_rq, se);
|
||||
if (schedstat_enabled())
|
||||
enqueue_sleeper(cfs_rq, se);
|
||||
}
|
||||
|
||||
update_stats_enqueue(cfs_rq, se);
|
||||
check_spread(cfs_rq, se);
|
||||
check_schedstat_required();
|
||||
if (schedstat_enabled()) {
|
||||
update_stats_enqueue(cfs_rq, se);
|
||||
check_spread(cfs_rq, se);
|
||||
}
|
||||
if (se != cfs_rq->curr)
|
||||
__enqueue_entity(cfs_rq, se);
|
||||
se->on_rq = 1;
|
||||
@@ -3193,19 +3249,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
|
||||
update_curr(cfs_rq);
|
||||
dequeue_entity_load_avg(cfs_rq, se);
|
||||
|
||||
update_stats_dequeue(cfs_rq, se);
|
||||
if (flags & DEQUEUE_SLEEP) {
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
if (entity_is_task(se)) {
|
||||
struct task_struct *tsk = task_of(se);
|
||||
|
||||
if (tsk->state & TASK_INTERRUPTIBLE)
|
||||
se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
|
||||
if (tsk->state & TASK_UNINTERRUPTIBLE)
|
||||
se->statistics.block_start = rq_clock(rq_of(cfs_rq));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (schedstat_enabled())
|
||||
update_stats_dequeue(cfs_rq, se, flags);
|
||||
|
||||
clear_buddies(cfs_rq, se);
|
||||
|
||||
@@ -3279,7 +3324,8 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
* a CPU. So account for the time it spent waiting on the
|
||||
* runqueue.
|
||||
*/
|
||||
update_stats_wait_end(cfs_rq, se);
|
||||
if (schedstat_enabled())
|
||||
update_stats_wait_end(cfs_rq, se);
|
||||
__dequeue_entity(cfs_rq, se);
|
||||
update_load_avg(se, 1);
|
||||
}
|
||||
@@ -3292,7 +3338,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
* least twice that of our own weight (i.e. dont track it
|
||||
* when there are only lesser-weight tasks around):
|
||||
*/
|
||||
if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
|
||||
if (schedstat_enabled() && rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
|
||||
se->statistics.slice_max = max(se->statistics.slice_max,
|
||||
se->sum_exec_runtime - se->prev_sum_exec_runtime);
|
||||
}
|
||||
@@ -3375,9 +3421,13 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
|
||||
/* throttle cfs_rqs exceeding runtime */
|
||||
check_cfs_rq_runtime(cfs_rq);
|
||||
|
||||
check_spread(cfs_rq, prev);
|
||||
if (schedstat_enabled()) {
|
||||
check_spread(cfs_rq, prev);
|
||||
if (prev->on_rq)
|
||||
update_stats_wait_start(cfs_rq, prev);
|
||||
}
|
||||
|
||||
if (prev->on_rq) {
|
||||
update_stats_wait_start(cfs_rq, prev);
|
||||
/* Put 'current' back into the tree. */
|
||||
__enqueue_entity(cfs_rq, prev);
|
||||
/* in !on_rq case, update occurred at dequeue */
|
||||
@@ -4459,9 +4509,17 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
|
||||
|
||||
/* scale is effectively 1 << i now, and >> i divides by scale */
|
||||
|
||||
old_load = this_rq->cpu_load[i] - tickless_load;
|
||||
old_load = this_rq->cpu_load[i];
|
||||
old_load = decay_load_missed(old_load, pending_updates - 1, i);
|
||||
old_load += tickless_load;
|
||||
if (tickless_load) {
|
||||
old_load -= decay_load_missed(tickless_load, pending_updates - 1, i);
|
||||
/*
|
||||
* old_load can never be a negative value because a
|
||||
* decayed tickless_load cannot be greater than the
|
||||
* original tickless_load.
|
||||
*/
|
||||
old_load += tickless_load;
|
||||
}
|
||||
new_load = this_load;
|
||||
/*
|
||||
* Round up the averaging division if load is increasing. This
|
||||
@@ -4484,6 +4542,25 @@ static unsigned long weighted_cpuload(const int cpu)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ_COMMON
|
||||
static void __update_cpu_load_nohz(struct rq *this_rq,
|
||||
unsigned long curr_jiffies,
|
||||
unsigned long load,
|
||||
int active)
|
||||
{
|
||||
unsigned long pending_updates;
|
||||
|
||||
pending_updates = curr_jiffies - this_rq->last_load_update_tick;
|
||||
if (pending_updates) {
|
||||
this_rq->last_load_update_tick = curr_jiffies;
|
||||
/*
|
||||
* In the regular NOHZ case, we were idle, this means load 0.
|
||||
* In the NOHZ_FULL case, we were non-idle, we should consider
|
||||
* its weighted load.
|
||||
*/
|
||||
__update_cpu_load(this_rq, load, pending_updates, active);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* There is no sane way to deal with nohz on smp when using jiffies because the
|
||||
* cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
|
||||
@@ -4501,22 +4578,15 @@ static unsigned long weighted_cpuload(const int cpu)
|
||||
* Called from nohz_idle_balance() to update the load ratings before doing the
|
||||
* idle balance.
|
||||
*/
|
||||
static void update_idle_cpu_load(struct rq *this_rq)
|
||||
static void update_cpu_load_idle(struct rq *this_rq)
|
||||
{
|
||||
unsigned long curr_jiffies = READ_ONCE(jiffies);
|
||||
unsigned long load = weighted_cpuload(cpu_of(this_rq));
|
||||
unsigned long pending_updates;
|
||||
|
||||
/*
|
||||
* bail if there's load or we're actually up-to-date.
|
||||
*/
|
||||
if (load || curr_jiffies == this_rq->last_load_update_tick)
|
||||
if (weighted_cpuload(cpu_of(this_rq)))
|
||||
return;
|
||||
|
||||
pending_updates = curr_jiffies - this_rq->last_load_update_tick;
|
||||
this_rq->last_load_update_tick = curr_jiffies;
|
||||
|
||||
__update_cpu_load(this_rq, load, pending_updates, 0);
|
||||
__update_cpu_load_nohz(this_rq, READ_ONCE(jiffies), 0, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -4527,22 +4597,12 @@ void update_cpu_load_nohz(int active)
|
||||
struct rq *this_rq = this_rq();
|
||||
unsigned long curr_jiffies = READ_ONCE(jiffies);
|
||||
unsigned long load = active ? weighted_cpuload(cpu_of(this_rq)) : 0;
|
||||
unsigned long pending_updates;
|
||||
|
||||
if (curr_jiffies == this_rq->last_load_update_tick)
|
||||
return;
|
||||
|
||||
raw_spin_lock(&this_rq->lock);
|
||||
pending_updates = curr_jiffies - this_rq->last_load_update_tick;
|
||||
if (pending_updates) {
|
||||
this_rq->last_load_update_tick = curr_jiffies;
|
||||
/*
|
||||
* In the regular NOHZ case, we were idle, this means load 0.
|
||||
* In the NOHZ_FULL case, we were non-idle, we should consider
|
||||
* its weighted load.
|
||||
*/
|
||||
__update_cpu_load(this_rq, load, pending_updates, active);
|
||||
}
|
||||
__update_cpu_load_nohz(this_rq, curr_jiffies, load, active);
|
||||
raw_spin_unlock(&this_rq->lock);
|
||||
}
|
||||
#endif /* CONFIG_NO_HZ */
|
||||
@@ -4554,7 +4614,7 @@ void update_cpu_load_active(struct rq *this_rq)
|
||||
{
|
||||
unsigned long load = weighted_cpuload(cpu_of(this_rq));
|
||||
/*
|
||||
* See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
|
||||
* See the mess around update_cpu_load_idle() / update_cpu_load_nohz().
|
||||
*/
|
||||
this_rq->last_load_update_tick = jiffies;
|
||||
__update_cpu_load(this_rq, load, 1, 1);
|
||||
@@ -7848,7 +7908,7 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
|
||||
if (time_after_eq(jiffies, rq->next_balance)) {
|
||||
raw_spin_lock_irq(&rq->lock);
|
||||
update_rq_clock(rq);
|
||||
update_idle_cpu_load(rq);
|
||||
update_cpu_load_idle(rq);
|
||||
raw_spin_unlock_irq(&rq->lock);
|
||||
rebalance_domains(rq, CPU_IDLE);
|
||||
}
|
||||
|
@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <linux/cpu.h>
#include <linux/cpuidle.h>
#include <linux/cpuhotplug.h>
#include <linux/tick.h>
#include <linux/mm.h>
#include <linux/stackprotector.h>
@@ -193,8 +194,6 @@ exit_idle:
rcu_idle_exit();
}

DEFINE_PER_CPU(bool, cpu_dead_idle);

/*
* Generic idle loop implementation
*
@@ -221,10 +220,7 @@ static void cpu_idle_loop(void)
rmb();

if (cpu_is_offline(smp_processor_id())) {
rcu_cpu_notify(NULL, CPU_DYING_IDLE,
(void *)(long)smp_processor_id());
smp_mb(); /* all activity before dead. */
this_cpu_write(cpu_dead_idle, true);
cpuhp_report_idle_dead();
arch_cpu_idle_dead();
}

@@ -291,5 +287,6 @@ void cpu_startup_entry(enum cpuhp_state state)
boot_init_stack_canary();
#endif
arch_cpu_idle_prepare();
cpuhp_online_idle(state);
cpu_idle_loop();
}
|
||||
|
@@ -58,7 +58,15 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
|
||||
raw_spin_lock(&rt_b->rt_runtime_lock);
|
||||
if (!rt_b->rt_period_active) {
|
||||
rt_b->rt_period_active = 1;
|
||||
hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period);
|
||||
/*
|
||||
* SCHED_DEADLINE updates the bandwidth, as a run away
|
||||
* RT task with a DL task could hog a CPU. But DL does
|
||||
* not reset the period. If a deadline task was running
|
||||
* without an RT task running, it can cause RT tasks to
|
||||
* throttle when they start up. Kick the timer right away
|
||||
* to update the period.
|
||||
*/
|
||||
hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
|
||||
hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED);
|
||||
}
|
||||
raw_spin_unlock(&rt_b->rt_runtime_lock);
|
||||
@@ -436,7 +444,7 @@ static void dequeue_top_rt_rq(struct rt_rq *rt_rq);
|
||||
|
||||
static inline int on_rt_rq(struct sched_rt_entity *rt_se)
|
||||
{
|
||||
return !list_empty(&rt_se->run_list);
|
||||
return rt_se->on_rq;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_RT_GROUP_SCHED
|
||||
@@ -482,8 +490,8 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
|
||||
return rt_se->my_q;
|
||||
}
|
||||
|
||||
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
|
||||
static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
|
||||
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
|
||||
static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
|
||||
|
||||
static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
|
||||
{
|
||||
@@ -499,7 +507,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
|
||||
if (!rt_se)
|
||||
enqueue_top_rt_rq(rt_rq);
|
||||
else if (!on_rt_rq(rt_se))
|
||||
enqueue_rt_entity(rt_se, false);
|
||||
enqueue_rt_entity(rt_se, 0);
|
||||
|
||||
if (rt_rq->highest_prio.curr < curr->prio)
|
||||
resched_curr(rq);
|
||||
@@ -516,7 +524,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
|
||||
if (!rt_se)
|
||||
dequeue_top_rt_rq(rt_rq);
|
||||
else if (on_rt_rq(rt_se))
|
||||
dequeue_rt_entity(rt_se);
|
||||
dequeue_rt_entity(rt_se, 0);
|
||||
}
|
||||
|
||||
static inline int rt_rq_throttled(struct rt_rq *rt_rq)
|
||||
@@ -1141,6 +1149,20 @@ unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
|
||||
{
|
||||
struct rt_rq *group_rq = group_rt_rq(rt_se);
|
||||
struct task_struct *tsk;
|
||||
|
||||
if (group_rq)
|
||||
return group_rq->rr_nr_running;
|
||||
|
||||
tsk = rt_task_of(rt_se);
|
||||
|
||||
return (tsk->policy == SCHED_RR) ? 1 : 0;
|
||||
}
|
||||
|
||||
static inline
|
||||
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
{
|
||||
@@ -1148,6 +1170,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
|
||||
WARN_ON(!rt_prio(prio));
|
||||
rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
|
||||
rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);
|
||||
|
||||
inc_rt_prio(rt_rq, prio);
|
||||
inc_rt_migration(rt_se, rt_rq);
|
||||
@@ -1160,13 +1183,37 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
|
||||
WARN_ON(!rt_prio(rt_se_prio(rt_se)));
|
||||
WARN_ON(!rt_rq->rt_nr_running);
|
||||
rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
|
||||
rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);
|
||||
|
||||
dec_rt_prio(rt_rq, rt_se_prio(rt_se));
|
||||
dec_rt_migration(rt_se, rt_rq);
|
||||
dec_rt_group(rt_se, rt_rq);
|
||||
}
|
||||
|
||||
static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
|
||||
/*
|
||||
* Change rt_se->run_list location unless SAVE && !MOVE
|
||||
*
|
||||
* assumes ENQUEUE/DEQUEUE flags match
|
||||
*/
|
||||
static inline bool move_entity(unsigned int flags)
|
||||
{
|
||||
if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
|
||||
{
|
||||
list_del_init(&rt_se->run_list);
|
||||
|
||||
if (list_empty(array->queue + rt_se_prio(rt_se)))
|
||||
__clear_bit(rt_se_prio(rt_se), array->bitmap);
|
||||
|
||||
rt_se->on_list = 0;
|
||||
}
|
||||
|
||||
static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
|
||||
{
|
||||
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
|
||||
struct rt_prio_array *array = &rt_rq->active;
|
||||
@@ -1179,26 +1226,37 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
|
||||
* get throttled and the current group doesn't have any other
|
||||
* active members.
|
||||
*/
|
||||
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
|
||||
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
|
||||
if (rt_se->on_list)
|
||||
__delist_rt_entity(rt_se, array);
|
||||
return;
|
||||
}
|
||||
|
||||
if (head)
|
||||
list_add(&rt_se->run_list, queue);
|
||||
else
|
||||
list_add_tail(&rt_se->run_list, queue);
|
||||
__set_bit(rt_se_prio(rt_se), array->bitmap);
|
||||
if (move_entity(flags)) {
|
||||
WARN_ON_ONCE(rt_se->on_list);
|
||||
if (flags & ENQUEUE_HEAD)
|
||||
list_add(&rt_se->run_list, queue);
|
||||
else
|
||||
list_add_tail(&rt_se->run_list, queue);
|
||||
|
||||
__set_bit(rt_se_prio(rt_se), array->bitmap);
|
||||
rt_se->on_list = 1;
|
||||
}
|
||||
rt_se->on_rq = 1;
|
||||
|
||||
inc_rt_tasks(rt_se, rt_rq);
|
||||
}
|
||||
|
||||
static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
|
||||
static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
|
||||
{
|
||||
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
|
||||
struct rt_prio_array *array = &rt_rq->active;
|
||||
|
||||
list_del_init(&rt_se->run_list);
|
||||
if (list_empty(array->queue + rt_se_prio(rt_se)))
|
||||
__clear_bit(rt_se_prio(rt_se), array->bitmap);
|
||||
if (move_entity(flags)) {
|
||||
WARN_ON_ONCE(!rt_se->on_list);
|
||||
__delist_rt_entity(rt_se, array);
|
||||
}
|
||||
rt_se->on_rq = 0;
|
||||
|
||||
dec_rt_tasks(rt_se, rt_rq);
|
||||
}
|
||||
@@ -1207,7 +1265,7 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
|
||||
* Because the prio of an upper entry depends on the lower
|
||||
* entries, we must remove entries top - down.
|
||||
*/
|
||||
static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
|
||||
static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
|
||||
{
|
||||
struct sched_rt_entity *back = NULL;
|
||||
|
||||
@@ -1220,31 +1278,31 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
|
||||
|
||||
for (rt_se = back; rt_se; rt_se = rt_se->back) {
|
||||
if (on_rt_rq(rt_se))
|
||||
__dequeue_rt_entity(rt_se);
|
||||
__dequeue_rt_entity(rt_se, flags);
|
||||
}
|
||||
}
|
||||
|
||||
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
|
||||
static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
|
||||
{
|
||||
struct rq *rq = rq_of_rt_se(rt_se);
|
||||
|
||||
dequeue_rt_stack(rt_se);
|
||||
dequeue_rt_stack(rt_se, flags);
|
||||
for_each_sched_rt_entity(rt_se)
|
||||
__enqueue_rt_entity(rt_se, head);
|
||||
__enqueue_rt_entity(rt_se, flags);
|
||||
enqueue_top_rt_rq(&rq->rt);
|
||||
}
|
||||
|
||||
static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
|
||||
static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
|
||||
{
|
||||
struct rq *rq = rq_of_rt_se(rt_se);
|
||||
|
||||
dequeue_rt_stack(rt_se);
|
||||
dequeue_rt_stack(rt_se, flags);
|
||||
|
||||
for_each_sched_rt_entity(rt_se) {
|
||||
struct rt_rq *rt_rq = group_rt_rq(rt_se);
|
||||
|
||||
if (rt_rq && rt_rq->rt_nr_running)
|
||||
__enqueue_rt_entity(rt_se, false);
|
||||
__enqueue_rt_entity(rt_se, flags);
|
||||
}
|
||||
enqueue_top_rt_rq(&rq->rt);
|
||||
}
|
||||
@@ -1260,7 +1318,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
|
||||
if (flags & ENQUEUE_WAKEUP)
|
||||
rt_se->timeout = 0;
|
||||
|
||||
enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
|
||||
enqueue_rt_entity(rt_se, flags);
|
||||
|
||||
if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
|
||||
enqueue_pushable_task(rq, p);
|
||||
@@ -1271,7 +1329,7 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
|
||||
struct sched_rt_entity *rt_se = &p->rt;
|
||||
|
||||
update_curr_rt(rq);
|
||||
dequeue_rt_entity(rt_se);
|
||||
dequeue_rt_entity(rt_se, flags);
|
||||
|
||||
dequeue_pushable_task(rq, p);
|
||||
}
|
||||
|
@@ -3,6 +3,7 @@
|
||||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/sched/rt.h>
|
||||
#include <linux/sched/deadline.h>
|
||||
#include <linux/binfmts.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/stop_machine.h>
|
||||
@@ -318,7 +319,6 @@ extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
|
||||
struct sched_entity *se, int cpu,
|
||||
struct sched_entity *parent);
|
||||
extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
|
||||
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
|
||||
|
||||
extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
|
||||
extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
|
||||
@@ -450,6 +450,7 @@ static inline int rt_bandwidth_enabled(void)
|
||||
struct rt_rq {
|
||||
struct rt_prio_array active;
|
||||
unsigned int rt_nr_running;
|
||||
unsigned int rr_nr_running;
|
||||
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
|
||||
struct {
|
||||
int curr; /* highest queued rt task prio */
|
||||
@@ -909,6 +910,18 @@ static inline unsigned int group_first_cpu(struct sched_group *group)
|
||||
|
||||
extern int group_balance_cpu(struct sched_group *sg);
|
||||
|
||||
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
|
||||
void register_sched_domain_sysctl(void);
|
||||
void unregister_sched_domain_sysctl(void);
|
||||
#else
|
||||
static inline void register_sched_domain_sysctl(void)
|
||||
{
|
||||
}
|
||||
static inline void unregister_sched_domain_sysctl(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
static inline void sched_ttwu_pending(void) { }
|
||||
@@ -1022,6 +1035,7 @@ extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
|
||||
#endif /* SCHED_DEBUG && HAVE_JUMP_LABEL */
|
||||
|
||||
extern struct static_key_false sched_numa_balancing;
|
||||
extern struct static_key_false sched_schedstats;
|
||||
|
||||
static inline u64 global_rt_period(void)
|
||||
{
|
||||
@@ -1130,18 +1144,40 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
|
||||
extern const int sched_prio_to_weight[40];
|
||||
extern const u32 sched_prio_to_wmult[40];
|
||||
|
||||
/*
|
||||
* {de,en}queue flags:
|
||||
*
|
||||
* DEQUEUE_SLEEP - task is no longer runnable
|
||||
* ENQUEUE_WAKEUP - task just became runnable
|
||||
*
|
||||
* SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
|
||||
* are in a known state which allows modification. Such pairs
|
||||
* should preserve as much state as possible.
|
||||
*
|
||||
* MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
|
||||
* in the runqueue.
|
||||
*
|
||||
* ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
|
||||
* ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
|
||||
* ENQUEUE_WAKING - sched_class::task_waking was called
|
||||
*
|
||||
*/
|
||||
|
||||
#define DEQUEUE_SLEEP 0x01
|
||||
#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */
|
||||
#define DEQUEUE_MOVE 0x04 /* matches ENQUEUE_MOVE */
|
||||
|
||||
#define ENQUEUE_WAKEUP 0x01
|
||||
#define ENQUEUE_HEAD 0x02
|
||||
#define ENQUEUE_RESTORE 0x02
|
||||
#define ENQUEUE_MOVE 0x04
|
||||
|
||||
#define ENQUEUE_HEAD 0x08
|
||||
#define ENQUEUE_REPLENISH 0x10
|
||||
#ifdef CONFIG_SMP
|
||||
#define ENQUEUE_WAKING 0x04 /* sched_class::task_waking was called */
|
||||
#define ENQUEUE_WAKING 0x20
|
||||
#else
|
||||
#define ENQUEUE_WAKING 0x00
|
||||
#endif
|
||||
#define ENQUEUE_REPLENISH 0x08
|
||||
#define ENQUEUE_RESTORE 0x10
|
||||
|
||||
#define DEQUEUE_SLEEP 0x01
|
||||
#define DEQUEUE_SAVE 0x02
|
||||
|
||||
#define RETRY_TASK ((void *)-1UL)
|
||||
|
||||
@@ -1278,6 +1314,35 @@ unsigned long to_ratio(u64 period, u64 runtime);
|
||||
|
||||
extern void init_entity_runnable_average(struct sched_entity *se);
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
extern bool sched_can_stop_tick(struct rq *rq);
|
||||
|
||||
/*
|
||||
* Tick may be needed by tasks in the runqueue depending on their policy and
|
||||
* requirements. If tick is needed, lets send the target an IPI to kick it out of
|
||||
* nohz mode if necessary.
|
||||
*/
|
||||
static inline void sched_update_tick_dependency(struct rq *rq)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (!tick_nohz_full_enabled())
|
||||
return;
|
||||
|
||||
cpu = cpu_of(rq);
|
||||
|
||||
if (!tick_nohz_full_cpu(cpu))
|
||||
return;
|
||||
|
||||
if (sched_can_stop_tick(rq))
|
||||
tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
|
||||
else
|
||||
tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
|
||||
}
|
||||
#else
|
||||
static inline void sched_update_tick_dependency(struct rq *rq) { }
|
||||
#endif
|
||||
|
||||
static inline void add_nr_running(struct rq *rq, unsigned count)
|
||||
{
|
||||
unsigned prev_nr = rq->nr_running;
|
||||
@@ -1289,26 +1354,16 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
|
||||
if (!rq->rd->overload)
|
||||
rq->rd->overload = true;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
if (tick_nohz_full_cpu(rq->cpu)) {
|
||||
/*
|
||||
* Tick is needed if more than one task runs on a CPU.
|
||||
* Send the target an IPI to kick it out of nohz mode.
|
||||
*
|
||||
* We assume that IPI implies full memory barrier and the
|
||||
* new value of rq->nr_running is visible on reception
|
||||
* from the target.
|
||||
*/
|
||||
tick_nohz_full_kick_cpu(rq->cpu);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
sched_update_tick_dependency(rq);
|
||||
}
|
||||
|
||||
static inline void sub_nr_running(struct rq *rq, unsigned count)
|
||||
{
|
||||
rq->nr_running -= count;
|
||||
/* Check if we still need preemption */
|
||||
sched_update_tick_dependency(rq);
|
||||
}
|
||||
|
||||
static inline void rq_last_tick_reset(struct rq *rq)
|
||||
|
@@ -29,9 +29,10 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
if (rq)
rq->rq_sched_info.run_delay += delta;
}
# define schedstat_inc(rq, field) do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt) do { (rq)->field += (amt); } while (0)
# define schedstat_set(var, val) do { var = (val); } while (0)
# define schedstat_enabled() static_branch_unlikely(&sched_schedstats)
# define schedstat_inc(rq, field) do { if (schedstat_enabled()) { (rq)->field++; } } while (0)
# define schedstat_add(rq, field, amt) do { if (schedstat_enabled()) { (rq)->field += (amt); } } while (0)
# define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0)
#else /* !CONFIG_SCHEDSTATS */
static inline void
rq_sched_info_arrive(struct rq *rq, unsigned long long delta)
@@ -42,6 +43,7 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
static inline void
rq_sched_info_depart(struct rq *rq, unsigned long long delta)
{}
# define schedstat_enabled() 0
# define schedstat_inc(rq, field) do { } while (0)
# define schedstat_add(rq, field, amt) do { } while (0)
# define schedstat_set(var, val) do { } while (0)
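/*
 * Editor's illustrative sketch, not part of the diff above. With the new
 * CONFIG_SCHEDSTATS definitions, a call site keeps the familiar form
 * (the rq field below is only an example taken from the scheduler core):
 *
 *	schedstat_inc(rq, yld_count);
 *
 * but the update now sits behind the sched_schedstats static branch, so it
 * is patched out entirely while schedstats are disabled at runtime.
 */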
123
kernel/sched/swait.c
Normal file
@@ -0,0 +1,123 @@
#include <linux/sched.h>
#include <linux/swait.h>

void __init_swait_queue_head(struct swait_queue_head *q, const char *name,
struct lock_class_key *key)
{
raw_spin_lock_init(&q->lock);
lockdep_set_class_and_name(&q->lock, key, name);
INIT_LIST_HEAD(&q->task_list);
}
EXPORT_SYMBOL(__init_swait_queue_head);

/*
* The thing about the wake_up_state() return value; I think we can ignore it.
*
* If for some reason it would return 0, that means the previously waiting
* task is already running, so it will observe condition true (or has already).
*/
void swake_up_locked(struct swait_queue_head *q)
{
struct swait_queue *curr;

if (list_empty(&q->task_list))
return;

curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
wake_up_process(curr->task);
list_del_init(&curr->task_list);
}
EXPORT_SYMBOL(swake_up_locked);

void swake_up(struct swait_queue_head *q)
{
unsigned long flags;

if (!swait_active(q))
return;

raw_spin_lock_irqsave(&q->lock, flags);
swake_up_locked(q);
raw_spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(swake_up);

/*
* Does not allow usage from IRQ disabled, since we must be able to
* release IRQs to guarantee bounded hold time.
*/
void swake_up_all(struct swait_queue_head *q)
{
struct swait_queue *curr;
LIST_HEAD(tmp);

if (!swait_active(q))
return;

raw_spin_lock_irq(&q->lock);
list_splice_init(&q->task_list, &tmp);
while (!list_empty(&tmp)) {
curr = list_first_entry(&tmp, typeof(*curr), task_list);

wake_up_state(curr->task, TASK_NORMAL);
list_del_init(&curr->task_list);

if (list_empty(&tmp))
break;

raw_spin_unlock_irq(&q->lock);
raw_spin_lock_irq(&q->lock);
}
raw_spin_unlock_irq(&q->lock);
}
EXPORT_SYMBOL(swake_up_all);

void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait)
{
wait->task = current;
if (list_empty(&wait->task_list))
list_add(&wait->task_list, &q->task_list);
}

void prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait, int state)
{
unsigned long flags;

raw_spin_lock_irqsave(&q->lock, flags);
__prepare_to_swait(q, wait);
set_current_state(state);
raw_spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(prepare_to_swait);

long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state)
{
if (signal_pending_state(state, current))
return -ERESTARTSYS;

prepare_to_swait(q, wait, state);

return 0;
}
EXPORT_SYMBOL(prepare_to_swait_event);

void __finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
{
__set_current_state(TASK_RUNNING);
if (!list_empty(&wait->task_list))
list_del_init(&wait->task_list);
}

void finish_swait(struct swait_queue_head *q, struct swait_queue *wait)
{
unsigned long flags;

__set_current_state(TASK_RUNNING);

if (!list_empty_careful(&wait->task_list)) {
raw_spin_lock_irqsave(&q->lock, flags);
list_del_init(&wait->task_list);
raw_spin_unlock_irqrestore(&q->lock, flags);
}
}
EXPORT_SYMBOL(finish_swait);
10
kernel/smp.c
@@ -105,13 +105,12 @@ void __init call_function_init(void)
* previous function call. For multi-cpu calls its even more interesting
* as we'll have to ensure no other cpu is observing our csd.
*/
static void csd_lock_wait(struct call_single_data *csd)
static __always_inline void csd_lock_wait(struct call_single_data *csd)
{
while (smp_load_acquire(&csd->flags) & CSD_FLAG_LOCK)
cpu_relax();
smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK));
}

static void csd_lock(struct call_single_data *csd)
static __always_inline void csd_lock(struct call_single_data *csd)
{
csd_lock_wait(csd);
csd->flags |= CSD_FLAG_LOCK;
@@ -124,7 +123,7 @@ static void csd_lock(struct call_single_data *csd)
smp_wmb();
}

static void csd_unlock(struct call_single_data *csd)
static __always_inline void csd_unlock(struct call_single_data *csd)
{
WARN_ON(!(csd->flags & CSD_FLAG_LOCK));

@@ -569,6 +568,7 @@ void __init smp_init(void)
unsigned int cpu;

idle_threads_init();
cpuhp_threads_init();

/* FIXME: This should be done in userspace --RR */
for_each_present_cpu(cpu) {
|
||||
|
@@ -226,7 +226,7 @@ static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cp
kthread_unpark(tsk);
}

void smpboot_unpark_threads(unsigned int cpu)
int smpboot_unpark_threads(unsigned int cpu)
{
struct smp_hotplug_thread *cur;

@@ -235,6 +235,7 @@ void smpboot_unpark_threads(unsigned int cpu)
if (cpumask_test_cpu(cpu, cur->cpumask))
smpboot_unpark_thread(cur, cpu);
mutex_unlock(&smpboot_threads_lock);
return 0;
}

static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
@@ -245,7 +246,7 @@ static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
kthread_park(tsk);
}

void smpboot_park_threads(unsigned int cpu)
int smpboot_park_threads(unsigned int cpu)
{
struct smp_hotplug_thread *cur;

@@ -253,6 +254,7 @@ void smpboot_park_threads(unsigned int cpu)
list_for_each_entry_reverse(cur, &hotplug_threads, list)
smpboot_park_thread(cur, cpu);
mutex_unlock(&smpboot_threads_lock);
return 0;
}

static void smpboot_destroy_threads(struct smp_hotplug_thread *ht)
@@ -14,7 +14,9 @@ static inline void idle_threads_init(void) { }
#endif

int smpboot_create_threads(unsigned int cpu);
void smpboot_park_threads(unsigned int cpu);
void smpboot_unpark_threads(unsigned int cpu);
int smpboot_park_threads(unsigned int cpu);
int smpboot_unpark_threads(unsigned int cpu);

void __init cpuhp_threads_init(void);

#endif
|
||||
|
@@ -116,9 +116,9 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt)

if (preempt_count() == cnt) {
#ifdef CONFIG_DEBUG_PREEMPT
current->preempt_disable_ip = get_parent_ip(CALLER_ADDR1);
current->preempt_disable_ip = get_lock_parent_ip();
#endif
trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
trace_preempt_off(CALLER_ADDR0, get_lock_parent_ip());
}
}
EXPORT_SYMBOL(__local_bh_disable_ip);
|
||||
|
@@ -350,6 +350,17 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
#ifdef CONFIG_SCHEDSTATS
{
.procname = "sched_schedstats",
.data = NULL,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sysctl_schedstats,
.extra1 = &zero,
.extra2 = &one,
},
#endif /* CONFIG_SCHEDSTATS */
#endif /* CONFIG_SMP */
#ifdef CONFIG_NUMA_BALANCING
{
@@ -505,7 +516,7 @@ static struct ctl_table kern_table[] = {
.data = &latencytop_enabled,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
.proc_handler = sysctl_latencytop,
},
#endif
#ifdef CONFIG_BLK_DEV_INITRD
|
||||
|
@@ -323,13 +323,42 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
|
||||
/* cs is a watchdog. */
|
||||
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
|
||||
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
|
||||
/* Pick the best watchdog. */
|
||||
if (!watchdog || cs->rating > watchdog->rating) {
|
||||
watchdog = cs;
|
||||
/* Reset watchdog cycles */
|
||||
clocksource_reset_watchdog();
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&watchdog_lock, flags);
|
||||
}
|
||||
|
||||
static void clocksource_select_watchdog(bool fallback)
|
||||
{
|
||||
struct clocksource *cs, *old_wd;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&watchdog_lock, flags);
|
||||
/* save current watchdog */
|
||||
old_wd = watchdog;
|
||||
if (fallback)
|
||||
watchdog = NULL;
|
||||
|
||||
list_for_each_entry(cs, &clocksource_list, list) {
|
||||
/* cs is a clocksource to be watched. */
|
||||
if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
|
||||
continue;
|
||||
|
||||
/* Skip current if we were requested for a fallback. */
|
||||
if (fallback && cs == old_wd)
|
||||
continue;
|
||||
|
||||
/* Pick the best watchdog. */
|
||||
if (!watchdog || cs->rating > watchdog->rating)
|
||||
watchdog = cs;
|
||||
}
|
||||
/* If we failed to find a fallback restore the old one. */
|
||||
if (!watchdog)
|
||||
watchdog = old_wd;
|
||||
|
||||
/* If we changed the watchdog we need to reset cycles. */
|
||||
if (watchdog != old_wd)
|
||||
clocksource_reset_watchdog();
|
||||
|
||||
/* Check if the watchdog timer needs to be started. */
|
||||
clocksource_start_watchdog();
|
||||
spin_unlock_irqrestore(&watchdog_lock, flags);
|
||||
@@ -404,6 +433,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
|
||||
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
|
||||
}
|
||||
|
||||
static void clocksource_select_watchdog(bool fallback) { }
|
||||
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
|
||||
static inline void clocksource_resume_watchdog(void) { }
|
||||
static inline int __clocksource_watchdog_kthread(void) { return 0; }
|
||||
@@ -736,6 +766,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
|
||||
clocksource_enqueue(cs);
|
||||
clocksource_enqueue_watchdog(cs);
|
||||
clocksource_select();
|
||||
clocksource_select_watchdog(false);
|
||||
mutex_unlock(&clocksource_mutex);
|
||||
return 0;
|
||||
}
|
||||
@@ -758,6 +789,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
|
||||
mutex_lock(&clocksource_mutex);
|
||||
__clocksource_change_rating(cs, rating);
|
||||
clocksource_select();
|
||||
clocksource_select_watchdog(false);
|
||||
mutex_unlock(&clocksource_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL(clocksource_change_rating);
|
||||
@@ -767,12 +799,12 @@ EXPORT_SYMBOL(clocksource_change_rating);
|
||||
*/
|
||||
static int clocksource_unbind(struct clocksource *cs)
|
||||
{
|
||||
/*
|
||||
* I really can't convince myself to support this on hardware
|
||||
* designed by lobotomized monkeys.
|
||||
*/
|
||||
if (clocksource_is_watchdog(cs))
|
||||
return -EBUSY;
|
||||
if (clocksource_is_watchdog(cs)) {
|
||||
/* Select and try to install a replacement watchdog. */
|
||||
clocksource_select_watchdog(true);
|
||||
if (clocksource_is_watchdog(cs))
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
if (cs == curr_clocksource) {
|
||||
/* Select and try to install a replacement clock source */
|
||||
|
@@ -68,7 +68,7 @@ static struct clocksource clocksource_jiffies = {
.name = "jiffies",
.rating = 1, /* lowest valid rating*/
.read = jiffies_read,
.mask = 0xffffffff, /*32bits*/
.mask = CLOCKSOURCE_MASK(32),
.mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
.shift = JIFFIES_SHIFT,
.max_cycles = 10,
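/*
 * Editor's note, not part of the diff above: in <linux/clocksource.h> the
 * CLOCKSOURCE_MASK() helper is, to the best of my knowledge, defined as
 * roughly:
 *
 *	#define CLOCKSOURCE_MASK(bits) \
 *		(cycle_t)((bits) < 64 ? ((1ULL << (bits)) - 1) : -1)
 *
 * so CLOCKSOURCE_MASK(32) still evaluates to 0xffffffff and the change is
 * purely cosmetic.
 */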
|
||||
|
@@ -333,7 +333,6 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
|
||||
* This is called from sys_timer_create() and do_cpu_nanosleep() with the
|
||||
@@ -517,6 +516,10 @@ static void arm_timer(struct k_itimer *timer)
|
||||
cputime_expires->sched_exp = exp;
|
||||
break;
|
||||
}
|
||||
if (CPUCLOCK_PERTHREAD(timer->it_clock))
|
||||
tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
|
||||
else
|
||||
tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -582,39 +585,6 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
static void nohz_kick_work_fn(struct work_struct *work)
|
||||
{
|
||||
tick_nohz_full_kick_all();
|
||||
}
|
||||
|
||||
static DECLARE_WORK(nohz_kick_work, nohz_kick_work_fn);
|
||||
|
||||
/*
|
||||
* We need the IPIs to be sent from sane process context.
|
||||
* The posix cpu timers are always set with irqs disabled.
|
||||
*/
|
||||
static void posix_cpu_timer_kick_nohz(void)
|
||||
{
|
||||
if (context_tracking_is_enabled())
|
||||
schedule_work(&nohz_kick_work);
|
||||
}
|
||||
|
||||
bool posix_cpu_timers_can_stop_tick(struct task_struct *tsk)
|
||||
{
|
||||
if (!task_cputime_zero(&tsk->cputime_expires))
|
||||
return false;
|
||||
|
||||
/* Check if cputimer is running. This is accessed without locking. */
|
||||
if (READ_ONCE(tsk->signal->cputimer.running))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
static inline void posix_cpu_timer_kick_nohz(void) { }
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Guts of sys_timer_settime for CPU timers.
|
||||
* This is called with the timer locked and interrupts disabled.
|
||||
@@ -761,8 +731,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
|
||||
sample_to_timespec(timer->it_clock,
|
||||
old_incr, &old->it_interval);
|
||||
}
|
||||
if (!ret)
|
||||
posix_cpu_timer_kick_nohz();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -911,6 +880,8 @@ static void check_thread_timers(struct task_struct *tsk,
|
||||
__group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
|
||||
}
|
||||
}
|
||||
if (task_cputime_zero(tsk_expires))
|
||||
tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
|
||||
}
|
||||
|
||||
static inline void stop_process_timers(struct signal_struct *sig)
|
||||
@@ -919,6 +890,7 @@ static inline void stop_process_timers(struct signal_struct *sig)
|
||||
|
||||
/* Turn off cputimer->running. This is done without locking. */
|
||||
WRITE_ONCE(cputimer->running, false);
|
||||
tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
|
||||
}
|
||||
|
||||
static u32 onecputick;
|
||||
@@ -1095,8 +1067,6 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
|
||||
arm_timer(timer);
|
||||
unlock_task_sighand(p, &flags);
|
||||
|
||||
/* Kick full dynticks CPUs in case they need to tick on the new timer */
|
||||
posix_cpu_timer_kick_nohz();
|
||||
out:
|
||||
timer->it_overrun_last = timer->it_overrun;
|
||||
timer->it_overrun = -1;
|
||||
@@ -1270,7 +1240,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
|
||||
}
|
||||
|
||||
if (!*newval)
|
||||
goto out;
|
||||
return;
|
||||
*newval += now;
|
||||
}
|
||||
|
||||
@@ -1288,8 +1258,8 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
|
||||
tsk->signal->cputime_expires.virt_exp = *newval;
|
||||
break;
|
||||
}
|
||||
out:
|
||||
posix_cpu_timer_kick_nohz();
|
||||
|
||||
tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
|
||||
}
|
||||
|
||||
static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
|
||||
|
@@ -22,7 +22,6 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/posix-timers.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/context_tracking.h>
|
||||
|
||||
#include <asm/irq_regs.h>
|
||||
@@ -158,54 +157,63 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
|
||||
cpumask_var_t tick_nohz_full_mask;
|
||||
cpumask_var_t housekeeping_mask;
|
||||
bool tick_nohz_full_running;
|
||||
static unsigned long tick_dep_mask;
|
||||
|
||||
static bool can_stop_full_tick(void)
|
||||
static void trace_tick_dependency(unsigned long dep)
|
||||
{
|
||||
if (dep & TICK_DEP_MASK_POSIX_TIMER) {
|
||||
trace_tick_stop(0, TICK_DEP_MASK_POSIX_TIMER);
|
||||
return;
|
||||
}
|
||||
|
||||
if (dep & TICK_DEP_MASK_PERF_EVENTS) {
|
||||
trace_tick_stop(0, TICK_DEP_MASK_PERF_EVENTS);
|
||||
return;
|
||||
}
|
||||
|
||||
if (dep & TICK_DEP_MASK_SCHED) {
|
||||
trace_tick_stop(0, TICK_DEP_MASK_SCHED);
|
||||
return;
|
||||
}
|
||||
|
||||
if (dep & TICK_DEP_MASK_CLOCK_UNSTABLE)
|
||||
trace_tick_stop(0, TICK_DEP_MASK_CLOCK_UNSTABLE);
|
||||
}
|
||||
|
||||
static bool can_stop_full_tick(struct tick_sched *ts)
|
||||
{
|
||||
WARN_ON_ONCE(!irqs_disabled());
|
||||
|
||||
if (!sched_can_stop_tick()) {
|
||||
trace_tick_stop(0, "more than 1 task in runqueue\n");
|
||||
if (tick_dep_mask) {
|
||||
trace_tick_dependency(tick_dep_mask);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!posix_cpu_timers_can_stop_tick(current)) {
|
||||
trace_tick_stop(0, "posix timers running\n");
|
||||
if (ts->tick_dep_mask) {
|
||||
trace_tick_dependency(ts->tick_dep_mask);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!perf_event_can_stop_tick()) {
|
||||
trace_tick_stop(0, "perf events running\n");
|
||||
if (current->tick_dep_mask) {
|
||||
trace_tick_dependency(current->tick_dep_mask);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* sched_clock_tick() needs us? */
|
||||
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
|
||||
/*
|
||||
* TODO: kick full dynticks CPUs when
|
||||
* sched_clock_stable is set.
|
||||
*/
|
||||
if (!sched_clock_stable()) {
|
||||
trace_tick_stop(0, "unstable sched clock\n");
|
||||
/*
|
||||
* Don't allow the user to think they can get
|
||||
* full NO_HZ with this machine.
|
||||
*/
|
||||
WARN_ONCE(tick_nohz_full_running,
|
||||
"NO_HZ FULL will not work with unstable sched clock");
|
||||
if (current->signal->tick_dep_mask) {
|
||||
trace_tick_dependency(current->signal->tick_dep_mask);
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void nohz_full_kick_work_func(struct irq_work *work)
|
||||
static void nohz_full_kick_func(struct irq_work *work)
|
||||
{
|
||||
/* Empty, the tick restart happens on tick_nohz_irq_exit() */
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
|
||||
.func = nohz_full_kick_work_func,
|
||||
.func = nohz_full_kick_func,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -214,7 +222,7 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
|
||||
* This kick, unlike tick_nohz_full_kick_cpu() and tick_nohz_full_kick_all(),
|
||||
* is NMI safe.
|
||||
*/
|
||||
void tick_nohz_full_kick(void)
|
||||
static void tick_nohz_full_kick(void)
|
||||
{
|
||||
if (!tick_nohz_full_cpu(smp_processor_id()))
|
||||
return;
|
||||
@@ -234,27 +242,112 @@ void tick_nohz_full_kick_cpu(int cpu)
|
||||
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
|
||||
}
|
||||
|
||||
static void nohz_full_kick_ipi(void *info)
|
||||
{
|
||||
/* Empty, the tick restart happens on tick_nohz_irq_exit() */
|
||||
}
|
||||
|
||||
/*
|
||||
* Kick all full dynticks CPUs in order to force these to re-evaluate
|
||||
* their dependency on the tick and restart it if necessary.
|
||||
*/
|
||||
void tick_nohz_full_kick_all(void)
|
||||
static void tick_nohz_full_kick_all(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (!tick_nohz_full_running)
|
||||
return;
|
||||
|
||||
preempt_disable();
|
||||
smp_call_function_many(tick_nohz_full_mask,
|
||||
nohz_full_kick_ipi, NULL, false);
|
||||
tick_nohz_full_kick();
|
||||
for_each_cpu_and(cpu, tick_nohz_full_mask, cpu_online_mask)
|
||||
tick_nohz_full_kick_cpu(cpu);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void tick_nohz_dep_set_all(unsigned long *dep,
|
||||
enum tick_dep_bits bit)
|
||||
{
|
||||
unsigned long prev;
|
||||
|
||||
prev = fetch_or(dep, BIT_MASK(bit));
|
||||
if (!prev)
|
||||
tick_nohz_full_kick_all();
|
||||
}
|
||||
|
||||
/*
|
||||
* Set a global tick dependency. Used by perf events that rely on freq and
|
||||
* by unstable clock.
|
||||
*/
|
||||
void tick_nohz_dep_set(enum tick_dep_bits bit)
|
||||
{
|
||||
tick_nohz_dep_set_all(&tick_dep_mask, bit);
|
||||
}
|
||||
|
||||
void tick_nohz_dep_clear(enum tick_dep_bits bit)
|
||||
{
|
||||
clear_bit(bit, &tick_dep_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set per-CPU tick dependency. Used by scheduler and perf events in order to
|
||||
* manage events throttling.
|
||||
*/
|
||||
void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit)
|
||||
{
|
||||
unsigned long prev;
|
||||
struct tick_sched *ts;
|
||||
|
||||
ts = per_cpu_ptr(&tick_cpu_sched, cpu);
|
||||
|
||||
prev = fetch_or(&ts->tick_dep_mask, BIT_MASK(bit));
|
||||
if (!prev) {
|
||||
preempt_disable();
|
||||
/* Perf needs local kick that is NMI safe */
|
||||
if (cpu == smp_processor_id()) {
|
||||
tick_nohz_full_kick();
|
||||
} else {
|
||||
/* Remote irq work not NMI-safe */
|
||||
if (!WARN_ON_ONCE(in_nmi()))
|
||||
tick_nohz_full_kick_cpu(cpu);
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
}
|
||||
|
||||
void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit)
|
||||
{
|
||||
struct tick_sched *ts = per_cpu_ptr(&tick_cpu_sched, cpu);
|
||||
|
||||
clear_bit(bit, &ts->tick_dep_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set a per-task tick dependency. Posix CPU timers need this in order to elapse
|
||||
* per task timers.
|
||||
*/
|
||||
void tick_nohz_dep_set_task(struct task_struct *tsk, enum tick_dep_bits bit)
|
||||
{
|
||||
/*
|
||||
* We could optimize this with just kicking the target running the task
|
||||
* if that noise matters for nohz full users.
|
||||
*/
|
||||
tick_nohz_dep_set_all(&tsk->tick_dep_mask, bit);
|
||||
}
|
||||
|
||||
void tick_nohz_dep_clear_task(struct task_struct *tsk, enum tick_dep_bits bit)
|
||||
{
|
||||
clear_bit(bit, &tsk->tick_dep_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set a per-taskgroup tick dependency. Posix CPU timers need this in order to elapse
|
||||
* per process timers.
|
||||
*/
|
||||
void tick_nohz_dep_set_signal(struct signal_struct *sig, enum tick_dep_bits bit)
|
||||
{
|
||||
tick_nohz_dep_set_all(&sig->tick_dep_mask, bit);
|
||||
}
|
||||
|
||||
void tick_nohz_dep_clear_signal(struct signal_struct *sig, enum tick_dep_bits bit)
|
||||
{
|
||||
clear_bit(bit, &sig->tick_dep_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Re-evaluate the need for the tick as we switch the current task.
|
||||
* It might need the tick due to per task/process properties:
|
||||
@@ -263,15 +356,19 @@ void tick_nohz_full_kick_all(void)
|
||||
void __tick_nohz_task_switch(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct tick_sched *ts;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
if (!tick_nohz_full_cpu(smp_processor_id()))
|
||||
goto out;
|
||||
|
||||
if (tick_nohz_tick_stopped() && !can_stop_full_tick())
|
||||
tick_nohz_full_kick();
|
||||
ts = this_cpu_ptr(&tick_cpu_sched);
|
||||
|
||||
if (ts->tick_stopped) {
|
||||
if (current->tick_dep_mask || current->signal->tick_dep_mask)
|
||||
tick_nohz_full_kick();
|
||||
}
|
||||
out:
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
@@ -689,7 +786,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
|
||||
|
||||
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
|
||||
ts->tick_stopped = 1;
|
||||
trace_tick_stop(1, " ");
|
||||
trace_tick_stop(1, TICK_DEP_MASK_NONE);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -740,7 +837,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
|
||||
if (!ts->tick_stopped && ts->nohz_mode == NOHZ_MODE_INACTIVE)
|
||||
return;
|
||||
|
||||
if (can_stop_full_tick())
|
||||
if (can_stop_full_tick(ts))
|
||||
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
|
||||
else if (ts->tick_stopped)
|
||||
tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
|
||||
|
@@ -60,6 +60,7 @@ struct tick_sched {
u64 next_timer;
ktime_t idle_expires;
int do_timer_last;
unsigned long tick_dep_mask;
};

extern struct tick_sched *tick_get_tick_sched(int cpu);
|
||||
|
@@ -233,6 +233,7 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
|
||||
u64 tmp, ntpinterval;
|
||||
struct clocksource *old_clock;
|
||||
|
||||
++tk->cs_was_changed_seq;
|
||||
old_clock = tk->tkr_mono.clock;
|
||||
tk->tkr_mono.clock = clock;
|
||||
tk->tkr_mono.read = clock->read;
|
||||
@@ -298,19 +299,36 @@ u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
|
||||
static inline u32 arch_gettimeoffset(void) { return 0; }
|
||||
#endif
|
||||
|
||||
static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
|
||||
static inline s64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
|
||||
cycle_t delta)
|
||||
{
|
||||
cycle_t delta;
|
||||
s64 nsec;
|
||||
|
||||
delta = timekeeping_get_delta(tkr);
|
||||
|
||||
nsec = (delta * tkr->mult + tkr->xtime_nsec) >> tkr->shift;
|
||||
nsec = delta * tkr->mult + tkr->xtime_nsec;
|
||||
nsec >>= tkr->shift;
|
||||
|
||||
/* If arch requires, add in get_arch_timeoffset() */
|
||||
return nsec + arch_gettimeoffset();
|
||||
}
|
||||
|
||||
static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
|
||||
{
|
||||
cycle_t delta;
|
||||
|
||||
delta = timekeeping_get_delta(tkr);
|
||||
return timekeeping_delta_to_ns(tkr, delta);
|
||||
}
|
||||
|
||||
static inline s64 timekeeping_cycles_to_ns(struct tk_read_base *tkr,
|
||||
cycle_t cycles)
|
||||
{
|
||||
cycle_t delta;
|
||||
|
||||
/* calculate the delta since the last update_wall_time */
|
||||
delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
|
||||
return timekeeping_delta_to_ns(tkr, delta);
|
||||
}
|
||||
|
||||
/**
|
||||
* update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
|
||||
* @tkr: Timekeeping readout base from which we take the update
|
||||
@@ -857,44 +875,262 @@ time64_t __ktime_get_real_seconds(void)
|
||||
return tk->xtime_sec;
|
||||
}
|
||||
|
||||
|
||||
#ifdef CONFIG_NTP_PPS
|
||||
|
||||
/**
|
||||
* ktime_get_raw_and_real_ts64 - get day and raw monotonic time in timespec format
|
||||
* @ts_raw: pointer to the timespec to be set to raw monotonic time
|
||||
* @ts_real: pointer to the timespec to be set to the time of day
|
||||
*
|
||||
* This function reads both the time of day and raw monotonic time at the
|
||||
* same time atomically and stores the resulting timestamps in timespec
|
||||
* format.
|
||||
* ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter
|
||||
* @systime_snapshot: pointer to struct receiving the system time snapshot
|
||||
*/
|
||||
void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, struct timespec64 *ts_real)
|
||||
void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
|
||||
{
|
||||
struct timekeeper *tk = &tk_core.timekeeper;
|
||||
unsigned long seq;
|
||||
s64 nsecs_raw, nsecs_real;
|
||||
ktime_t base_raw;
|
||||
ktime_t base_real;
|
||||
s64 nsec_raw;
|
||||
s64 nsec_real;
|
||||
cycle_t now;
|
||||
|
||||
WARN_ON_ONCE(timekeeping_suspended);
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&tk_core.seq);
|
||||
|
||||
*ts_raw = tk->raw_time;
|
||||
ts_real->tv_sec = tk->xtime_sec;
|
||||
ts_real->tv_nsec = 0;
|
||||
|
||||
nsecs_raw = timekeeping_get_ns(&tk->tkr_raw);
|
||||
nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
|
||||
|
||||
now = tk->tkr_mono.read(tk->tkr_mono.clock);
|
||||
systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
|
||||
systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
|
||||
base_real = ktime_add(tk->tkr_mono.base,
|
||||
tk_core.timekeeper.offs_real);
|
||||
base_raw = tk->tkr_raw.base;
|
||||
nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
|
||||
nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
|
||||
} while (read_seqcount_retry(&tk_core.seq, seq));
|
||||
|
||||
timespec64_add_ns(ts_raw, nsecs_raw);
|
||||
timespec64_add_ns(ts_real, nsecs_real);
|
||||
systime_snapshot->cycles = now;
|
||||
systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
|
||||
systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
|
||||
}
|
||||
EXPORT_SYMBOL(ktime_get_raw_and_real_ts64);
|
||||
EXPORT_SYMBOL_GPL(ktime_get_snapshot);
|
||||
|
||||
#endif /* CONFIG_NTP_PPS */
|
||||
/* Scale base by mult/div checking for overflow */
|
||||
static int scale64_check_overflow(u64 mult, u64 div, u64 *base)
|
||||
{
|
||||
u64 tmp, rem;
|
||||
|
||||
tmp = div64_u64_rem(*base, div, &rem);
|
||||
|
||||
if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) ||
|
||||
((int)sizeof(u64)*8 - fls64(mult) < fls64(rem)))
|
||||
return -EOVERFLOW;
|
||||
tmp *= mult;
|
||||
rem *= mult;
|
||||
|
||||
do_div(rem, div);
|
||||
*base = tmp + rem;
|
||||
return 0;
|
||||
}
|
||||

/**
 * adjust_historical_crosststamp - adjust crosstimestamp previous to current interval
 * @history:			Snapshot representing start of history
 * @partial_history_cycles:	Cycle offset into history (fractional part)
 * @total_history_cycles:	Total history length in cycles
 * @discontinuity:		True indicates clock was set on history period
 * @ts:				Cross timestamp that should be adjusted using
 *				partial/total ratio
 *
 * Helper function used by get_device_system_crosststamp() to correct the
 * crosstimestamp corresponding to the start of the current interval to the
 * system counter value (timestamp point) provided by the driver. The
 * total_history_* quantities are the total history starting at the provided
 * reference point and ending at the start of the current interval. The cycle
 * count between the driver timestamp point and the start of the current
 * interval is partial_history_cycles.
 */
static int adjust_historical_crosststamp(struct system_time_snapshot *history,
					 cycle_t partial_history_cycles,
					 cycle_t total_history_cycles,
					 bool discontinuity,
					 struct system_device_crosststamp *ts)
{
	struct timekeeper *tk = &tk_core.timekeeper;
	u64 corr_raw, corr_real;
	bool interp_forward;
	int ret;

	if (total_history_cycles == 0 || partial_history_cycles == 0)
		return 0;

	/* Interpolate shortest distance from beginning or end of history */
	interp_forward = partial_history_cycles > total_history_cycles/2 ?
		true : false;
	partial_history_cycles = interp_forward ?
		total_history_cycles - partial_history_cycles :
		partial_history_cycles;

	/*
	 * Scale the monotonic raw time delta by:
	 *	partial_history_cycles / total_history_cycles
	 */
	corr_raw = (u64)ktime_to_ns(
		ktime_sub(ts->sys_monoraw, history->raw));
	ret = scale64_check_overflow(partial_history_cycles,
				     total_history_cycles, &corr_raw);
	if (ret)
		return ret;

	/*
	 * If there is a discontinuity in the history, scale monotonic raw
	 *	correction by:
	 *	mult(real)/mult(raw) yielding the realtime correction
	 * Otherwise, calculate the realtime correction similar to monotonic
	 *	raw calculation
	 */
	if (discontinuity) {
		corr_real = mul_u64_u32_div
			(corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult);
	} else {
		corr_real = (u64)ktime_to_ns(
			ktime_sub(ts->sys_realtime, history->real));
		ret = scale64_check_overflow(partial_history_cycles,
					     total_history_cycles, &corr_real);
		if (ret)
			return ret;
	}

	/* Fixup monotonic raw and real time time values */
	if (interp_forward) {
		ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw);
		ts->sys_realtime = ktime_add_ns(history->real, corr_real);
	} else {
		ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw);
		ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real);
	}

	return 0;
}
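The shortest-distance logic keeps the scaled quantities small. With, say, total_history_cycles = 1,000 and partial_history_cycles = 900 (the driver timestamp sits 900 cycles before the start of the current interval, so only 100 cycles after the history reference), the code sets interp_forward, scales the deltas by the complementary 100/1000, and adds the resulting corrections onto the history endpoint values (history->raw and history->real). With partial_history_cycles = 200 it interpolates backwards instead, subtracting 200/1000 of the deltas from the values computed at the start of the current interval. The numbers here are purely illustrative.
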

/*
 * cycle_between - true if test occurs chronologically between before and after
 */
static bool cycle_between(cycle_t before, cycle_t test, cycle_t after)
{
	if (test > before && test < after)
		return true;
	if (test < before && before > after)
		return true;
	return false;
}
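For instance, cycle_between(100, 150, 200) is true via the first test, while cycle_between(4000000000, 10, 50) is true via the second test, which accepts a test value numerically below before whenever after is also below before, the situation that arises when the window spans a wrap of the cycle counter. A value outside the window, e.g. cycle_between(100, 250, 200), returns false.
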

/**
 * get_device_system_crosststamp - Synchronously capture system/device timestamp
 * @get_time_fn:	Callback to get simultaneous device time and
 *			system counter from the device driver
 * @ctx:		Context passed to get_time_fn()
 * @history_begin:	Historical reference point used to interpolate system
 *			time when counter provided by the driver is before the current interval
 * @xtstamp:		Receives simultaneously captured system and device time
 *
 * Reads a timestamp from a device and correlates it to system time
 */
int get_device_system_crosststamp(int (*get_time_fn)
				  (ktime_t *device_time,
				   struct system_counterval_t *sys_counterval,
				   void *ctx),
				  void *ctx,
				  struct system_time_snapshot *history_begin,
				  struct system_device_crosststamp *xtstamp)
{
	struct system_counterval_t system_counterval;
	struct timekeeper *tk = &tk_core.timekeeper;
	cycle_t cycles, now, interval_start;
	unsigned int clock_was_set_seq = 0;
	ktime_t base_real, base_raw;
	s64 nsec_real, nsec_raw;
	u8 cs_was_changed_seq;
	unsigned long seq;
	bool do_interp;
	int ret;

	do {
		seq = read_seqcount_begin(&tk_core.seq);
		/*
		 * Try to synchronously capture device time and a system
		 * counter value calling back into the device driver
		 */
		ret = get_time_fn(&xtstamp->device, &system_counterval, ctx);
		if (ret)
			return ret;

		/*
		 * Verify that the clocksource associated with the captured
		 * system counter value is the same as the currently installed
		 * timekeeper clocksource
		 */
		if (tk->tkr_mono.clock != system_counterval.cs)
			return -ENODEV;
		cycles = system_counterval.cycles;

		/*
		 * Check whether the system counter value provided by the
		 * device driver is on the current timekeeping interval.
		 */
		now = tk->tkr_mono.read(tk->tkr_mono.clock);
		interval_start = tk->tkr_mono.cycle_last;
		if (!cycle_between(interval_start, cycles, now)) {
			clock_was_set_seq = tk->clock_was_set_seq;
			cs_was_changed_seq = tk->cs_was_changed_seq;
			cycles = interval_start;
			do_interp = true;
		} else {
			do_interp = false;
		}

		base_real = ktime_add(tk->tkr_mono.base,
				      tk_core.timekeeper.offs_real);
		base_raw = tk->tkr_raw.base;

		nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono,
						     system_counterval.cycles);
		nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw,
						    system_counterval.cycles);
	} while (read_seqcount_retry(&tk_core.seq, seq));

	xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real);
	xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw);

	/*
	 * Interpolate if necessary, adjusting back from the start of the
	 * current interval
	 */
	if (do_interp) {
		cycle_t partial_history_cycles, total_history_cycles;
		bool discontinuity;

		/*
		 * Check that the counter value occurs after the provided
		 * history reference and that the history doesn't cross a
		 * clocksource change
		 */
		if (!history_begin ||
		    !cycle_between(history_begin->cycles,
				   system_counterval.cycles, cycles) ||
		    history_begin->cs_was_changed_seq != cs_was_changed_seq)
			return -EINVAL;
		partial_history_cycles = cycles - system_counterval.cycles;
		total_history_cycles = cycles - history_begin->cycles;
		discontinuity =
			history_begin->clock_was_set_seq != clock_was_set_seq;

		ret = adjust_historical_crosststamp(history_begin,
						    partial_history_cycles,
						    total_history_cycles,
						    discontinuity, xtstamp);
		if (ret)
			return ret;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
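On the calling side, get_time_fn() must return a device timestamp together with the clocksource cycle value captured at the same instant, with ->cs identifying the clocksource those cycles belong to (anything other than the current timekeeper clocksource is rejected with -ENODEV above). A sketch of such a caller, reusing the hypothetical my_ptp_dev state from the earlier snapshot sketch; every my_* identifier is a placeholder, not an API introduced by this commit:

#include <linux/timekeeping.h>

static int my_get_time_fn(ktime_t *device_time,
			  struct system_counterval_t *sys_counterval,
			  void *ctx)
{
	struct my_ptp_dev *dev = ctx;

	/* The my_hw_*() helpers stand in for device-specific register reads. */
	*device_time = my_hw_read_device_clock(dev);
	sys_counterval->cycles = my_hw_read_latched_cycles(dev);
	sys_counterval->cs = my_hw_latched_clocksource(dev);
	return 0;
}

static int my_getcrosststamp(struct my_ptp_dev *dev,
			     struct system_device_crosststamp *xt)
{
	/*
	 * dev->history was filled earlier by ktime_get_snapshot(); it is
	 * only consulted when the latched cycles fall before the current
	 * timekeeping interval.
	 */
	return get_device_system_crosststamp(my_get_time_fn, dev,
					     &dev->history, xt);
}
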

/**
 * do_gettimeofday - Returns the time of day in a timeval

@@ -97,16 +97,16 @@ trace_find_event_field(struct trace_event_call *call, char *name)
	struct ftrace_event_field *field;
	struct list_head *head;

	head = trace_get_fields(call);
	field = __find_event_field(head, name);
	if (field)
		return field;

	field = __find_event_field(&ftrace_generic_fields, name);
	if (field)
		return field;

	field = __find_event_field(&ftrace_common_fields, name);
	if (field)
		return field;

	head = trace_get_fields(call);
	return __find_event_field(head, name);
	return __find_event_field(&ftrace_common_fields, name);
}

static int __trace_define_field(struct list_head *head, const char *type,
@@ -171,8 +171,10 @@ static int trace_define_generic_fields(void)
{
	int ret;

	__generic_field(int, cpu, FILTER_OTHER);
	__generic_field(char *, comm, FILTER_PTR_STRING);
	__generic_field(int, CPU, FILTER_CPU);
	__generic_field(int, cpu, FILTER_CPU);
	__generic_field(char *, COMM, FILTER_COMM);
	__generic_field(char *, comm, FILTER_COMM);

	return ret;
}
@@ -1043,13 +1043,14 @@ static int init_pred(struct filter_parse_state *ps,
		return -EINVAL;
	}

	if (is_string_field(field)) {
	if (field->filter_type == FILTER_COMM) {
		filter_build_regex(pred);
		fn = filter_pred_comm;
		pred->regex.field_len = TASK_COMM_LEN;
	} else if (is_string_field(field)) {
		filter_build_regex(pred);

		if (!strcmp(field->name, "comm")) {
			fn = filter_pred_comm;
			pred->regex.field_len = TASK_COMM_LEN;
		} else if (field->filter_type == FILTER_STATIC_STRING) {
		if (field->filter_type == FILTER_STATIC_STRING) {
			fn = filter_pred_string;
			pred->regex.field_len = field->size;
		} else if (field->filter_type == FILTER_DYN_STRING)
@@ -1072,7 +1073,7 @@ static int init_pred(struct filter_parse_state *ps,
	}
	pred->val = val;

	if (!strcmp(field->name, "cpu"))
	if (field->filter_type == FILTER_CPU)
		fn = filter_pred_cpu;
	else
		fn = select_comparison_fn(pred->op, field->size,

@@ -30,7 +30,7 @@
struct trace_kprobe {
	struct list_head	list;
	struct kretprobe	rp;	/* Use rp.kp for kprobe use */
	unsigned long		nhit;
	unsigned long __percpu *nhit;
	const char		*symbol;	/* symbol name */
	struct trace_probe	tp;
};
@@ -274,6 +274,10 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
	if (!tk)
		return ERR_PTR(ret);

	tk->nhit = alloc_percpu(unsigned long);
	if (!tk->nhit)
		goto error;

	if (symbol) {
		tk->symbol = kstrdup(symbol, GFP_KERNEL);
		if (!tk->symbol)
@@ -313,6 +317,7 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
error:
	kfree(tk->tp.call.name);
	kfree(tk->symbol);
	free_percpu(tk->nhit);
	kfree(tk);
	return ERR_PTR(ret);
}
@@ -327,6 +332,7 @@ static void free_trace_kprobe(struct trace_kprobe *tk)
	kfree(tk->tp.call.class->system);
	kfree(tk->tp.call.name);
	kfree(tk->symbol);
	free_percpu(tk->nhit);
	kfree(tk);
}

@@ -874,9 +880,14 @@ static const struct file_operations kprobe_events_ops = {
static int probes_profile_seq_show(struct seq_file *m, void *v)
{
	struct trace_kprobe *tk = v;
	unsigned long nhit = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		nhit += *per_cpu_ptr(tk->nhit, cpu);

	seq_printf(m, " %-44s %15lu %15lu\n",
		   trace_event_name(&tk->tp.call), tk->nhit,
		   trace_event_name(&tk->tp.call), nhit,
		   tk->rp.kp.nmissed);

	return 0;
@@ -1225,7 +1236,7 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
{
	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);

	tk->nhit++;
	raw_cpu_inc(*tk->nhit);

	if (tk->tp.flags & TP_FLAG_TRACE)
		kprobe_trace_func(tk, regs);
@@ -1242,7 +1253,7 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
	struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);

	tk->nhit++;
	raw_cpu_inc(*tk->nhit);

	if (tk->tp.flags & TP_FLAG_TRACE)
		kretprobe_trace_func(tk, ri, regs);

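The trace_kprobe hunks above replace the shared nhit counter with per-CPU storage: alloc_percpu() at allocation time, a lockless raw_cpu_inc() in the probe dispatchers, free_percpu() on teardown, and a for_each_possible_cpu() sum only when the profile file is read. Stripped of the tracing specifics, the pattern looks roughly like the following sketch (my_counter is an illustrative name, not code from this commit):

#include <linux/errno.h>
#include <linux/percpu.h>

struct my_counter {
	unsigned long __percpu *hits;
};

static int my_counter_init(struct my_counter *c)
{
	c->hits = alloc_percpu(unsigned long);
	return c->hits ? 0 : -ENOMEM;
}

/* Hot path: bump the local CPU's copy, no lock, no cache-line bouncing. */
static void my_counter_hit(struct my_counter *c)
{
	raw_cpu_inc(*c->hits);
}

/* Slow path (e.g. a seq_file show): fold the per-CPU values together. */
static unsigned long my_counter_read(struct my_counter *c)
{
	unsigned long sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(c->hits, cpu);
	return sum;
}

static void my_counter_destroy(struct my_counter *c)
{
	free_percpu(c->hits);
}
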
@@ -186,11 +186,11 @@ print_syscall_exit(struct trace_iterator *iter, int flags,

extern char *__bad_type_size(void);

#define SYSCALL_FIELD(type, name) \
	sizeof(type) != sizeof(trace.name) ? \
#define SYSCALL_FIELD(type, field, name) \
	sizeof(type) != sizeof(trace.field) ? \
		__bad_type_size() : \
		#type, #name, offsetof(typeof(trace), name), \
		sizeof(trace.name), is_signed_type(type)
		#type, #name, offsetof(typeof(trace), field), \
		sizeof(trace.field), is_signed_type(type)

static int __init
__set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
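The extra macro parameter lets the name reported to user space differ from the struct member used for the offset and size arithmetic. At the call sites below, SYSCALL_FIELD(int, nr, __syscall_nr) therefore resolves, leaving aside the __bad_type_size() size check, to roughly:

	/* field = nr (struct member), name = __syscall_nr (exported name) */
	"int", "__syscall_nr",
	offsetof(typeof(trace), nr), sizeof(trace.nr), is_signed_type(int)

so the syscall-number field keeps its storage in trace.nr while being exposed as __syscall_nr.
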
@@ -261,7 +261,8 @@ static int __init syscall_enter_define_fields(struct trace_event_call *call)
	int i;
	int offset = offsetof(typeof(trace), args);

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
				 FILTER_OTHER);
	if (ret)
		return ret;

@@ -281,11 +282,12 @@ static int __init syscall_exit_define_fields(struct trace_event_call *call)
	struct syscall_trace_exit trace;
	int ret;

	ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
	ret = trace_define_field(call, SYSCALL_FIELD(int, nr, __syscall_nr),
				 FILTER_OTHER);
	if (ret)
		return ret;

	ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
	ret = trace_define_field(call, SYSCALL_FIELD(long, ret, ret),
				 FILTER_OTHER);

	return ret;

@@ -93,9 +93,11 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
{
	struct mm_struct *mm;

	/* convert pages-usec to Mbyte-usec */
	stats->coremem = p->acct_rss_mem1 * PAGE_SIZE / MB;
	stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE / MB;
	/* convert pages-nsec/1024 to Mbyte-usec, see __acct_update_integrals */
	stats->coremem = p->acct_rss_mem1 * PAGE_SIZE;
	do_div(stats->coremem, 1000 * KB);
	stats->virtmem = p->acct_vm_mem1 * PAGE_SIZE;
	do_div(stats->virtmem, 1000 * KB);
	mm = get_task_mm(p);
	if (mm) {
		/* adjust to KB unit */
@@ -123,27 +125,28 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
static void __acct_update_integrals(struct task_struct *tsk,
				    cputime_t utime, cputime_t stime)
{
	if (likely(tsk->mm)) {
		cputime_t time, dtime;
		struct timeval value;
		unsigned long flags;
		u64 delta;
	cputime_t time, dtime;
	u64 delta;

		local_irq_save(flags);
		time = stime + utime;
		dtime = time - tsk->acct_timexpd;
		jiffies_to_timeval(cputime_to_jiffies(dtime), &value);
		delta = value.tv_sec;
		delta = delta * USEC_PER_SEC + value.tv_usec;
	if (!likely(tsk->mm))
		return;

		if (delta == 0)
			goto out;
		tsk->acct_timexpd = time;
		tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm);
		tsk->acct_vm_mem1 += delta * tsk->mm->total_vm;
	out:
		local_irq_restore(flags);
	}
	time = stime + utime;
	dtime = time - tsk->acct_timexpd;
	/* Avoid division: cputime_t is often in nanoseconds already. */
	delta = cputime_to_nsecs(dtime);

	if (delta < TICK_NSEC)
		return;

	tsk->acct_timexpd = time;
	/*
	 * Divide by 1024 to avoid overflow, and to avoid division.
	 * The final unit reported to userspace is Mbyte-usecs,
	 * the rest of the math is done in xacct_add_tsk.
	 */
	tsk->acct_rss_mem1 += delta * get_mm_rss(tsk->mm) >> 10;
	tsk->acct_vm_mem1 += delta * tsk->mm->total_vm >> 10;
}
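Taken together with the xacct_add_tsk() hunk above, the bookkeeping now flows as follows: delta is in nanoseconds, so acct_rss_mem1 and acct_vm_mem1 accumulate pages * nsec / 1024 (the >> 10 both avoids a division in the hot path and keeps the 64-bit accumulator from overflowing), and xacct_add_tsk() finishes the conversion by multiplying by PAGE_SIZE and dividing by 1000 * KB, which lands exactly on Mbyte-usecs. As a purely illustrative check with 4 KB pages: a task holding 25,600 pages (100 MB) for one second accumulates 10^9 * 25,600 >> 10 = 25,000,000,000, and xacct_add_tsk() then computes 25,000,000,000 * 4096 / 1,024,000 = 100,000,000, i.e. 100 MB * 1,000,000 usec.
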

/**
@@ -153,9 +156,12 @@ static void __acct_update_integrals(struct task_struct *tsk,
void acct_update_integrals(struct task_struct *tsk)
{
	cputime_t utime, stime;
	unsigned long flags;

	local_irq_save(flags);
	task_cputime(tsk, &utime, &stime);
	__acct_update_integrals(tsk, utime, stime);
	local_irq_restore(flags);
}

/**