Merge branch 'sched/urgent' into sched/core
Merge reason: Pick up fixes that did not make it into .32.0

Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
@@ -1710,14 +1710,13 @@ static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft,
|
||||
return -EFAULT;
|
||||
|
||||
buffer[nbytes] = 0; /* nul-terminate */
|
||||
strstrip(buffer);
|
||||
if (cft->write_u64) {
|
||||
u64 val = simple_strtoull(buffer, &end, 0);
|
||||
u64 val = simple_strtoull(strstrip(buffer), &end, 0);
|
||||
if (*end)
|
||||
return -EINVAL;
|
||||
retval = cft->write_u64(cgrp, cft, val);
|
||||
} else {
|
||||
s64 val = simple_strtoll(buffer, &end, 0);
|
||||
s64 val = simple_strtoll(strstrip(buffer), &end, 0);
|
||||
if (*end)
|
||||
return -EINVAL;
|
||||
retval = cft->write_s64(cgrp, cft, val);
|
||||
@@ -1753,8 +1752,7 @@ static ssize_t cgroup_write_string(struct cgroup *cgrp, struct cftype *cft,
|
||||
}
|
||||
|
||||
buffer[nbytes] = 0; /* nul-terminate */
|
||||
strstrip(buffer);
|
||||
retval = cft->write_string(cgrp, cft, buffer);
|
||||
retval = cft->write_string(cgrp, cft, strstrip(buffer));
|
||||
if (!retval)
|
||||
retval = nbytes;
|
||||
out:
|
||||
|
@@ -359,10 +359,8 @@ void __set_special_pids(struct pid *pid)
|
||||
{
|
||||
struct task_struct *curr = current->group_leader;
|
||||
|
||||
if (task_session(curr) != pid) {
|
||||
if (task_session(curr) != pid)
|
||||
change_pid(curr, PIDTYPE_SID, pid);
|
||||
proc_sid_connector(curr);
|
||||
}
|
||||
|
||||
if (task_pgrp(curr) != pid)
|
||||
change_pid(curr, PIDTYPE_PGID, pid);
|
||||
|
@@ -91,7 +91,7 @@ int nr_processes(void)
|
||||
int cpu;
|
||||
int total = 0;
|
||||
|
||||
for_each_online_cpu(cpu)
|
||||
for_each_possible_cpu(cpu)
|
||||
total += per_cpu(process_counts, cpu);
|
||||
|
||||
return total;
|
||||
|
@@ -150,7 +150,8 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key)
|
||||
*/
|
||||
static inline int match_futex(union futex_key *key1, union futex_key *key2)
|
||||
{
|
||||
return (key1->both.word == key2->both.word
|
||||
return (key1 && key2
|
||||
&& key1->both.word == key2->both.word
|
||||
&& key1->both.ptr == key2->both.ptr
|
||||
&& key1->both.offset == key2->both.offset);
|
||||
}
|
||||
@@ -1028,7 +1029,6 @@ static inline
|
||||
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
|
||||
struct futex_hash_bucket *hb)
|
||||
{
|
||||
drop_futex_key_refs(&q->key);
|
||||
get_futex_key_refs(key);
|
||||
q->key = *key;
|
||||
|
||||
@@ -1226,6 +1226,7 @@ retry_private:
|
||||
*/
|
||||
if (ret == 1) {
|
||||
WARN_ON(pi_state);
|
||||
drop_count++;
|
||||
task_count++;
|
||||
ret = get_futex_value_locked(&curval2, uaddr2);
|
||||
if (!ret)
|
||||
@@ -1304,6 +1305,7 @@ retry_private:
|
||||
if (ret == 1) {
|
||||
/* We got the lock. */
|
||||
requeue_pi_wake_futex(this, &key2, hb2);
|
||||
drop_count++;
|
||||
continue;
|
||||
} else if (ret) {
|
||||
/* -EDEADLK */
|
||||
@@ -1791,6 +1793,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
|
||||
current->timer_slack_ns);
|
||||
}
|
||||
|
||||
retry:
|
||||
/* Prepare to wait on uaddr. */
|
||||
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
|
||||
if (ret)
|
||||
@@ -1808,9 +1811,14 @@ static int futex_wait(u32 __user *uaddr, int fshared,
|
||||
goto out_put_key;
|
||||
|
||||
/*
|
||||
* We expect signal_pending(current), but another thread may
|
||||
* have handled it for us already.
|
||||
* We expect signal_pending(current), but we might be the
|
||||
* victim of a spurious wakeup as well.
|
||||
*/
|
||||
if (!signal_pending(current)) {
|
||||
put_futex_key(fshared, &q.key);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
ret = -ERESTARTSYS;
|
||||
if (!abs_time)
|
||||
goto out_put_key;
|
||||
@@ -2118,9 +2126,11 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
|
||||
*/
|
||||
plist_del(&q->list, &q->list.plist);
|
||||
|
||||
/* Handle spurious wakeups gracefully */
|
||||
ret = -EWOULDBLOCK;
|
||||
if (timeout && !timeout->task)
|
||||
ret = -ETIMEDOUT;
|
||||
else
|
||||
else if (signal_pending(current))
|
||||
ret = -ERESTARTNOINTR;
|
||||
}
|
||||
return ret;
|
||||
|
@@ -121,7 +121,9 @@ static void poll_all_shared_irqs(void)
|
||||
if (!(status & IRQ_SPURIOUS_DISABLED))
|
||||
continue;
|
||||
|
||||
local_irq_disable();
|
||||
try_one_irq(i, desc);
|
||||
local_irq_enable();
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -149,29 +149,6 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_create);
|
||||
|
||||
/**
|
||||
* kthread_bind - bind a just-created kthread to a cpu.
|
||||
* @k: thread created by kthread_create().
|
||||
* @cpu: cpu (might not be online, must be possible) for @k to run on.
|
||||
*
|
||||
* Description: This function is equivalent to set_cpus_allowed(),
|
||||
* except that @cpu doesn't need to be online, and the thread must be
|
||||
* stopped (i.e., just returned from kthread_create()).
|
||||
*/
|
||||
void kthread_bind(struct task_struct *k, unsigned int cpu)
|
||||
{
|
||||
/* Must have done schedule() in kthread() before we set_task_cpu */
|
||||
if (!wait_task_inactive(k, TASK_UNINTERRUPTIBLE)) {
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
set_task_cpu(k, cpu);
|
||||
k->cpus_allowed = cpumask_of_cpu(cpu);
|
||||
k->rt.nr_cpus_allowed = 1;
|
||||
k->flags |= PF_THREAD_BOUND;
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_bind);
|
||||
|
||||
/**
|
||||
* kthread_stop - stop a thread created by kthread_create().
|
||||
* @k: thread created by kthread_create().
|
||||
|
@@ -218,15 +218,11 @@ int param_set_charp(const char *val, struct kernel_param *kp)
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
if (kp->flags & KPARAM_KMALLOCED)
|
||||
kfree(*(char **)kp->arg);
|
||||
|
||||
/* This is a hack. We can't strdup in early boot, and we
|
||||
* don't need to; this mangled commandline is preserved. */
|
||||
if (slab_is_available()) {
|
||||
kp->flags |= KPARAM_KMALLOCED;
|
||||
*(char **)kp->arg = kstrdup(val, GFP_KERNEL);
|
||||
if (!kp->arg)
|
||||
if (!*(char **)kp->arg)
|
||||
return -ENOMEM;
|
||||
} else
|
||||
*(const char **)kp->arg = val;
|
||||
@@ -304,6 +300,7 @@ static int param_array(const char *name,
|
||||
unsigned int min, unsigned int max,
|
||||
void *elem, int elemsize,
|
||||
int (*set)(const char *, struct kernel_param *kp),
|
||||
u16 flags,
|
||||
unsigned int *num)
|
||||
{
|
||||
int ret;
|
||||
@@ -313,6 +310,7 @@ static int param_array(const char *name,
|
||||
/* Get the name right for errors. */
|
||||
kp.name = name;
|
||||
kp.arg = elem;
|
||||
kp.flags = flags;
|
||||
|
||||
/* No equals sign? */
|
||||
if (!val) {
|
||||
@@ -358,7 +356,8 @@ int param_array_set(const char *val, struct kernel_param *kp)
|
||||
unsigned int temp_num;
|
||||
|
||||
return param_array(kp->name, val, 1, arr->max, arr->elem,
|
||||
arr->elemsize, arr->set, arr->num ?: &temp_num);
|
||||
arr->elemsize, arr->set, kp->flags,
|
||||
arr->num ?: &temp_num);
|
||||
}
|
||||
|
||||
int param_array_get(char *buffer, struct kernel_param *kp)
|
||||
@@ -605,11 +604,7 @@ void module_param_sysfs_remove(struct module *mod)
|
||||
|
||||
void destroy_params(const struct kernel_param *params, unsigned num)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < num; i++)
|
||||
if (params[i].flags & KPARAM_KMALLOCED)
|
||||
kfree(*(char **)params[i].arg);
|
||||
/* FIXME: This should free kmalloced charp parameters. It doesn't. */
|
||||
}
|
||||
|
||||
static void __init kernel_add_sysfs_param(const char *name,
|
||||
|
@@ -1355,7 +1355,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
|
||||
u64 interrupts, freq;
|
||||
|
||||
spin_lock(&ctx->lock);
|
||||
list_for_each_entry(event, &ctx->group_list, group_entry) {
|
||||
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
|
||||
if (event->state != PERF_EVENT_STATE_ACTIVE)
|
||||
continue;
|
||||
|
||||
@@ -3959,8 +3959,9 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
|
||||
regs = task_pt_regs(current);
|
||||
|
||||
if (regs) {
|
||||
if (perf_event_overflow(event, 0, &data, regs))
|
||||
ret = HRTIMER_NORESTART;
|
||||
if (!(event->attr.exclude_idle && current->pid == 0))
|
||||
if (perf_event_overflow(event, 0, &data, regs))
|
||||
ret = HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
period = max_t(u64, 10000, event->hw.sample_period);
|
||||
@@ -3969,6 +3970,42 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void perf_swevent_start_hrtimer(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
hwc->hrtimer.function = perf_swevent_hrtimer;
|
||||
if (hwc->sample_period) {
|
||||
u64 period;
|
||||
|
||||
if (hwc->remaining) {
|
||||
if (hwc->remaining < 0)
|
||||
period = 10000;
|
||||
else
|
||||
period = hwc->remaining;
|
||||
hwc->remaining = 0;
|
||||
} else {
|
||||
period = max_t(u64, 10000, hwc->sample_period);
|
||||
}
|
||||
__hrtimer_start_range_ns(&hwc->hrtimer,
|
||||
ns_to_ktime(period), 0,
|
||||
HRTIMER_MODE_REL, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void perf_swevent_cancel_hrtimer(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
|
||||
if (hwc->sample_period) {
|
||||
ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer);
|
||||
hwc->remaining = ktime_to_ns(remaining);
|
||||
|
||||
hrtimer_cancel(&hwc->hrtimer);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Software event: cpu wall time clock
|
||||
*/
|
||||
@@ -3991,22 +4028,14 @@ static int cpu_clock_perf_event_enable(struct perf_event *event)
|
||||
int cpu = raw_smp_processor_id();
|
||||
|
||||
atomic64_set(&hwc->prev_count, cpu_clock(cpu));
|
||||
hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
hwc->hrtimer.function = perf_swevent_hrtimer;
|
||||
if (hwc->sample_period) {
|
||||
u64 period = max_t(u64, 10000, hwc->sample_period);
|
||||
__hrtimer_start_range_ns(&hwc->hrtimer,
|
||||
ns_to_ktime(period), 0,
|
||||
HRTIMER_MODE_REL, 0);
|
||||
}
|
||||
perf_swevent_start_hrtimer(event);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cpu_clock_perf_event_disable(struct perf_event *event)
|
||||
{
|
||||
if (event->hw.sample_period)
|
||||
hrtimer_cancel(&event->hw.hrtimer);
|
||||
perf_swevent_cancel_hrtimer(event);
|
||||
cpu_clock_perf_event_update(event);
|
||||
}
|
||||
|
||||
@@ -4043,22 +4072,15 @@ static int task_clock_perf_event_enable(struct perf_event *event)
|
||||
now = event->ctx->time;
|
||||
|
||||
atomic64_set(&hwc->prev_count, now);
|
||||
hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
hwc->hrtimer.function = perf_swevent_hrtimer;
|
||||
if (hwc->sample_period) {
|
||||
u64 period = max_t(u64, 10000, hwc->sample_period);
|
||||
__hrtimer_start_range_ns(&hwc->hrtimer,
|
||||
ns_to_ktime(period), 0,
|
||||
HRTIMER_MODE_REL, 0);
|
||||
}
|
||||
|
||||
perf_swevent_start_hrtimer(event);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void task_clock_perf_event_disable(struct perf_event *event)
|
||||
{
|
||||
if (event->hw.sample_period)
|
||||
hrtimer_cancel(&event->hw.hrtimer);
|
||||
perf_swevent_cancel_hrtimer(event);
|
||||
task_clock_perf_event_update(event, event->ctx->time);
|
||||
|
||||
}
|
||||
|
@@ -693,21 +693,22 @@ static int software_resume(void)
|
||||
/* The snapshot device should not be opened while we're running */
|
||||
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
|
||||
error = -EBUSY;
|
||||
swsusp_close(FMODE_READ);
|
||||
goto Unlock;
|
||||
}
|
||||
|
||||
pm_prepare_console();
|
||||
error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
|
||||
if (error)
|
||||
goto Finish;
|
||||
goto close_finish;
|
||||
|
||||
error = usermodehelper_disable();
|
||||
if (error)
|
||||
goto Finish;
|
||||
goto close_finish;
|
||||
|
||||
error = create_basic_memory_bitmaps();
|
||||
if (error)
|
||||
goto Finish;
|
||||
goto close_finish;
|
||||
|
||||
pr_debug("PM: Preparing processes for restore.\n");
|
||||
error = prepare_processes();
|
||||
@@ -719,6 +720,7 @@ static int software_resume(void)
|
||||
pr_debug("PM: Reading hibernation image.\n");
|
||||
|
||||
error = swsusp_read(&flags);
|
||||
swsusp_close(FMODE_READ);
|
||||
if (!error)
|
||||
hibernation_restore(flags & SF_PLATFORM_MODE);
|
||||
|
||||
@@ -737,6 +739,9 @@ static int software_resume(void)
|
||||
mutex_unlock(&pm_mutex);
|
||||
pr_debug("PM: Resume from disk failed.\n");
|
||||
return error;
|
||||
close_finish:
|
||||
swsusp_close(FMODE_READ);
|
||||
goto Finish;
|
||||
}
|
||||
|
||||
late_initcall(software_resume);
|
||||
|
@@ -314,7 +314,6 @@ static int save_image(struct swap_map_handle *handle,
|
||||
{
|
||||
unsigned int m;
|
||||
int ret;
|
||||
int error = 0;
|
||||
int nr_pages;
|
||||
int err2;
|
||||
struct bio *bio;
|
||||
@@ -329,26 +328,27 @@ static int save_image(struct swap_map_handle *handle,
|
||||
nr_pages = 0;
|
||||
bio = NULL;
|
||||
do_gettimeofday(&start);
|
||||
do {
|
||||
while (1) {
|
||||
ret = snapshot_read_next(snapshot, PAGE_SIZE);
|
||||
if (ret > 0) {
|
||||
error = swap_write_page(handle, data_of(*snapshot),
|
||||
&bio);
|
||||
if (error)
|
||||
break;
|
||||
if (!(nr_pages % m))
|
||||
printk("\b\b\b\b%3d%%", nr_pages / m);
|
||||
nr_pages++;
|
||||
}
|
||||
} while (ret > 0);
|
||||
if (ret <= 0)
|
||||
break;
|
||||
ret = swap_write_page(handle, data_of(*snapshot), &bio);
|
||||
if (ret)
|
||||
break;
|
||||
if (!(nr_pages % m))
|
||||
printk("\b\b\b\b%3d%%", nr_pages / m);
|
||||
nr_pages++;
|
||||
}
|
||||
err2 = wait_on_bio_chain(&bio);
|
||||
do_gettimeofday(&stop);
|
||||
if (!error)
|
||||
error = err2;
|
||||
if (!error)
|
||||
if (!ret)
|
||||
ret = err2;
|
||||
if (!ret)
|
||||
printk("\b\b\b\bdone\n");
|
||||
else
|
||||
printk("\n");
|
||||
swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
|
||||
return error;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -536,7 +536,8 @@ static int load_image(struct swap_map_handle *handle,
|
||||
snapshot_write_finalize(snapshot);
|
||||
if (!snapshot_image_loaded(snapshot))
|
||||
error = -ENODATA;
|
||||
}
|
||||
} else
|
||||
printk("\n");
|
||||
swsusp_show_speed(&start, &stop, nr_to_read, "Read");
|
||||
return error;
|
||||
}
|
||||
@@ -572,8 +573,6 @@ int swsusp_read(unsigned int *flags_p)
|
||||
error = load_image(&handle, &snapshot, header->pages - 1);
|
||||
release_swap_reader(&handle);
|
||||
|
||||
blkdev_put(resume_bdev, FMODE_READ);
|
||||
|
||||
if (!error)
|
||||
pr_debug("PM: Image successfully loaded\n");
|
||||
else
|
||||
@@ -596,7 +595,7 @@ int swsusp_check(void)
|
||||
error = bio_read_page(swsusp_resume_block,
|
||||
swsusp_header, NULL);
|
||||
if (error)
|
||||
return error;
|
||||
goto put;
|
||||
|
||||
if (!memcmp(SWSUSP_SIG, swsusp_header->sig, 10)) {
|
||||
memcpy(swsusp_header->sig, swsusp_header->orig_sig, 10);
|
||||
@@ -604,8 +603,10 @@ int swsusp_check(void)
|
||||
error = bio_write_page(swsusp_resume_block,
|
||||
swsusp_header, NULL);
|
||||
} else {
|
||||
return -EINVAL;
|
||||
error = -EINVAL;
|
||||
}
|
||||
|
||||
put:
|
||||
if (error)
|
||||
blkdev_put(resume_bdev, FMODE_READ);
|
||||
else
|
||||
|
@@ -59,7 +59,7 @@
|
||||
NUM_RCU_LVL_2, \
|
||||
NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \
|
||||
}, \
|
||||
.signaled = RCU_SIGNAL_INIT, \
|
||||
.signaled = RCU_GP_IDLE, \
|
||||
.gpnum = -300, \
|
||||
.completed = -300, \
|
||||
.onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \
|
||||
@@ -657,14 +657,17 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
|
||||
* irqs disabled.
|
||||
*/
|
||||
rcu_for_each_node_breadth_first(rsp, rnp) {
|
||||
spin_lock(&rnp->lock); /* irqs already disabled. */
|
||||
spin_lock(&rnp->lock); /* irqs already disabled. */
|
||||
rcu_preempt_check_blocked_tasks(rnp);
|
||||
rnp->qsmask = rnp->qsmaskinit;
|
||||
rnp->gpnum = rsp->gpnum;
|
||||
spin_unlock(&rnp->lock); /* irqs already disabled. */
|
||||
spin_unlock(&rnp->lock); /* irqs remain disabled. */
|
||||
}
|
||||
|
||||
rnp = rcu_get_root(rsp);
|
||||
spin_lock(&rnp->lock); /* irqs already disabled. */
|
||||
rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
|
||||
spin_unlock(&rnp->lock); /* irqs remain disabled. */
|
||||
spin_unlock_irqrestore(&rsp->onofflock, flags);
|
||||
}
|
||||
|
||||
@@ -706,6 +709,7 @@ static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags)
|
||||
{
|
||||
WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
|
||||
rsp->completed = rsp->gpnum;
|
||||
rsp->signaled = RCU_GP_IDLE;
|
||||
rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
|
||||
rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
|
||||
}
|
||||
@@ -913,7 +917,20 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
|
||||
spin_unlock(&rnp->lock); /* irqs remain disabled. */
|
||||
break;
|
||||
}
|
||||
rcu_preempt_offline_tasks(rsp, rnp, rdp);
|
||||
|
||||
/*
|
||||
* If there was a task blocking the current grace period,
|
||||
* and if all CPUs have checked in, we need to propagate
|
||||
* the quiescent state up the rcu_node hierarchy. But that
|
||||
* is inconvenient at the moment due to deadlock issues if
|
||||
* this should end the current grace period. So set the
|
||||
* offlined CPU's bit in ->qsmask in order to force the
|
||||
* next force_quiescent_state() invocation to clean up this
|
||||
* mess in a deadlock-free manner.
|
||||
*/
|
||||
if (rcu_preempt_offline_tasks(rsp, rnp, rdp) && !rnp->qsmask)
|
||||
rnp->qsmask |= mask;
|
||||
|
||||
mask = rnp->grpmask;
|
||||
spin_unlock(&rnp->lock); /* irqs remain disabled. */
|
||||
rnp = rnp->parent;
|
||||
@@ -958,7 +975,7 @@ static void rcu_offline_cpu(int cpu)
|
||||
* Invoke any RCU callbacks that have made it to the end of their grace
|
||||
* period. Throttle as specified by rdp->blimit.
|
||||
*/
|
||||
static void rcu_do_batch(struct rcu_data *rdp)
|
||||
static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct rcu_head *next, *list, **tail;
|
||||
@@ -1011,6 +1028,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
|
||||
if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
|
||||
rdp->blimit = blimit;
|
||||
|
||||
/* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
|
||||
if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
|
||||
rdp->qlen_last_fqs_check = 0;
|
||||
rdp->n_force_qs_snap = rsp->n_force_qs;
|
||||
} else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
|
||||
rdp->qlen_last_fqs_check = rdp->qlen;
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
/* Re-raise the RCU softirq if there are callbacks remaining. */
|
||||
@@ -1142,9 +1166,10 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
|
||||
}
|
||||
spin_unlock(&rnp->lock);
|
||||
switch (signaled) {
|
||||
case RCU_GP_IDLE:
|
||||
case RCU_GP_INIT:
|
||||
|
||||
break; /* grace period still initializing, ignore. */
|
||||
break; /* grace period idle or initializing, ignore. */
|
||||
|
||||
case RCU_SAVE_DYNTICK:
|
||||
|
||||
@@ -1158,7 +1183,8 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
|
||||
|
||||
/* Update state, record completion counter. */
|
||||
spin_lock(&rnp->lock);
|
||||
if (lastcomp == rsp->completed) {
|
||||
if (lastcomp == rsp->completed &&
|
||||
rsp->signaled == RCU_SAVE_DYNTICK) {
|
||||
rsp->signaled = RCU_FORCE_QS;
|
||||
dyntick_record_completed(rsp, lastcomp);
|
||||
}
|
||||
@@ -1224,7 +1250,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
}
|
||||
|
||||
/* If there are callbacks ready, invoke them. */
|
||||
rcu_do_batch(rdp);
|
||||
rcu_do_batch(rsp, rdp);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1288,10 +1314,20 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
|
||||
rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */
|
||||
}
|
||||
|
||||
/* Force the grace period if too many callbacks or too long waiting. */
|
||||
if (unlikely(++rdp->qlen > qhimark)) {
|
||||
/*
|
||||
* Force the grace period if too many callbacks or too long waiting.
|
||||
* Enforce hysteresis, and don't invoke force_quiescent_state()
|
||||
* if some other CPU has recently done so. Also, don't bother
|
||||
* invoking force_quiescent_state() if the newly enqueued callback
|
||||
* is the only one waiting for a grace period to complete.
|
||||
*/
|
||||
if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
|
||||
rdp->blimit = LONG_MAX;
|
||||
force_quiescent_state(rsp, 0);
|
||||
if (rsp->n_force_qs == rdp->n_force_qs_snap &&
|
||||
*rdp->nxttail[RCU_DONE_TAIL] != head)
|
||||
force_quiescent_state(rsp, 0);
|
||||
rdp->n_force_qs_snap = rsp->n_force_qs;
|
||||
rdp->qlen_last_fqs_check = rdp->qlen;
|
||||
} else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
|
||||
force_quiescent_state(rsp, 1);
|
||||
local_irq_restore(flags);
|
||||
@@ -1523,6 +1559,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
|
||||
rdp->beenonline = 1; /* We have now been online. */
|
||||
rdp->preemptable = preemptable;
|
||||
rdp->passed_quiesc_completed = lastcomp - 1;
|
||||
rdp->qlen_last_fqs_check = 0;
|
||||
rdp->n_force_qs_snap = rsp->n_force_qs;
|
||||
rdp->blimit = blimit;
|
||||
spin_unlock(&rnp->lock); /* irqs remain disabled. */
|
||||
|
||||
|
@@ -167,6 +167,10 @@ struct rcu_data {
|
||||
struct rcu_head *nxtlist;
|
||||
struct rcu_head **nxttail[RCU_NEXT_SIZE];
|
||||
long qlen; /* # of queued callbacks */
|
||||
long qlen_last_fqs_check;
|
||||
/* qlen at last check for QS forcing */
|
||||
unsigned long n_force_qs_snap;
|
||||
/* did other CPU force QS recently? */
|
||||
long blimit; /* Upper limit on a processed batch */
|
||||
|
||||
#ifdef CONFIG_NO_HZ
|
||||
@@ -197,9 +201,10 @@ struct rcu_data {
|
||||
};
|
||||
|
||||
/* Values for signaled field in struct rcu_state. */
|
||||
#define RCU_GP_INIT 0 /* Grace period being initialized. */
|
||||
#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */
|
||||
#define RCU_FORCE_QS 2 /* Need to force quiescent state. */
|
||||
#define RCU_GP_IDLE 0 /* No grace period in progress. */
|
||||
#define RCU_GP_INIT 1 /* Grace period being initialized. */
|
||||
#define RCU_SAVE_DYNTICK 2 /* Need to scan dyntick state. */
|
||||
#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
|
||||
#ifdef CONFIG_NO_HZ
|
||||
#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
|
||||
#else /* #ifdef CONFIG_NO_HZ */
|
||||
@@ -302,9 +307,9 @@ static void rcu_print_task_stall(struct rcu_node *rnp);
|
||||
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
|
||||
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
|
||||
struct rcu_node *rnp,
|
||||
struct rcu_data *rdp);
|
||||
static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
|
||||
struct rcu_node *rnp,
|
||||
struct rcu_data *rdp);
|
||||
static void rcu_preempt_offline_cpu(int cpu);
|
||||
#endif /* #ifdef CONFIG_HOTPLUG_CPU */
|
||||
static void rcu_preempt_check_callbacks(int cpu);
|
||||
|
@@ -304,21 +304,25 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
|
||||
* parent is to remove the need for rcu_read_unlock_special() to
|
||||
* make more than two attempts to acquire the target rcu_node's lock.
|
||||
*
|
||||
* Returns 1 if there was previously a task blocking the current grace
|
||||
* period on the specified rcu_node structure.
|
||||
*
|
||||
* The caller must hold rnp->lock with irqs disabled.
|
||||
*/
|
||||
static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
|
||||
struct rcu_node *rnp,
|
||||
struct rcu_data *rdp)
|
||||
static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
|
||||
struct rcu_node *rnp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
int i;
|
||||
struct list_head *lp;
|
||||
struct list_head *lp_root;
|
||||
int retval = rcu_preempted_readers(rnp);
|
||||
struct rcu_node *rnp_root = rcu_get_root(rsp);
|
||||
struct task_struct *tp;
|
||||
|
||||
if (rnp == rnp_root) {
|
||||
WARN_ONCE(1, "Last CPU thought to be offlined?");
|
||||
return; /* Shouldn't happen: at least one CPU online. */
|
||||
return 0; /* Shouldn't happen: at least one CPU online. */
|
||||
}
|
||||
WARN_ON_ONCE(rnp != rdp->mynode &&
|
||||
(!list_empty(&rnp->blocked_tasks[0]) ||
|
||||
@@ -342,6 +346,8 @@ static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
|
||||
spin_unlock(&rnp_root->lock); /* irqs remain disabled */
|
||||
}
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -392,6 +398,17 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(call_rcu);
|
||||
|
||||
/*
|
||||
* Wait for an rcu-preempt grace period. We are supposed to expedite the
|
||||
* grace period, but this is the crude slow compatibility hack, so just
|
||||
* invoke synchronize_rcu().
|
||||
*/
|
||||
void synchronize_rcu_expedited(void)
|
||||
{
|
||||
synchronize_rcu();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
|
||||
|
||||
/*
|
||||
* Check to see if there is any immediate preemptable-RCU-related work
|
||||
* to be done.
|
||||
@@ -521,12 +538,15 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
|
||||
|
||||
/*
|
||||
* Because preemptable RCU does not exist, it never needs to migrate
|
||||
* tasks that were blocked within RCU read-side critical sections.
|
||||
* tasks that were blocked within RCU read-side critical sections, and
|
||||
* such non-existent tasks cannot possibly have been blocking the current
|
||||
* grace period.
|
||||
*/
|
||||
static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
|
||||
struct rcu_node *rnp,
|
||||
struct rcu_data *rdp)
|
||||
static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
|
||||
struct rcu_node *rnp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -564,6 +584,16 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(call_rcu);
|
||||
|
||||
/*
|
||||
* Wait for an rcu-preempt grace period, but make it happen quickly.
|
||||
* But because preemptable RCU does not exist, map to rcu-sched.
|
||||
*/
|
||||
void synchronize_rcu_expedited(void)
|
||||
{
|
||||
synchronize_sched_expedited();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
|
||||
|
||||
/*
|
||||
* Because preemptable RCU does not exist, it never has any work to do.
|
||||
*/
|
||||
|
@@ -309,6 +309,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq);
|
||||
*/
|
||||
static DEFINE_SPINLOCK(task_group_lock);
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static int root_task_group_empty(void)
|
||||
{
|
||||
@@ -316,7 +318,6 @@ static int root_task_group_empty(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
#ifdef CONFIG_USER_SCHED
|
||||
# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD)
|
||||
#else /* !CONFIG_USER_SCHED */
|
||||
@@ -1564,11 +1565,7 @@ static unsigned long cpu_avg_load_per_task(int cpu)
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
|
||||
struct update_shares_data {
|
||||
unsigned long rq_weight[NR_CPUS];
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct update_shares_data, update_shares_data);
|
||||
static __read_mostly unsigned long *update_shares_data;
|
||||
|
||||
static void __set_se_shares(struct sched_entity *se, unsigned long shares);
|
||||
|
||||
@@ -1578,12 +1575,12 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares);
|
||||
static void update_group_shares_cpu(struct task_group *tg, int cpu,
|
||||
unsigned long sd_shares,
|
||||
unsigned long sd_rq_weight,
|
||||
struct update_shares_data *usd)
|
||||
unsigned long *usd_rq_weight)
|
||||
{
|
||||
unsigned long shares, rq_weight;
|
||||
int boost = 0;
|
||||
|
||||
rq_weight = usd->rq_weight[cpu];
|
||||
rq_weight = usd_rq_weight[cpu];
|
||||
if (!rq_weight) {
|
||||
boost = 1;
|
||||
rq_weight = NICE_0_LOAD;
|
||||
@@ -1618,7 +1615,7 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu,
|
||||
static int tg_shares_up(struct task_group *tg, void *data)
|
||||
{
|
||||
unsigned long weight, rq_weight = 0, shares = 0;
|
||||
struct update_shares_data *usd;
|
||||
unsigned long *usd_rq_weight;
|
||||
struct sched_domain *sd = data;
|
||||
unsigned long flags;
|
||||
int i;
|
||||
@@ -1627,11 +1624,11 @@ static int tg_shares_up(struct task_group *tg, void *data)
|
||||
return 0;
|
||||
|
||||
local_irq_save(flags);
|
||||
usd = &__get_cpu_var(update_shares_data);
|
||||
usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id());
|
||||
|
||||
for_each_cpu(i, sched_domain_span(sd)) {
|
||||
weight = tg->cfs_rq[i]->load.weight;
|
||||
usd->rq_weight[i] = weight;
|
||||
usd_rq_weight[i] = weight;
|
||||
|
||||
/*
|
||||
* If there are currently no tasks on the cpu pretend there
|
||||
@@ -1652,7 +1649,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
|
||||
shares = tg->shares;
|
||||
|
||||
for_each_cpu(i, sched_domain_span(sd))
|
||||
update_group_shares_cpu(tg, i, shares, rq_weight, usd);
|
||||
update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight);
|
||||
|
||||
local_irq_restore(flags);
|
||||
|
||||
@@ -1996,6 +1993,39 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
|
||||
p->sched_class->prio_changed(rq, p, oldprio, running);
|
||||
}
|
||||
|
||||
/**
|
||||
* kthread_bind - bind a just-created kthread to a cpu.
|
||||
* @p: thread created by kthread_create().
|
||||
* @cpu: cpu (might not be online, must be possible) for @p to run on.
|
||||
*
|
||||
* Description: This function is equivalent to set_cpus_allowed(),
|
||||
* except that @cpu doesn't need to be online, and the thread must be
|
||||
* stopped (i.e., just returned from kthread_create()).
|
||||
*
|
||||
* Function lives here instead of kthread.c because it messes with
|
||||
* scheduler internals which require locking.
|
||||
*/
|
||||
void kthread_bind(struct task_struct *p, unsigned int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
unsigned long flags;
|
||||
|
||||
/* Must have done schedule() in kthread() before we set_task_cpu */
|
||||
if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&rq->lock, flags);
|
||||
update_rq_clock(rq);
|
||||
set_task_cpu(p, cpu);
|
||||
p->cpus_allowed = cpumask_of_cpu(cpu);
|
||||
p->rt.nr_cpus_allowed = 1;
|
||||
p->flags |= PF_THREAD_BOUND;
|
||||
spin_unlock_irqrestore(&rq->lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_bind);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Is this task likely cache-hot:
|
||||
@@ -2008,7 +2038,7 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
|
||||
/*
|
||||
* Buddy candidates are cache hot:
|
||||
*/
|
||||
if (sched_feat(CACHE_HOT_BUDDY) &&
|
||||
if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running &&
|
||||
(&p->se == cfs_rq_of(&p->se)->next ||
|
||||
&p->se == cfs_rq_of(&p->se)->last))
|
||||
return 1;
|
||||
@@ -2085,6 +2115,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
|
||||
* it is sufficient to simply update the task's cpu field.
|
||||
*/
|
||||
if (!p->se.on_rq && !task_running(rq, p)) {
|
||||
update_rq_clock(rq);
|
||||
set_task_cpu(p, dest_cpu);
|
||||
return 0;
|
||||
}
|
||||
@@ -2346,13 +2377,14 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
|
||||
task_rq_unlock(rq, &flags);
|
||||
|
||||
cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
|
||||
if (cpu != orig_cpu)
|
||||
set_task_cpu(p, cpu);
|
||||
|
||||
rq = task_rq_lock(p, &flags);
|
||||
|
||||
if (rq != orig_rq)
|
||||
if (cpu != orig_cpu) {
|
||||
local_irq_save(flags);
|
||||
rq = cpu_rq(cpu);
|
||||
update_rq_clock(rq);
|
||||
set_task_cpu(p, cpu);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
rq = task_rq_lock(p, &flags);
|
||||
|
||||
WARN_ON(p->state != TASK_WAKING);
|
||||
cpu = task_cpu(p);
|
||||
@@ -2526,6 +2558,7 @@ static void __sched_fork(struct task_struct *p)
|
||||
void sched_fork(struct task_struct *p, int clone_flags)
|
||||
{
|
||||
int cpu = get_cpu();
|
||||
unsigned long flags;
|
||||
|
||||
__sched_fork(p);
|
||||
|
||||
@@ -2562,7 +2595,10 @@ void sched_fork(struct task_struct *p, int clone_flags)
|
||||
#ifdef CONFIG_SMP
|
||||
cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
|
||||
#endif
|
||||
local_irq_save(flags);
|
||||
update_rq_clock(cpu_rq(cpu));
|
||||
set_task_cpu(p, cpu);
|
||||
local_irq_restore(flags);
|
||||
|
||||
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
|
||||
if (likely(sched_info_on()))
|
||||
@@ -2732,9 +2768,9 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
|
||||
prev_state = prev->state;
|
||||
finish_arch_switch(prev);
|
||||
perf_event_task_sched_in(current, cpu_of(rq));
|
||||
fire_sched_in_preempt_notifiers(current);
|
||||
finish_lock_switch(rq, prev);
|
||||
|
||||
fire_sched_in_preempt_notifiers(current);
|
||||
if (mm)
|
||||
mmdrop(mm);
|
||||
if (unlikely(prev_state == TASK_DEAD)) {
|
||||
@@ -7898,6 +7934,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
|
||||
|
||||
static void free_rootdomain(struct root_domain *rd)
|
||||
{
|
||||
synchronize_sched();
|
||||
|
||||
cpupri_cleanup(&rd->cpupri);
|
||||
|
||||
free_cpumask_var(rd->rto_mask);
|
||||
@@ -9449,6 +9487,10 @@ void __init sched_init(void)
|
||||
#endif /* CONFIG_USER_SCHED */
|
||||
#endif /* CONFIG_GROUP_SCHED */
|
||||
|
||||
#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
|
||||
update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
|
||||
__alignof__(unsigned long));
|
||||
#endif
|
||||
for_each_possible_cpu(i) {
|
||||
struct rq *rq;
|
||||
|
||||
@@ -9576,13 +9618,13 @@ void __init sched_init(void)
|
||||
current->sched_class = &fair_sched_class;
|
||||
|
||||
/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
|
||||
alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
|
||||
zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
|
||||
#ifdef CONFIG_SMP
|
||||
#ifdef CONFIG_NO_HZ
|
||||
alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
|
||||
zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
|
||||
alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
|
||||
#endif
|
||||
alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
|
||||
zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
|
||||
#endif /* SMP */
|
||||
|
||||
perf_event_init();
|
||||
|
@@ -822,6 +822,26 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
|
||||
* re-elected due to buddy favours.
|
||||
*/
|
||||
clear_buddies(cfs_rq, curr);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that a task that missed wakeup preemption by a
|
||||
* narrow margin doesn't have to wait for a full slice.
|
||||
* This also mitigates buddy induced latencies under load.
|
||||
*/
|
||||
if (!sched_feat(WAKEUP_PREEMPT))
|
||||
return;
|
||||
|
||||
if (delta_exec < sysctl_sched_min_granularity)
|
||||
return;
|
||||
|
||||
if (cfs_rq->nr_running > 1) {
|
||||
struct sched_entity *se = __pick_next_entity(cfs_rq);
|
||||
s64 delta = curr->vruntime - se->vruntime;
|
||||
|
||||
if (delta > ideal_runtime)
|
||||
resched_task(rq_of(cfs_rq)->curr);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -861,12 +881,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
|
||||
static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
struct sched_entity *se = __pick_next_entity(cfs_rq);
|
||||
struct sched_entity *left = se;
|
||||
|
||||
if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1)
|
||||
return cfs_rq->next;
|
||||
if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
|
||||
se = cfs_rq->next;
|
||||
|
||||
if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1)
|
||||
return cfs_rq->last;
|
||||
/*
|
||||
* Prefer last buddy, try to return the CPU to a preempted task.
|
||||
*/
|
||||
if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
|
||||
se = cfs_rq->last;
|
||||
|
||||
clear_buddies(cfs_rq, se);
|
||||
|
||||
return se;
|
||||
}
|
||||
@@ -1623,6 +1649,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
|
||||
struct sched_entity *se = &curr->se, *pse = &p->se;
|
||||
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
|
||||
int sync = wake_flags & WF_SYNC;
|
||||
int scale = cfs_rq->nr_running >= sched_nr_latency;
|
||||
|
||||
update_curr(cfs_rq);
|
||||
|
||||
@@ -1637,18 +1664,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
|
||||
if (unlikely(se == pse))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Only set the backward buddy when the current task is still on the
|
||||
* rq. This can happen when a wakeup gets interleaved with schedule on
|
||||
* the ->pre_schedule() or idle_balance() point, either of which can
|
||||
* drop the rq lock.
|
||||
*
|
||||
* Also, during early boot the idle thread is in the fair class, for
|
||||
* obvious reasons its a bad idea to schedule back to the idle thread.
|
||||
*/
|
||||
if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
|
||||
set_last_buddy(se);
|
||||
if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK))
|
||||
if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK))
|
||||
set_next_buddy(pse);
|
||||
|
||||
/*
|
||||
@@ -1694,8 +1710,22 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
|
||||
|
||||
BUG_ON(!pse);
|
||||
|
||||
if (wakeup_preempt_entity(se, pse) == 1)
|
||||
if (wakeup_preempt_entity(se, pse) == 1) {
|
||||
resched_task(curr);
|
||||
/*
|
||||
* Only set the backward buddy when the current task is still
|
||||
* on the rq. This can happen when a wakeup gets interleaved
|
||||
* with schedule on the ->pre_schedule() or idle_balance()
|
||||
* point, either of which can drop the rq lock.
|
||||
*
|
||||
* Also, during early boot the idle thread is in the fair class,
|
||||
* for obvious reasons it's a bad idea to schedule back to it.
|
||||
*/
|
||||
if (unlikely(!se->on_rq || curr == rq->idle))
|
||||
return;
|
||||
if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
|
||||
set_last_buddy(se);
|
||||
}
|
||||
}
|
||||
|
||||
static struct task_struct *pick_next_task_fair(struct rq *rq)
|
||||
@@ -1709,16 +1739,6 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
|
||||
|
||||
do {
|
||||
se = pick_next_entity(cfs_rq);
|
||||
/*
|
||||
* If se was a buddy, clear it so that it will have to earn
|
||||
* the favour again.
|
||||
*
|
||||
* If se was not a buddy, clear the buddies because neither
|
||||
* was elegible to run, let them earn it again.
|
||||
*
|
||||
* IOW. unconditionally clear buddies.
|
||||
*/
|
||||
__clear_buddies(cfs_rq, NULL);
|
||||
set_next_entity(cfs_rq, se);
|
||||
cfs_rq = group_cfs_rq(se);
|
||||
} while (cfs_rq);
|
||||
|
25
kernel/sys.c
25
kernel/sys.c
@@ -1110,6 +1110,8 @@ SYSCALL_DEFINE0(setsid)
|
||||
err = session;
|
||||
out:
|
||||
write_unlock_irq(&tasklist_lock);
|
||||
if (err > 0)
|
||||
proc_sid_connector(group_leader);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -1546,24 +1548,37 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
|
||||
if (arg4 | arg5)
|
||||
return -EINVAL;
|
||||
switch (arg2) {
|
||||
case 0:
|
||||
case PR_MCE_KILL_CLEAR:
|
||||
if (arg3 != 0)
|
||||
return -EINVAL;
|
||||
current->flags &= ~PF_MCE_PROCESS;
|
||||
break;
|
||||
case 1:
|
||||
case PR_MCE_KILL_SET:
|
||||
current->flags |= PF_MCE_PROCESS;
|
||||
if (arg3 != 0)
|
||||
if (arg3 == PR_MCE_KILL_EARLY)
|
||||
current->flags |= PF_MCE_EARLY;
|
||||
else
|
||||
else if (arg3 == PR_MCE_KILL_LATE)
|
||||
current->flags &= ~PF_MCE_EARLY;
|
||||
else if (arg3 == PR_MCE_KILL_DEFAULT)
|
||||
current->flags &=
|
||||
~(PF_MCE_EARLY|PF_MCE_PROCESS);
|
||||
else
|
||||
return -EINVAL;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
error = 0;
|
||||
break;
|
||||
|
||||
case PR_MCE_KILL_GET:
|
||||
if (arg2 | arg3 | arg4 | arg5)
|
||||
return -EINVAL;
|
||||
if (current->flags & PF_MCE_PROCESS)
|
||||
error = (current->flags & PF_MCE_EARLY) ?
|
||||
PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
|
||||
else
|
||||
error = PR_MCE_KILL_DEFAULT;
|
||||
break;
|
||||
default:
|
||||
error = -EINVAL;
|
||||
break;
|
||||
|
@@ -1521,7 +1521,7 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
|
||||
if (!table->ctl_name && table->strategy)
|
||||
set_fail(&fail, table, "Strategy without ctl_name");
|
||||
#endif
|
||||
#ifdef CONFIG_PROC_FS
|
||||
#ifdef CONFIG_PROC_SYSCTL
|
||||
if (table->procname && !table->proc_handler)
|
||||
set_fail(&fail, table, "No proc_handler");
|
||||
#endif
|
||||
|
@@ -740,7 +740,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf,
|
||||
out:
|
||||
mutex_unlock(&ftrace_profile_lock);
|
||||
|
||||
filp->f_pos += cnt;
|
||||
*ppos += cnt;
|
||||
|
||||
return cnt;
|
||||
}
|
||||
@@ -2222,15 +2222,15 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
|
||||
ret = ftrace_process_regex(parser->buffer,
|
||||
parser->idx, enable);
|
||||
if (ret)
|
||||
goto out;
|
||||
goto out_unlock;
|
||||
|
||||
trace_parser_clear(parser);
|
||||
}
|
||||
|
||||
ret = read;
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&ftrace_regex_lock);
|
||||
out:
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@@ -483,7 +483,7 @@ struct ring_buffer_iter {
|
||||
/* Up this if you want to test the TIME_EXTENTS and normalization */
|
||||
#define DEBUG_SHIFT 0
|
||||
|
||||
static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu)
|
||||
static inline u64 rb_time_stamp(struct ring_buffer *buffer)
|
||||
{
|
||||
/* shift to debug/test normalization and TIME_EXTENTS */
|
||||
return buffer->clock() << DEBUG_SHIFT;
|
||||
@@ -494,7 +494,7 @@ u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
|
||||
u64 time;
|
||||
|
||||
preempt_disable_notrace();
|
||||
time = rb_time_stamp(buffer, cpu);
|
||||
time = rb_time_stamp(buffer);
|
||||
preempt_enable_no_resched_notrace();
|
||||
|
||||
return time;
|
||||
@@ -599,7 +599,7 @@ static struct list_head *rb_list_head(struct list_head *list)
|
||||
}
|
||||
|
||||
/*
|
||||
* rb_is_head_page - test if the give page is the head page
|
||||
* rb_is_head_page - test if the given page is the head page
|
||||
*
|
||||
* Because the reader may move the head_page pointer, we can
|
||||
* not trust what the head page is (it may be pointing to
|
||||
@@ -1193,6 +1193,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
|
||||
atomic_inc(&cpu_buffer->record_disabled);
|
||||
synchronize_sched();
|
||||
|
||||
spin_lock_irq(&cpu_buffer->reader_lock);
|
||||
rb_head_page_deactivate(cpu_buffer);
|
||||
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
@@ -1207,6 +1208,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
|
||||
return;
|
||||
|
||||
rb_reset_cpu(cpu_buffer);
|
||||
spin_unlock_irq(&cpu_buffer->reader_lock);
|
||||
|
||||
rb_check_pages(cpu_buffer);
|
||||
|
||||
@@ -1868,7 +1870,7 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
|
||||
* Nested commits always have zero deltas, so
|
||||
* just reread the time stamp
|
||||
*/
|
||||
*ts = rb_time_stamp(buffer, cpu_buffer->cpu);
|
||||
*ts = rb_time_stamp(buffer);
|
||||
next_page->page->time_stamp = *ts;
|
||||
}
|
||||
|
||||
@@ -2111,7 +2113,7 @@ rb_reserve_next_event(struct ring_buffer *buffer,
|
||||
if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
|
||||
goto out_fail;
|
||||
|
||||
ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
|
||||
ts = rb_time_stamp(cpu_buffer->buffer);
|
||||
|
||||
/*
|
||||
* Only the first commit can update the timestamp.
|
||||
@@ -2681,7 +2683,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
|
||||
EXPORT_SYMBOL_GPL(ring_buffer_entries);
|
||||
|
||||
/**
|
||||
* ring_buffer_overrun_cpu - get the number of overruns in buffer
|
||||
* ring_buffer_overruns - get the number of overruns in buffer
|
||||
* @buffer: The ring buffer
|
||||
*
|
||||
* Returns the total number of overruns in the ring buffer
|
||||
|
@@ -2440,7 +2440,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
|
||||
return ret;
|
||||
}
|
||||
|
||||
filp->f_pos += cnt;
|
||||
*ppos += cnt;
|
||||
|
||||
return cnt;
|
||||
}
|
||||
@@ -2582,7 +2582,7 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
|
||||
}
|
||||
mutex_unlock(&trace_types_lock);
|
||||
|
||||
filp->f_pos += cnt;
|
||||
*ppos += cnt;
|
||||
|
||||
return cnt;
|
||||
}
|
||||
@@ -2764,7 +2764,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
filp->f_pos += ret;
|
||||
*ppos += ret;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -3299,7 +3299,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
|
||||
}
|
||||
}
|
||||
|
||||
filp->f_pos += cnt;
|
||||
*ppos += cnt;
|
||||
|
||||
/* If check pages failed, return ENOMEM */
|
||||
if (tracing_disabled)
|
||||
|
@@ -69,6 +69,9 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
|
||||
* @s: trace sequence descriptor
|
||||
* @fmt: printf format string
|
||||
*
|
||||
* It returns 0 if the trace oversizes the buffer's free
|
||||
* space, 1 otherwise.
|
||||
*
|
||||
* The tracer may use either sequence operations or its own
|
||||
* copy to user routines. To simplify formatting of a trace
|
||||
* trace_seq_printf is used to store strings into a special
|
||||
@@ -95,7 +98,7 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
|
||||
|
||||
s->len += ret;
|
||||
|
||||
return len;
|
||||
return 1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(trace_seq_printf);
|
||||
|
||||
|
@@ -330,9 +330,9 @@ done:
|
||||
*/
|
||||
static void free_user(struct user_struct *up, unsigned long flags)
|
||||
{
|
||||
spin_unlock_irqrestore(&uidhash_lock, flags);
|
||||
INIT_DELAYED_WORK(&up->work, cleanup_user_struct);
|
||||
schedule_delayed_work(&up->work, msecs_to_jiffies(1000));
|
||||
spin_unlock_irqrestore(&uidhash_lock, flags);
|
||||
}
|
||||
|
||||
#else /* CONFIG_USER_SCHED && CONFIG_SYSFS */
|
||||
|
@@ -685,21 +685,38 @@ EXPORT_SYMBOL(schedule_delayed_work_on);
|
||||
int schedule_on_each_cpu(work_func_t func)
|
||||
{
|
||||
int cpu;
|
||||
int orig = -1;
|
||||
struct work_struct *works;
|
||||
|
||||
works = alloc_percpu(struct work_struct);
|
||||
if (!works)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* When running in keventd, don't schedule a work item on itself.
|
||||
* Can just call directly because the work queue is already bound.
|
||||
* This also is faster.
|
||||
* Make this a generic parameter for other workqueues?
|
||||
*/
|
||||
if (current_is_keventd()) {
|
||||
orig = raw_smp_processor_id();
|
||||
INIT_WORK(per_cpu_ptr(works, orig), func);
|
||||
func(per_cpu_ptr(works, orig));
|
||||
}
|
||||
|
||||
get_online_cpus();
|
||||
for_each_online_cpu(cpu) {
|
||||
struct work_struct *work = per_cpu_ptr(works, cpu);
|
||||
|
||||
if (cpu == orig)
|
||||
continue;
|
||||
INIT_WORK(work, func);
|
||||
schedule_work_on(cpu, work);
|
||||
}
|
||||
for_each_online_cpu(cpu)
|
||||
flush_work(per_cpu_ptr(works, cpu));
|
||||
for_each_online_cpu(cpu) {
|
||||
if (cpu != orig)
|
||||
flush_work(per_cpu_ptr(works, cpu));
|
||||
}
|
||||
put_online_cpus();
|
||||
free_percpu(works);
|
||||
return 0;
|
||||
|
Reference in New Issue
Block a user