Merge branch 'perf/urgent' into perf/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
@@ -471,6 +471,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
      */
     p++;
     if (p >= end) {
+        (*pos)++;
         return NULL;
     } else {
         *pos = *p;
@@ -782,7 +783,7 @@ void cgroup1_release_agent(struct work_struct *work)

     pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
     agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
-    if (!pathbuf || !agentbuf)
+    if (!pathbuf || !agentbuf || !strlen(agentbuf))
         goto out;

     spin_lock_irq(&css_set_lock);
@@ -3542,21 +3542,21 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
 static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
 {
     struct cgroup *cgrp = seq_css(seq)->cgroup;
-    struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
+    struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;

     return psi_show(seq, psi, PSI_IO);
 }
 static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
 {
     struct cgroup *cgrp = seq_css(seq)->cgroup;
-    struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
+    struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;

     return psi_show(seq, psi, PSI_MEM);
 }
 static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
 {
     struct cgroup *cgrp = seq_css(seq)->cgroup;
-    struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
+    struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;

     return psi_show(seq, psi, PSI_CPU);
 }
@@ -4400,12 +4400,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
         }
     } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));

-    if (!list_empty(&cset->tasks))
+    if (!list_empty(&cset->tasks)) {
         it->task_pos = cset->tasks.next;
-    else if (!list_empty(&cset->mg_tasks))
+        it->cur_tasks_head = &cset->tasks;
+    } else if (!list_empty(&cset->mg_tasks)) {
         it->task_pos = cset->mg_tasks.next;
-    else
+        it->cur_tasks_head = &cset->mg_tasks;
+    } else {
         it->task_pos = cset->dying_tasks.next;
+        it->cur_tasks_head = &cset->dying_tasks;
+    }

     it->tasks_head = &cset->tasks;
     it->mg_tasks_head = &cset->mg_tasks;
@@ -4463,10 +4467,14 @@ repeat:
         else
             it->task_pos = it->task_pos->next;

-        if (it->task_pos == it->tasks_head)
+        if (it->task_pos == it->tasks_head) {
             it->task_pos = it->mg_tasks_head->next;
-        if (it->task_pos == it->mg_tasks_head)
+            it->cur_tasks_head = it->mg_tasks_head;
+        }
+        if (it->task_pos == it->mg_tasks_head) {
             it->task_pos = it->dying_tasks_head->next;
+            it->cur_tasks_head = it->dying_tasks_head;
+        }
         if (it->task_pos == it->dying_tasks_head)
             css_task_iter_advance_css_set(it);
     } else {
@@ -4485,11 +4493,12 @@ repeat:
             goto repeat;

         /* and dying leaders w/o live member threads */
-        if (!atomic_read(&task->signal->live))
+        if (it->cur_tasks_head == it->dying_tasks_head &&
+            !atomic_read(&task->signal->live))
             goto repeat;
     } else {
         /* skip all dying ones */
-        if (task->flags & PF_EXITING)
+        if (it->cur_tasks_head == it->dying_tasks_head)
             goto repeat;
     }
 }
@@ -4595,6 +4604,9 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
     struct kernfs_open_file *of = s->private;
     struct css_task_iter *it = of->priv;

+    if (pos)
+        (*pos)++;
+
     return css_task_iter_next(it);
 }
@@ -4610,7 +4622,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
      * from position 0, so we can simply keep iterating on !0 *pos.
      */
     if (!it) {
-        if (WARN_ON_ONCE((*pos)++))
+        if (WARN_ON_ONCE((*pos)))
             return ERR_PTR(-EINVAL);

         it = kzalloc(sizeof(*it), GFP_KERNEL);
@@ -4618,10 +4630,11 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
             return ERR_PTR(-ENOMEM);
         of->priv = it;
         css_task_iter_start(&cgrp->self, iter_flags, it);
-    } else if (!(*pos)++) {
+    } else if (!(*pos)) {
         css_task_iter_end(it);
         css_task_iter_start(&cgrp->self, iter_flags, it);
-    }
+    } else
+        return it->cur_task;

     return cgroup_procs_next(s, NULL, NULL);
 }
@@ -6258,6 +6271,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
         return;
     }

+    /* Don't associate the sock with unrelated interrupted task's cgroup. */
+    if (in_interrupt())
+        return;
+
     rcu_read_lock();

     while (true) {
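For context on the cgroup hunks above: cgroup_pidlist_next(), cgroup_procs_next() and __cgroup_procs_start() all enforce the same seq_file rule, namely that a ->next() callback must advance *pos even when it has no further record to return, otherwise a read() resumed after an lseek() can repeat or skip entries. Below is a minimal sketch of a seq_file iterator that follows that rule; the demo_* names and the fixed table are hypothetical and are not part of this merge.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/* Hypothetical fixed table, iterated one entry per seq_file position. */
static const int demo_table[] = { 10, 20, 30, 40 };

static void *demo_start(struct seq_file *s, loff_t *pos)
{
	if (*pos >= ARRAY_SIZE(demo_table))
		return NULL;
	return (void *)&demo_table[*pos];
}

static void *demo_next(struct seq_file *s, void *v, loff_t *pos)
{
	/*
	 * Advance the position index unconditionally, as the cgroup fixes
	 * above do: even when iteration is finished and NULL is returned,
	 * *pos must move past the last record.
	 */
	(*pos)++;
	if (*pos >= ARRAY_SIZE(demo_table))
		return NULL;
	return (void *)&demo_table[*pos];
}

static void demo_stop(struct seq_file *s, void *v)
{
}

static int demo_show(struct seq_file *s, void *v)
{
	seq_printf(s, "%d\n", *(const int *)v);
	return 0;
}

static const struct seq_operations demo_seq_ops = {
	.start = demo_start,
	.next  = demo_next,
	.stop  = demo_stop,
	.show  = demo_show,
};

Such an iterator would typically be exposed with proc_create_seq("demo", 0444, NULL, &demo_seq_ops) from a module's init function.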
@@ -619,8 +619,8 @@ static void forget_original_parent(struct task_struct *father,
     reaper = find_new_reaper(father, reaper);
     list_for_each_entry(p, &father->children, sibling) {
         for_each_thread(p, t) {
-            t->real_parent = reaper;
-            BUG_ON((!t->ptrace) != (t->parent == father));
+            RCU_INIT_POINTER(t->real_parent, reaper);
+            BUG_ON((!t->ptrace) != (rcu_access_pointer(t->parent) == father));
             if (likely(!t->ptrace))
                 t->parent = t->real_parent;
             if (t->pdeath_signal)
@@ -1508,7 +1508,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
         return 0;
     }
     sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
-    rcu_assign_pointer(tsk->sighand, sig);
+    RCU_INIT_POINTER(tsk->sighand, sig);
     if (!sig)
         return -ENOMEM;

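On the exit.c and fork.c hunks above: a plain store and rcu_assign_pointer() are replaced by RCU_INIT_POINTER(), with rcu_access_pointer() used inside the BUG_ON(). Roughly, rcu_assign_pointer() publishes a pointer to concurrent RCU readers with release ordering, while RCU_INIT_POINTER() is a plain annotated store for cases where no reader can reach the pointer yet or the stored value is NULL, so the barrier is unnecessary. A minimal sketch of the two cases follows; the demo_* names are hypothetical and not taken from this merge.

#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical example structure. */
struct demo_node {
	int value;
};

static struct demo_node __rcu *demo_ptr;

static void demo_init_early(void)
{
	/*
	 * No reader can see demo_ptr yet (and the value is NULL anyway),
	 * so a plain annotated store is enough; this is the
	 * RCU_INIT_POINTER() case used in the hunks above.
	 */
	RCU_INIT_POINTER(demo_ptr, NULL);
}

static int demo_publish(int value)
{
	struct demo_node *n = kzalloc(sizeof(*n), GFP_KERNEL);

	if (!n)
		return -ENOMEM;
	n->value = value;

	/*
	 * The node becomes visible to concurrent readers here, so publish
	 * it with rcu_assign_pointer(), which orders the initialisation
	 * above before the pointer store.
	 */
	rcu_assign_pointer(demo_ptr, n);
	return 0;
}

static int demo_read(void)
{
	struct demo_node *n;
	int ret = -1;

	rcu_read_lock();
	n = rcu_dereference(demo_ptr);
	if (n)
		ret = n->value;
	rcu_read_unlock();
	return ret;
}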
@@ -385,9 +385,9 @@ static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
  */
 static struct futex_hash_bucket *hash_futex(union futex_key *key)
 {
-    u32 hash = jhash2((u32*)&key->both.word,
-                      (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
+    u32 hash = jhash2((u32 *)key, offsetof(typeof(*key), both.offset) / 4,
                       key->both.offset);
+
     return &futex_queues[hash & (futex_hashsize - 1)];
 }

@@ -429,7 +429,7 @@ static void get_futex_key_refs(union futex_key *key)

     switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
     case FUT_OFF_INODE:
-        ihold(key->shared.inode); /* implies smp_mb(); (B) */
+        smp_mb(); /* explicit smp_mb(); (B) */
         break;
     case FUT_OFF_MMSHARED:
         futex_get_mm(key); /* implies smp_mb(); (B) */
@@ -463,7 +463,6 @@ static void drop_futex_key_refs(union futex_key *key)

     switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
     case FUT_OFF_INODE:
-        iput(key->shared.inode);
         break;
     case FUT_OFF_MMSHARED:
         mmdrop(key->private.mm);
@@ -505,6 +504,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
     return timeout;
 }

+/*
+ * Generate a machine wide unique identifier for this inode.
+ *
+ * This relies on u64 not wrapping in the life-time of the machine; which with
+ * 1ns resolution means almost 585 years.
+ *
+ * This further relies on the fact that a well formed program will not unmap
+ * the file while it has a (shared) futex waiting on it. This mapping will have
+ * a file reference which pins the mount and inode.
+ *
+ * If for some reason an inode gets evicted and read back in again, it will get
+ * a new sequence number and will _NOT_ match, even though it is the exact same
+ * file.
+ *
+ * It is important that match_futex() will never have a false-positive, esp.
+ * for PI futexes that can mess up the state. The above argues that false-negatives
+ * are only possible for malformed programs.
+ */
+static u64 get_inode_sequence_number(struct inode *inode)
+{
+    static atomic64_t i_seq;
+    u64 old;
+
+    /* Does the inode already have a sequence number? */
+    old = atomic64_read(&inode->i_sequence);
+    if (likely(old))
+        return old;
+
+    for (;;) {
+        u64 new = atomic64_add_return(1, &i_seq);
+        if (WARN_ON_ONCE(!new))
+            continue;
+
+        old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
+        if (old)
+            return old;
+        return new;
+    }
+}
+
 /**
  * get_futex_key() - Get parameters which are the keys for a futex
  * @uaddr: virtual address of the futex
@@ -517,9 +556,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
  *
  * The key words are stored in @key on success.
  *
- * For shared mappings, it's (page->index, file_inode(vma->vm_file),
- * offset_within_page). For private mappings, it's (uaddr, current->mm).
- * We can usually work out the index without swapping in the page.
+ * For shared mappings (when @fshared), the key is:
+ *   ( inode->i_sequence, page->index, offset_within_page )
+ * [ also see get_inode_sequence_number() ]
+ *
+ * For private mappings (or when !@fshared), the key is:
+ *   ( current->mm, address, 0 )
+ *
+ * This allows (cross process, where applicable) identification of the futex
+ * without keeping the page pinned for the duration of the FUTEX_WAIT.
  *
  * lock_page() might sleep, the caller should not hold a spinlock.
  */
@@ -659,8 +704,6 @@ again:
         key->private.mm = mm;
         key->private.address = address;

-        get_futex_key_refs(key); /* implies smp_mb(); (B) */
-
     } else {
         struct inode *inode;

@@ -692,40 +735,14 @@ again:
             goto again;
         }

-        /*
-         * Take a reference unless it is about to be freed. Previously
-         * this reference was taken by ihold under the page lock
-         * pinning the inode in place so i_lock was unnecessary. The
-         * only way for this check to fail is if the inode was
-         * truncated in parallel which is almost certainly an
-         * application bug. In such a case, just retry.
-         *
-         * We are not calling into get_futex_key_refs() in file-backed
-         * cases, therefore a successful atomic_inc return below will
-         * guarantee that get_futex_key() will still imply smp_mb(); (B).
-         */
-        if (!atomic_inc_not_zero(&inode->i_count)) {
-            rcu_read_unlock();
-            put_page(page);
-
-            goto again;
-        }
-
-        /* Should be impossible but lets be paranoid for now */
-        if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
-            err = -EFAULT;
-            rcu_read_unlock();
-            iput(inode);
-
-            goto out;
-        }
-
         key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-        key->shared.inode = inode;
+        key->shared.i_seq = get_inode_sequence_number(inode);
         key->shared.pgoff = basepage_index(tail);
         rcu_read_unlock();
     }

+    get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
 out:
     put_page(page);
     return err;
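The get_inode_sequence_number() helper added above lazily assigns each inode a never-reused 64-bit identifier: a static atomic counter produces candidates, and a cmpxchg against 0 latches the first candidate into inode->i_sequence, so racing callers all observe the same value. A rough user-space analogue of that latch pattern, written with hypothetical names and C11 atomics rather than the kernel's atomic64 API:

#include <stdatomic.h>
#include <stdint.h>

/* Hypothetical object standing in for struct inode and its i_sequence. */
struct obj {
	_Atomic uint64_t seq;		/* 0 means "not assigned yet" */
};

static _Atomic uint64_t global_seq;	/* counterpart of the static i_seq */

static uint64_t obj_sequence_number(struct obj *o)
{
	uint64_t old = atomic_load(&o->seq);
	uint64_t new;

	if (old)			/* fast path: already assigned */
		return old;

	for (;;) {
		/* atomic_fetch_add returns the old value, so +1 is the new one. */
		new = atomic_fetch_add(&global_seq, 1) + 1;
		if (!new)		/* skip 0, it is the "unset" marker */
			continue;

		old = 0;
		/* Latch the id; if we lose the race, return the winner's id. */
		if (atomic_compare_exchange_strong(&o->seq, &old, new))
			return new;
		return old;
	}
}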
kernel/pid.c
@@ -247,6 +247,16 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
         tmp = tmp->parent;
     }

+    /*
+     * ENOMEM is not the most obvious choice especially for the case
+     * where the child subreaper has already exited and the pid
+     * namespace denies the creation of any new processes. But ENOMEM
+     * is what we have exposed to userspace for a long time and it is
+     * documented behavior for pid namespaces. So we can't easily
+     * change it even if there were an error code better suited.
+     */
+    retval = -ENOMEM;
+
     if (unlikely(is_child_reaper(pid))) {
         if (pid_ns_prepare_proc(ns))
             goto out_free;
@@ -47,6 +47,7 @@
 #include <linux/syscalls.h>
 #include <linux/kprobes.h>
 #include <linux/user_namespace.h>
+#include <linux/time_namespace.h>
 #include <linux/binfmts.h>

 #include <linux/sched.h>
@@ -2546,6 +2547,7 @@ static int do_sysinfo(struct sysinfo *info)
     memset(info, 0, sizeof(struct sysinfo));

     ktime_get_boottime_ts64(&tp);
+    timens_add_boottime(&tp);
     info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);

     get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
@@ -1896,8 +1896,11 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
     }

     ret = 0;
-    if (bt == NULL)
+    if (bt == NULL) {
         ret = blk_trace_setup_queue(q, bdev);
+        bt = rcu_dereference_protected(q->blk_trace,
+                                       lockdep_is_held(&q->blk_trace_mutex));
+    }

     if (ret == 0) {
         if (attr == &dev_attr_act_mask)
@@ -1547,6 +1547,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
         rec = bsearch(&key, pg->records, pg->index,
                       sizeof(struct dyn_ftrace),
                       ftrace_cmp_recs);
+        if (rec)
+            break;
     }
     return rec;
 }
@@ -1411,14 +1411,16 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
         return;
     rcu_read_lock();
 retry:
-    if (req_cpu == WORK_CPU_UNBOUND)
-        cpu = wq_select_unbound_cpu(raw_smp_processor_id());
-
     /* pwq which will be used unless @work is executing elsewhere */
-    if (!(wq->flags & WQ_UNBOUND))
-        pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
-    else
+    if (wq->flags & WQ_UNBOUND) {
+        if (req_cpu == WORK_CPU_UNBOUND)
+            cpu = wq_select_unbound_cpu(raw_smp_processor_id());
         pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
+    } else {
+        if (req_cpu == WORK_CPU_UNBOUND)
+            cpu = raw_smp_processor_id();
+        pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
+    }

     /*
      * If @work was previously on a different pool, it might still be
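On the workqueue hunk above: wq_select_unbound_cpu() is now consulted only for WQ_UNBOUND workqueues, so work queued to a bound (per-CPU) workqueue with WORK_CPU_UNBOUND simply runs on the submitting CPU. A small module-style sketch of the two queueing flavours, with illustrative demo_* names that are not part of this merge:

#include <linux/module.h>
#include <linux/workqueue.h>

static void demo_work_fn(struct work_struct *work)
{
	/* Report where the work item actually executed. */
	pr_info("demo work ran on CPU %d\n", raw_smp_processor_id());
}

static DECLARE_WORK(demo_work, demo_work_fn);
static struct workqueue_struct *demo_bound_wq;
static struct workqueue_struct *demo_unbound_wq;

static int __init demo_init(void)
{
	/* Bound (per-CPU) workqueue: with the fix above, queue_work() from
	 * here runs the work on the submitting CPU. */
	demo_bound_wq = alloc_workqueue("demo_bound", 0, 0);
	/* Unbound workqueue: the executing CPU is chosen by
	 * wq_select_unbound_cpu() / the scheduler instead. */
	demo_unbound_wq = alloc_workqueue("demo_unbound", WQ_UNBOUND, 0);
	if (!demo_bound_wq || !demo_unbound_wq) {
		if (demo_bound_wq)
			destroy_workqueue(demo_bound_wq);
		if (demo_unbound_wq)
			destroy_workqueue(demo_unbound_wq);
		return -ENOMEM;
	}

	queue_work(demo_bound_wq, &demo_work);
	return 0;
}

static void __exit demo_exit(void)
{
	flush_work(&demo_work);
	destroy_workqueue(demo_bound_wq);
	destroy_workqueue(demo_unbound_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");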