Merge 3.10-rc7 into driver-core-next

We want the firmware merge fixes, and other bits, in here now.

Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Greg Kroah-Hartman
2013-06-24 15:14:43 -07:00
161 changed files with 1300 additions and 754 deletions

View File

@@ -15,7 +15,6 @@
*/
#include <linux/context_tracking.h>
#include <linux/kvm_host.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/hardirq.h>
@@ -71,6 +70,46 @@ void user_enter(void)
local_irq_restore(flags);
}
#ifdef CONFIG_PREEMPT
/**
* preempt_schedule_context - preempt_schedule called by tracing
*
* The tracing infrastructure uses preempt_enable_notrace to prevent
* recursion and tracing preempt enabling caused by the tracing
* infrastructure itself. But as tracing can happen in areas coming
* from userspace or just about to enter userspace, a preempt enable
* can occur before user_exit() is called. This will cause the scheduler
* to be called when the system is still in usermode.
*
* To prevent this, the preempt_enable_notrace will use this function
* instead of preempt_schedule() to exit user context if needed before
* calling the scheduler.
*/
void __sched notrace preempt_schedule_context(void)
{
struct thread_info *ti = current_thread_info();
enum ctx_state prev_ctx;
if (likely(ti->preempt_count || irqs_disabled()))
return;
/*
* Need to disable preemption in case user_exit() is traced
* and the tracer calls preempt_enable_notrace() causing
* an infinite recursion.
*/
preempt_disable_notrace();
prev_ctx = exception_enter();
preempt_enable_no_resched_notrace();
preempt_schedule();
preempt_disable_notrace();
exception_exit(prev_ctx);
preempt_enable_notrace();
}
EXPORT_SYMBOL_GPL(preempt_schedule_context);
#endif /* CONFIG_PREEMPT */
/**
* user_exit - Inform the context tracking that the CPU is

View File

@@ -5,6 +5,7 @@
#include <linux/cpu.h>
#include <linux/tick.h>
#include <linux/mm.h>
#include <linux/stackprotector.h>
#include <asm/tlb.h>
@@ -58,6 +59,7 @@ void __weak arch_cpu_idle_dead(void) { }
void __weak arch_cpu_idle(void)
{
cpu_idle_force_poll = 1;
local_irq_enable();
}
/*
@@ -112,6 +114,21 @@ static void cpu_idle_loop(void)
void cpu_startup_entry(enum cpuhp_state state)
{
/*
* This #ifdef needs to die, but it's too late in the cycle to
* make this generic (arm and sh have never invoked the canary
* init for the non boot cpus!). Will be fixed in 3.11
*/
#ifdef CONFIG_X86
/*
* If we're the non-boot CPU, nothing set the stack canary up
* for us. The boot CPU already has it initialized but no harm
* in doing it again. This is a good place for updating it, as
* we wont ever return from this function (so the invalid
* canaries already on the stack wont ever trigger).
*/
boot_init_stack_canary();
#endif
current_set_polling();
arch_cpu_idle_prepare();
cpu_idle_loop();

View File

@@ -196,9 +196,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
static void update_context_time(struct perf_event_context *ctx);
static u64 perf_event_time(struct perf_event *event);
static void ring_buffer_attach(struct perf_event *event,
struct ring_buffer *rb);
void __weak perf_event_print_debug(void) { }
extern __weak const char *perf_pmu_name(void)
@@ -2918,6 +2915,7 @@ static void free_event_rcu(struct rcu_head *head)
}
static void ring_buffer_put(struct ring_buffer *rb);
static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
static void free_event(struct perf_event *event)
{
@@ -2942,15 +2940,30 @@ static void free_event(struct perf_event *event)
if (has_branch_stack(event)) {
static_key_slow_dec_deferred(&perf_sched_events);
/* is system-wide event */
if (!(event->attach_state & PERF_ATTACH_TASK))
if (!(event->attach_state & PERF_ATTACH_TASK)) {
atomic_dec(&per_cpu(perf_branch_stack_events,
event->cpu));
}
}
}
if (event->rb) {
ring_buffer_put(event->rb);
event->rb = NULL;
struct ring_buffer *rb;
/*
* Can happen when we close an event with re-directed output.
*
* Since we have a 0 refcount, perf_mmap_close() will skip
* over us; possibly making our ring_buffer_put() the last.
*/
mutex_lock(&event->mmap_mutex);
rb = event->rb;
if (rb) {
rcu_assign_pointer(event->rb, NULL);
ring_buffer_detach(event, rb);
ring_buffer_put(rb); /* could be last */
}
mutex_unlock(&event->mmap_mutex);
}
if (is_cgroup_event(event))
@@ -3188,30 +3201,13 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
unsigned int events = POLL_HUP;
/*
* Race between perf_event_set_output() and perf_poll(): perf_poll()
* grabs the rb reference but perf_event_set_output() overrides it.
* Here is the timeline for two threads T1, T2:
* t0: T1, rb = rcu_dereference(event->rb)
* t1: T2, old_rb = event->rb
* t2: T2, event->rb = new rb
* t3: T2, ring_buffer_detach(old_rb)
* t4: T1, ring_buffer_attach(rb1)
* t5: T1, poll_wait(event->waitq)
*
* To avoid this problem, we grab mmap_mutex in perf_poll()
* thereby ensuring that the assignment of the new ring buffer
* and the detachment of the old buffer appear atomic to perf_poll()
* Pin the event->rb by taking event->mmap_mutex; otherwise
* perf_event_set_output() can swizzle our rb and make us miss wakeups.
*/
mutex_lock(&event->mmap_mutex);
rcu_read_lock();
rb = rcu_dereference(event->rb);
if (rb) {
ring_buffer_attach(event, rb);
rb = event->rb;
if (rb)
events = atomic_xchg(&rb->poll, 0);
}
rcu_read_unlock();
mutex_unlock(&event->mmap_mutex);
poll_wait(file, &event->waitq, wait);
@@ -3521,16 +3517,12 @@ static void ring_buffer_attach(struct perf_event *event,
return;
spin_lock_irqsave(&rb->event_lock, flags);
if (!list_empty(&event->rb_entry))
goto unlock;
list_add(&event->rb_entry, &rb->event_list);
unlock:
if (list_empty(&event->rb_entry))
list_add(&event->rb_entry, &rb->event_list);
spin_unlock_irqrestore(&rb->event_lock, flags);
}
static void ring_buffer_detach(struct perf_event *event,
struct ring_buffer *rb)
static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
{
unsigned long flags;
@@ -3549,13 +3541,10 @@ static void ring_buffer_wakeup(struct perf_event *event)
rcu_read_lock();
rb = rcu_dereference(event->rb);
if (!rb)
goto unlock;
list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
wake_up_all(&event->waitq);
unlock:
if (rb) {
list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
wake_up_all(&event->waitq);
}
rcu_read_unlock();
}
@@ -3584,18 +3573,10 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)
static void ring_buffer_put(struct ring_buffer *rb)
{
struct perf_event *event, *n;
unsigned long flags;
if (!atomic_dec_and_test(&rb->refcount))
return;
spin_lock_irqsave(&rb->event_lock, flags);
list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
list_del_init(&event->rb_entry);
wake_up_all(&event->waitq);
}
spin_unlock_irqrestore(&rb->event_lock, flags);
WARN_ON_ONCE(!list_empty(&rb->event_list));
call_rcu(&rb->rcu_head, rb_free_rcu);
}
@@ -3605,26 +3586,100 @@ static void perf_mmap_open(struct vm_area_struct *vma)
struct perf_event *event = vma->vm_file->private_data;
atomic_inc(&event->mmap_count);
atomic_inc(&event->rb->mmap_count);
}
/*
* A buffer can be mmap()ed multiple times; either directly through the same
* event, or through other events by use of perf_event_set_output().
*
* In order to undo the VM accounting done by perf_mmap() we need to destroy
* the buffer here, where we still have a VM context. This means we need
* to detach all events redirecting to us.
*/
static void perf_mmap_close(struct vm_area_struct *vma)
{
struct perf_event *event = vma->vm_file->private_data;
if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
unsigned long size = perf_data_size(event->rb);
struct user_struct *user = event->mmap_user;
struct ring_buffer *rb = event->rb;
struct ring_buffer *rb = event->rb;
struct user_struct *mmap_user = rb->mmap_user;
int mmap_locked = rb->mmap_locked;
unsigned long size = perf_data_size(rb);
atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
vma->vm_mm->pinned_vm -= event->mmap_locked;
rcu_assign_pointer(event->rb, NULL);
ring_buffer_detach(event, rb);
mutex_unlock(&event->mmap_mutex);
atomic_dec(&rb->mmap_count);
ring_buffer_put(rb);
free_uid(user);
if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
return;
/* Detach current event from the buffer. */
rcu_assign_pointer(event->rb, NULL);
ring_buffer_detach(event, rb);
mutex_unlock(&event->mmap_mutex);
/* If there's still other mmap()s of this buffer, we're done. */
if (atomic_read(&rb->mmap_count)) {
ring_buffer_put(rb); /* can't be last */
return;
}
/*
* No other mmap()s, detach from all other events that might redirect
* into the now unreachable buffer. Somewhat complicated by the
* fact that rb::event_lock otherwise nests inside mmap_mutex.
*/
again:
rcu_read_lock();
list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
if (!atomic_long_inc_not_zero(&event->refcount)) {
/*
* This event is en-route to free_event() which will
* detach it and remove it from the list.
*/
continue;
}
rcu_read_unlock();
mutex_lock(&event->mmap_mutex);
/*
* Check we didn't race with perf_event_set_output() which can
* swizzle the rb from under us while we were waiting to
* acquire mmap_mutex.
*
* If we find a different rb; ignore this event, a next
* iteration will no longer find it on the list. We have to
* still restart the iteration to make sure we're not now
* iterating the wrong list.
*/
if (event->rb == rb) {
rcu_assign_pointer(event->rb, NULL);
ring_buffer_detach(event, rb);
ring_buffer_put(rb); /* can't be last, we still have one */
}
mutex_unlock(&event->mmap_mutex);
put_event(event);
/*
* Restart the iteration; either we're on the wrong list or
* destroyed its integrity by doing a deletion.
*/
goto again;
}
rcu_read_unlock();
/*
* It could be there's still a few 0-ref events on the list; they'll
* get cleaned up by free_event() -- they'll also still have their
* ref on the rb and will free it whenever they are done with it.
*
* Aside from that, this buffer is 'fully' detached and unmapped,
* undo the VM accounting.
*/
atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
vma->vm_mm->pinned_vm -= mmap_locked;
free_uid(mmap_user);
ring_buffer_put(rb); /* could be last */
}
static const struct vm_operations_struct perf_mmap_vmops = {
@@ -3674,12 +3729,24 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
return -EINVAL;
WARN_ON_ONCE(event->ctx->parent_ctx);
again:
mutex_lock(&event->mmap_mutex);
if (event->rb) {
if (event->rb->nr_pages == nr_pages)
atomic_inc(&event->rb->refcount);
else
if (event->rb->nr_pages != nr_pages) {
ret = -EINVAL;
goto unlock;
}
if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
/*
* Raced against perf_mmap_close() through
* perf_event_set_output(). Try again, hope for better
* luck.
*/
mutex_unlock(&event->mmap_mutex);
goto again;
}
goto unlock;
}
@@ -3720,12 +3787,16 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
ret = -ENOMEM;
goto unlock;
}
rcu_assign_pointer(event->rb, rb);
atomic_set(&rb->mmap_count, 1);
rb->mmap_locked = extra;
rb->mmap_user = get_current_user();
atomic_long_add(user_extra, &user->locked_vm);
event->mmap_locked = extra;
event->mmap_user = get_current_user();
vma->vm_mm->pinned_vm += event->mmap_locked;
vma->vm_mm->pinned_vm += extra;
ring_buffer_attach(event, rb);
rcu_assign_pointer(event->rb, rb);
perf_event_update_userpage(event);
@@ -3734,7 +3805,11 @@ unlock:
atomic_inc(&event->mmap_count);
mutex_unlock(&event->mmap_mutex);
vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
/*
* Since pinned accounting is per vm we cannot allow fork() to copy our
* vma.
*/
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_ops = &perf_mmap_vmops;
return ret;
@@ -6412,6 +6487,8 @@ set:
if (atomic_read(&event->mmap_count))
goto unlock;
old_rb = event->rb;
if (output_event) {
/* get the rb we want to redirect to */
rb = ring_buffer_get(output_event);
@@ -6419,16 +6496,28 @@ set:
goto unlock;
}
old_rb = event->rb;
rcu_assign_pointer(event->rb, rb);
if (old_rb)
ring_buffer_detach(event, old_rb);
if (rb)
ring_buffer_attach(event, rb);
rcu_assign_pointer(event->rb, rb);
if (old_rb) {
ring_buffer_put(old_rb);
/*
* Since we detached before setting the new rb, so that we
* could attach the new rb, we could have missed a wakeup.
* Provide it now.
*/
wake_up_all(&event->waitq);
}
ret = 0;
unlock:
mutex_unlock(&event->mmap_mutex);
if (old_rb)
ring_buffer_put(old_rb);
out:
return ret;
}

View File

@@ -31,6 +31,10 @@ struct ring_buffer {
spinlock_t event_lock;
struct list_head event_list;
atomic_t mmap_count;
unsigned long mmap_locked;
struct user_struct *mmap_user;
struct perf_event_mmap_page *user_page;
void *data_pages[0];
};

View File

@@ -467,6 +467,7 @@ static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
/* Optimization staging list, protected by kprobe_mutex */
static LIST_HEAD(optimizing_list);
static LIST_HEAD(unoptimizing_list);
static LIST_HEAD(freeing_list);
static void kprobe_optimizer(struct work_struct *work);
static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
@@ -504,7 +505,7 @@ static __kprobes void do_optimize_kprobes(void)
* Unoptimize (replace a jump with a breakpoint and remove the breakpoint
* if need) kprobes listed on unoptimizing_list.
*/
static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
static __kprobes void do_unoptimize_kprobes(void)
{
struct optimized_kprobe *op, *tmp;
@@ -515,9 +516,9 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
/* Ditto to do_optimize_kprobes */
get_online_cpus();
mutex_lock(&text_mutex);
arch_unoptimize_kprobes(&unoptimizing_list, free_list);
arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
/* Loop free_list for disarming */
list_for_each_entry_safe(op, tmp, free_list, list) {
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
/* Disarm probes if marked disabled */
if (kprobe_disabled(&op->kp))
arch_disarm_kprobe(&op->kp);
@@ -536,11 +537,11 @@ static __kprobes void do_unoptimize_kprobes(struct list_head *free_list)
}
/* Reclaim all kprobes on the free_list */
static __kprobes void do_free_cleaned_kprobes(struct list_head *free_list)
static __kprobes void do_free_cleaned_kprobes(void)
{
struct optimized_kprobe *op, *tmp;
list_for_each_entry_safe(op, tmp, free_list, list) {
list_for_each_entry_safe(op, tmp, &freeing_list, list) {
BUG_ON(!kprobe_unused(&op->kp));
list_del_init(&op->list);
free_aggr_kprobe(&op->kp);
@@ -556,8 +557,6 @@ static __kprobes void kick_kprobe_optimizer(void)
/* Kprobe jump optimizer */
static __kprobes void kprobe_optimizer(struct work_struct *work)
{
LIST_HEAD(free_list);
mutex_lock(&kprobe_mutex);
/* Lock modules while optimizing kprobes */
mutex_lock(&module_mutex);
@@ -566,7 +565,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
* Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
* kprobes before waiting for quiesence period.
*/
do_unoptimize_kprobes(&free_list);
do_unoptimize_kprobes();
/*
* Step 2: Wait for quiesence period to ensure all running interrupts
@@ -581,7 +580,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
do_optimize_kprobes();
/* Step 4: Free cleaned kprobes after quiesence period */
do_free_cleaned_kprobes(&free_list);
do_free_cleaned_kprobes();
mutex_unlock(&module_mutex);
mutex_unlock(&kprobe_mutex);
@@ -723,8 +722,19 @@ static void __kprobes kill_optimized_kprobe(struct kprobe *p)
if (!list_empty(&op->list))
/* Dequeue from the (un)optimization queue */
list_del_init(&op->list);
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (kprobe_unused(p)) {
/* Enqueue if it is unused */
list_add(&op->list, &freeing_list);
/*
* Remove unused probes from the hash list. After waiting
* for synchronization, this probe is reclaimed.
* (reclaiming is done by do_free_cleaned_kprobes().)
*/
hlist_del_rcu(&op->kp.hlist);
}
/* Don't touch the code, because it is already freed. */
arch_remove_optimized_kprobe(op);
}

View File

@@ -4,7 +4,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sort.h>
#include <linux/string.h>
#include <linux/range.h>
int add_range(struct range *range, int az, int nr_range, u64 start, u64 end)
@@ -32,9 +32,8 @@ int add_range_with_merge(struct range *range, int az, int nr_range,
if (start >= end)
return nr_range;
/* Try to merge it with old one: */
/* get new start/end: */
for (i = 0; i < nr_range; i++) {
u64 final_start, final_end;
u64 common_start, common_end;
if (!range[i].end)
@@ -45,14 +44,16 @@ int add_range_with_merge(struct range *range, int az, int nr_range,
if (common_start > common_end)
continue;
final_start = min(range[i].start, start);
final_end = max(range[i].end, end);
/* new start/end, will add it back at last */
start = min(range[i].start, start);
end = max(range[i].end, end);
/* clear it and add it back for further merge */
range[i].start = 0;
range[i].end = 0;
return add_range_with_merge(range, az, nr_range,
final_start, final_end);
memmove(&range[i], &range[i + 1],
(nr_range - (i + 1)) * sizeof(range[i]));
range[nr_range - 1].start = 0;
range[nr_range - 1].end = 0;
nr_range--;
i--;
}
/* Need to add it: */

View File

@@ -633,7 +633,19 @@ void wake_up_nohz_cpu(int cpu)
static inline bool got_nohz_idle_kick(void)
{
int cpu = smp_processor_id();
return idle_cpu(cpu) && test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
if (!test_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu)))
return false;
if (idle_cpu(cpu) && !need_resched())
return true;
/*
* We can't run Idle Load Balance on this CPU for this time so we
* cancel it and clear NOHZ_BALANCE_KICK
*/
clear_bit(NOHZ_BALANCE_KICK, nohz_flags(cpu));
return false;
}
#else /* CONFIG_NO_HZ_COMMON */
@@ -1393,8 +1405,9 @@ static void sched_ttwu_pending(void)
void scheduler_ipi(void)
{
if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick()
&& !tick_nohz_full_cpu(smp_processor_id()))
if (llist_empty(&this_rq()->wake_list)
&& !tick_nohz_full_cpu(smp_processor_id())
&& !got_nohz_idle_kick())
return;
/*
@@ -1417,7 +1430,7 @@ void scheduler_ipi(void)
/*
* Check if someone kicked us for doing the nohz idle load balance.
*/
if (unlikely(got_nohz_idle_kick() && !need_resched())) {
if (unlikely(got_nohz_idle_kick())) {
this_rq()->idle_balance = 1;
raise_softirq_irqoff(SCHED_SOFTIRQ);
}
@@ -4745,7 +4758,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
*/
idle->sched_class = &idle_sched_class;
ftrace_graph_init_idle_task(idle, cpu);
vtime_init_idle(idle);
vtime_init_idle(idle, cpu);
#if defined(CONFIG_SMP)
sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
#endif

View File

@@ -747,17 +747,17 @@ void arch_vtime_task_switch(struct task_struct *prev)
write_seqlock(&current->vtime_seqlock);
current->vtime_snap_whence = VTIME_SYS;
current->vtime_snap = sched_clock();
current->vtime_snap = sched_clock_cpu(smp_processor_id());
write_sequnlock(&current->vtime_seqlock);
}
void vtime_init_idle(struct task_struct *t)
void vtime_init_idle(struct task_struct *t, int cpu)
{
unsigned long flags;
write_seqlock_irqsave(&t->vtime_seqlock, flags);
t->vtime_snap_whence = VTIME_SYS;
t->vtime_snap = sched_clock();
t->vtime_snap = sched_clock_cpu(cpu);
write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
}

View File

@@ -698,10 +698,6 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
bc->event_handler = tick_handle_oneshot_broadcast;
/* Take the do_timer update */
if (!tick_nohz_full_cpu(cpu))
tick_do_timer_cpu = cpu;
/*
* We must be careful here. There might be other CPUs
* waiting for periodic broadcast. We need to set the

View File

@@ -306,7 +306,7 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb,
* we can't safely shutdown that CPU.
*/
if (have_nohz_full_mask && tick_do_timer_cpu == cpu)
return -EINVAL;
return NOTIFY_BAD;
break;
}
return NOTIFY_OK;