Merge branch 'master' into next

Author: James Morris
Date:   2009-08-11 08:33:01 +10:00
205 changed files with 4126 additions and 1778 deletions

View File

@@ -568,18 +568,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
* the value intact in a core dump, and to save the unnecessary
* trouble otherwise. Userland only wants this done for a sys_exit.
*/
-	if (tsk->clear_child_tid
-	    && !(tsk->flags & PF_SIGNALED)
-	    && atomic_read(&mm->mm_users) > 1) {
-		u32 __user * tidptr = tsk->clear_child_tid;
+	if (tsk->clear_child_tid) {
+		if (!(tsk->flags & PF_SIGNALED) &&
+		    atomic_read(&mm->mm_users) > 1) {
+			/*
+			 * We don't check the error code - if userspace has
+			 * not set up a proper pointer then tough luck.
+			 */
+			put_user(0, tsk->clear_child_tid);
+			sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
+					1, NULL, NULL, 0);
+		}
		tsk->clear_child_tid = NULL;
-
-		/*
-		 * We don't check the error code - if userspace has
-		 * not set up a proper pointer then tough luck.
-		 */
-		put_user(0, tidptr);
-		sys_futex(tidptr, FUTEX_WAKE, 1, NULL, NULL, 0);
}
}
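For context on what this block services: thread libraries pass CLONE_CHILD_CLEARTID so that a joiner can sleep on the child's tid word until the kernel zeroes it and wakes the futex in mm_release(), which is exactly the put_user()/sys_futex() pair kept above. A minimal userspace sketch of that contract (an assumed illustration, not part of the patch; clone(2) and the futex syscall are standard Linux APIs):

#define _GNU_SOURCE
#include <linux/futex.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

static int tid_word = 1;		/* kernel writes 0 here when the child exits */
static char child_stack[64 * 1024];

static int child_fn(void *arg)
{
	return 0;			/* plain exit: PF_SIGNALED is not set */
}

int main(void)
{
	pid_t pid = clone(child_fn, child_stack + sizeof(child_stack),
			  CLONE_VM | CLONE_CHILD_CLEARTID | SIGCHLD,
			  NULL, NULL, NULL, &tid_word);
	if (pid < 0)
		return 1;

	/* Sleep until mm_release() stores 0 to tid_word and does FUTEX_WAKE. */
	while (tid_word != 0)
		syscall(SYS_futex, &tid_word, FUTEX_WAIT, 1, NULL, NULL, 0);

	waitpid(pid, NULL, 0);
	printf("child %d exited, tid word cleared by the kernel\n", pid);
	return 0;
}

Because the parent shares the mm (CLONE_VM), mm_users > 1 and the child does not exit on a signal, so the wake path in the hunk above runs.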

View File

@@ -107,8 +107,8 @@ out_unlock:
struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
{
-	/* those all static, do move them */
-	if (desc->irq < NR_IRQS_LEGACY)
+	/* those static or target node is -1, do not move them */
+	if (desc->irq < NR_IRQS_LEGACY || node == -1)
return desc;
if (desc->node != node)

View File

@@ -758,7 +758,8 @@ static int __init lockdep_proc_init(void)
&proc_lockdep_stats_operations);
#ifdef CONFIG_LOCK_STAT
proc_create("lock_stat", S_IRUSR, NULL, &proc_lock_stat_operations);
proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL,
&proc_lock_stat_operations);
#endif
return 0;
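A note on why write permission is being added here: /proc/lock_stat is not read-only in practice, since writing to it resets the collected statistics, which the old S_IRUSR mode did not express. A small hedged illustration of that use (assumes CONFIG_LOCK_STAT and root; plain POSIX calls):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/lock_stat", O_WRONLY);

	if (fd < 0) {
		perror("open /proc/lock_stat");
		return 1;
	}
	if (write(fd, "0", 1) != 1)		/* any write clears the statistics */
		perror("write");
	close(fd);
	return 0;
}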

View File

@@ -1104,7 +1104,7 @@ static void perf_counter_sync_stat(struct perf_counter_context *ctx,
__perf_counter_sync_stat(counter, next_counter);
counter = list_next_entry(counter, event_entry);
-		next_counter = list_next_entry(counter, event_entry);
+		next_counter = list_next_entry(next_counter, event_entry);
}
}
@@ -2714,6 +2714,18 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
header.size += sizeof(u64);
}
+	if (sample_type & PERF_SAMPLE_RAW) {
+		int size = sizeof(u32);
+
+		if (data->raw)
+			size += data->raw->size;
+		else
+			size += sizeof(u32);
+
+		WARN_ON_ONCE(size & (sizeof(u64)-1));
+		header.size += size;
+	}
ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
if (ret)
return;
@@ -2777,6 +2789,22 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
}
}
+	if (sample_type & PERF_SAMPLE_RAW) {
+		if (data->raw) {
+			perf_output_put(&handle, data->raw->size);
+			perf_output_copy(&handle, data->raw->data, data->raw->size);
+		} else {
+			struct {
+				u32	size;
+				u32	data;
+			} raw = {
+				.size = sizeof(u32),
+				.data = 0,
+			};
+			perf_output_put(&handle, raw);
+		}
+	}
perf_output_end(&handle);
}
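Read together, the two hunks above define the layout of the raw block inside a sample record: a u32 byte count followed by the payload, or a zeroed u32 placeholder when no raw data was attached, with the writer warning unless the total stays u64-aligned. A hedged, userspace-style sketch of stepping over that block when parsing a sample (names here are illustrative, not from the patch):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Return a pointer just past the PERF_SAMPLE_RAW block that starts at 'p'. */
static const void *skip_sample_raw(const void *p)
{
	uint32_t size;
	const uint8_t *payload;

	memcpy(&size, p, sizeof(size));		/* u32 payload length */
	payload = (const uint8_t *)p + sizeof(uint32_t);
	printf("raw payload: %u bytes\n", size);

	/* The writer warned (WARN_ON_ONCE above) unless 4 + size is a
	 * multiple of 8, so the next sample field starts right here. */
	return payload + size;
}

int main(void)
{
	/* A fabricated little-endian block: size = 4, then 4 payload bytes. */
	const uint8_t block[8] = { 4, 0, 0, 0, 0xde, 0xad, 0xbe, 0xef };

	skip_sample_raw(block);
	return 0;
}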
@@ -2840,7 +2868,8 @@ perf_counter_read_event(struct perf_counter *counter,
*/
struct perf_task_event {
-	struct task_struct		*task;
+	struct task_struct		*task;
+	struct perf_counter_context	*task_ctx;
struct {
struct perf_event_header header;
@@ -2900,24 +2929,23 @@ static void perf_counter_task_ctx(struct perf_counter_context *ctx,
static void perf_counter_task_event(struct perf_task_event *task_event)
{
struct perf_cpu_context *cpuctx;
-	struct perf_counter_context *ctx;
+	struct perf_counter_context *ctx = task_event->task_ctx;
cpuctx = &get_cpu_var(perf_cpu_context);
perf_counter_task_ctx(&cpuctx->ctx, task_event);
put_cpu_var(perf_cpu_context);
rcu_read_lock();
-	/*
-	 * doesn't really matter which of the child contexts the
-	 * events ends up in.
-	 */
-	ctx = rcu_dereference(current->perf_counter_ctxp);
+	if (!ctx)
+		ctx = rcu_dereference(task_event->task->perf_counter_ctxp);
if (ctx)
perf_counter_task_ctx(ctx, task_event);
rcu_read_unlock();
}
-static void perf_counter_task(struct task_struct *task, int new)
+static void perf_counter_task(struct task_struct *task,
+			      struct perf_counter_context *task_ctx,
+			      int new)
{
struct perf_task_event task_event;
@@ -2927,8 +2955,9 @@ static void perf_counter_task(struct task_struct *task, int new)
return;
task_event = (struct perf_task_event){
-		.task = task,
-		.event = {
+		.task = task,
+		.task_ctx = task_ctx,
+		.event = {
.header = {
.type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
.misc = 0,
@@ -2946,7 +2975,7 @@ static void perf_counter_task(struct task_struct *task, int new)
void perf_counter_fork(struct task_struct *task)
{
-	perf_counter_task(task, 1);
+	perf_counter_task(task, NULL, 1);
}
/*
@@ -3335,87 +3364,81 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
* Generic software counter infrastructure
*/
-static void perf_swcounter_update(struct perf_counter *counter)
+/*
+ * We directly increment counter->count and keep a second value in
+ * counter->hw.period_left to count intervals. This period counter
+ * is kept in the range [-sample_period, 0] so that we can use the
+ * sign as trigger.
+ */
+static u64 perf_swcounter_set_period(struct perf_counter *counter)
{
struct hw_perf_counter *hwc = &counter->hw;
-	u64 prev, now;
-	s64 delta;
+	u64 period = hwc->last_period;
+	u64 nr, offset;
+	s64 old, val;
+
+	hwc->last_period = hwc->sample_period;
again:
-	prev = atomic64_read(&hwc->prev_count);
-	now = atomic64_read(&hwc->count);
-	if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev)
+	old = val = atomic64_read(&hwc->period_left);
+	if (val < 0)
+		return 0;
+
+	nr = div64_u64(period + val, period);
+	offset = nr * period;
+	val -= offset;
+	if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
goto again;
-	delta = now - prev;
-
-	atomic64_add(delta, &counter->count);
-	atomic64_sub(delta, &hwc->period_left);
-}
-
-static void perf_swcounter_set_period(struct perf_counter *counter)
-{
-	struct hw_perf_counter *hwc = &counter->hw;
-	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = hwc->sample_period;
-
-	if (unlikely(left <= -period)) {
-		left = period;
-		atomic64_set(&hwc->period_left, left);
-		hwc->last_period = period;
-	}
-
-	if (unlikely(left <= 0)) {
-		left += period;
-		atomic64_add(period, &hwc->period_left);
-		hwc->last_period = period;
-	}
-
-	atomic64_set(&hwc->prev_count, -left);
-	atomic64_set(&hwc->count, -left);
-}
-static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
-{
-	enum hrtimer_restart ret = HRTIMER_RESTART;
-	struct perf_sample_data data;
-	struct perf_counter *counter;
-	u64 period;
-
-	counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
-	counter->pmu->read(counter);
-
-	data.addr = 0;
-	data.regs = get_irq_regs();
-	/*
-	 * In case we exclude kernel IPs or are somehow not in interrupt
-	 * context, provide the next best thing, the user IP.
-	 */
-	if ((counter->attr.exclude_kernel || !data.regs) &&
-			!counter->attr.exclude_user)
-		data.regs = task_pt_regs(current);
-
-	if (data.regs) {
-		if (perf_counter_overflow(counter, 0, &data))
-			ret = HRTIMER_NORESTART;
-	}
-
-	period = max_t(u64, 10000, counter->hw.sample_period);
-	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
-
-	return ret;
+
+	return nr;
}
static void perf_swcounter_overflow(struct perf_counter *counter,
int nmi, struct perf_sample_data *data)
{
-	data->period = counter->hw.last_period;
+	struct hw_perf_counter *hwc = &counter->hw;
+	u64 overflow;

-	perf_swcounter_update(counter);
-	perf_swcounter_set_period(counter);
-	if (perf_counter_overflow(counter, nmi, data))
-		/* soft-disable the counter */
-		;
+	data->period = counter->hw.last_period;
+	overflow = perf_swcounter_set_period(counter);
+
+	if (hwc->interrupts == MAX_INTERRUPTS)
+		return;
+
+	for (; overflow; overflow--) {
+		if (perf_counter_overflow(counter, nmi, data)) {
+			/*
+			 * We inhibit the overflow from happening when
+			 * hwc->interrupts == MAX_INTERRUPTS.
+			 */
+			break;
+		}
+	}
}
static void perf_swcounter_unthrottle(struct perf_counter *counter)
{
/*
* Nothing to do, we already reset hwc->interrupts.
*/
}
+static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
+			       int nmi, struct perf_sample_data *data)
+{
+	struct hw_perf_counter *hwc = &counter->hw;
+
+	atomic64_add(nr, &counter->count);
+
+	if (!hwc->sample_period)
+		return;
+
+	if (!data->regs)
+		return;
+
+	if (!atomic64_add_negative(nr, &hwc->period_left))
+		perf_swcounter_overflow(counter, nmi, data);
+}
static int perf_swcounter_is_counting(struct perf_counter *counter)
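The comment introduced above captures the accounting trick: period_left normally sits in [-sample_period, 0], each event nudges it toward zero, and a sign flip means at least one full period has elapsed. The sketch below (an assumed illustration in plain C, mirroring perf_swcounter_set_period() without the atomics or the cmpxchg retry loop) walks the same arithmetic with concrete numbers:

#include <stdint.h>
#include <stdio.h>

/* Convert a non-negative period_left into the number of elapsed periods and
 * rewind period_left back below zero, like perf_swcounter_set_period(). */
static uint64_t set_period(int64_t *period_left, uint64_t period)
{
	int64_t val = *period_left;
	uint64_t nr, offset;

	if (val < 0)			/* still inside the current period */
		return 0;

	nr = (period + val) / period;	/* whole periods that have elapsed */
	offset = nr * period;
	*period_left = val - (int64_t)offset;	/* back into the [-period, 0) range */
	return nr;
}

int main(void)
{
	/* period = 100; the counter started at -100 and 350 events arrived,
	 * so period_left is now +250: three periods completed, 50 left over. */
	int64_t left = 250;
	uint64_t overflows = set_period(&left, 100);

	printf("overflows=%llu period_left=%lld\n",
	       (unsigned long long)overflows, (long long)left);	/* 3 and -50 */
	return 0;
}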
@@ -3479,15 +3502,6 @@ static int perf_swcounter_match(struct perf_counter *counter,
return 1;
}
-static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
-			       int nmi, struct perf_sample_data *data)
-{
-	int neg = atomic64_add_negative(nr, &counter->hw.count);
-
-	if (counter->hw.sample_period && !neg && data->regs)
-		perf_swcounter_overflow(counter, nmi, data);
-}
static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
enum perf_type_id type,
u32 event, u64 nr, int nmi,
@@ -3566,26 +3580,65 @@ void __perf_swcounter_event(u32 event, u64 nr, int nmi,
static void perf_swcounter_read(struct perf_counter *counter)
{
-	perf_swcounter_update(counter);
}
static int perf_swcounter_enable(struct perf_counter *counter)
{
-	perf_swcounter_set_period(counter);
+	struct hw_perf_counter *hwc = &counter->hw;
+
+	if (hwc->sample_period) {
+		hwc->last_period = hwc->sample_period;
+		perf_swcounter_set_period(counter);
+	}
return 0;
}
static void perf_swcounter_disable(struct perf_counter *counter)
{
-	perf_swcounter_update(counter);
}
static const struct pmu perf_ops_generic = {
.enable = perf_swcounter_enable,
.disable = perf_swcounter_disable,
.read = perf_swcounter_read,
.unthrottle = perf_swcounter_unthrottle,
};
+/*
+ * hrtimer based swcounter callback
+ */
+
+static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
+{
+	enum hrtimer_restart ret = HRTIMER_RESTART;
+	struct perf_sample_data data;
+	struct perf_counter *counter;
+	u64 period;
+
+	counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
+	counter->pmu->read(counter);
+
+	data.addr = 0;
+	data.regs = get_irq_regs();
+	/*
+	 * In case we exclude kernel IPs or are somehow not in interrupt
+	 * context, provide the next best thing, the user IP.
+	 */
+	if ((counter->attr.exclude_kernel || !data.regs) &&
+			!counter->attr.exclude_user)
+		data.regs = task_pt_regs(current);
+
+	if (data.regs) {
+		if (perf_counter_overflow(counter, 0, &data))
+			ret = HRTIMER_NORESTART;
+	}
+
+	period = max_t(u64, 10000, counter->hw.sample_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
+
+	return ret;
+}
/*
* Software counter: cpu wall time clock
*/
@@ -3703,17 +3756,24 @@ static const struct pmu perf_ops_task_clock = {
};
#ifdef CONFIG_EVENT_PROFILE
-void perf_tpcounter_event(int event_id)
+void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
+			  int entry_size)
{
+	struct perf_raw_record raw = {
+		.size = entry_size,
+		.data = record,
+	};
struct perf_sample_data data = {
.regs = get_irq_regs(),
-		.addr = 0,
+		.addr = addr,
+		.raw = &raw,
};
if (!data.regs)
data.regs = task_pt_regs(current);
-	do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data);
+	do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data);
}
EXPORT_SYMBOL_GPL(perf_tpcounter_event);
@@ -3727,6 +3787,14 @@ static void tp_perf_counter_destroy(struct perf_counter *counter)
static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
{
+	/*
+	 * Raw tracepoint data is a severe data leak, only allow root to
+	 * have these.
+	 */
+	if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
+			!capable(CAP_SYS_ADMIN))
+		return ERR_PTR(-EPERM);
if (ftrace_profile_enable(counter->attr.config))
return NULL;
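Seen from userspace, the effect of the new check is that an unprivileged task asking for PERF_SAMPLE_RAW on a tracepoint counter fails at open time with EPERM. A hedged sketch against the 2.6.31-era perf_counter ABI (the header path, __NR_perf_counter_open, and the tracepoint id 42 are assumptions for illustration; the interface was later renamed to perf_event_open):

#include <linux/perf_counter.h>		/* assumed exported kernel header */
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_counter_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.config = 42;			/* hypothetical tracepoint event id */
	attr.sample_period = 1;
	attr.sample_type = PERF_SAMPLE_RAW;

	fd = syscall(__NR_perf_counter_open, &attr, 0, -1, -1, 0);
	if (fd < 0)
		perror("perf_counter_open");	/* EPERM without CAP_SYS_ADMIN */
	else
		close(fd);
	return 0;
}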
@@ -4269,7 +4337,7 @@ void perf_counter_exit_task(struct task_struct *child)
unsigned long flags;
if (likely(!child->perf_counter_ctxp)) {
-		perf_counter_task(child, 0);
+		perf_counter_task(child, NULL, 0);
return;
}
@@ -4289,6 +4357,7 @@ void perf_counter_exit_task(struct task_struct *child)
* incremented the context's refcount before we do put_ctx below.
*/
spin_lock(&child_ctx->lock);
+	child->perf_counter_ctxp = NULL;
/*
* If this context is a clone; unclone it so it can't get
* swapped to another process while we're removing all
@@ -4302,9 +4371,7 @@ void perf_counter_exit_task(struct task_struct *child)
* won't get any samples after PERF_EVENT_EXIT. We can however still
* get a few PERF_EVENT_READ events.
*/
-	perf_counter_task(child, 0);
-	child->perf_counter_ctxp = NULL;
+	perf_counter_task(child, child_ctx, 0);
/*
* We can recurse on the same lock type through:

View File

@@ -521,11 +521,12 @@ void posix_cpu_timers_exit(struct task_struct *tsk)
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
-	struct task_cputime cputime;
+	struct signal_struct *const sig = tsk->signal;

-	thread_group_cputimer(tsk, &cputime);
	cleanup_timers(tsk->signal->cpu_timers,
-		       cputime.utime, cputime.stime, cputime.sum_exec_runtime);
+		       cputime_add(tsk->utime, sig->utime),
+		       cputime_add(tsk->stime, sig->stime),
+		       tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
}
static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)

View File

@@ -1039,16 +1039,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
/* We got the lock for task. */
debug_rt_mutex_lock(lock);
rt_mutex_set_owner(lock, task, 0);
spin_unlock(&lock->wait_lock);
rt_mutex_deadlock_account_lock(lock, task);
return 1;
}
ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
if (ret && !waiter->task) {
/*
* Reset the return value. We might have

View File

@@ -57,7 +57,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
return NOTIFY_BAD;
break;
-#ifdef CONFIG_CPU_HOTPLUG
+#ifdef CONFIG_HOTPLUG_CPU
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:

View File

@@ -735,6 +735,7 @@ ring_buffer_free(struct ring_buffer *buffer)
put_online_cpus();
kfree(buffer->buffers);
free_cpumask_var(buffer->cpumask);
kfree(buffer);
@@ -1785,7 +1786,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
*/
RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
-	if (!rb_try_to_discard(cpu_buffer, event))
+	if (rb_try_to_discard(cpu_buffer, event))
goto out;
/*
@@ -2383,7 +2384,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
* the box. Return the padding, and we will release
* the current locks, and try again.
*/
-		rb_advance_reader(cpu_buffer);
return event;
case RINGBUF_TYPE_TIME_EXTEND:
@@ -2486,7 +2486,7 @@ static inline int rb_ok_to_lock(void)
* buffer too. A one time deal is all you get from reading
* the ring buffer from an NMI.
*/
-	if (likely(!in_nmi() && !oops_in_progress))
+	if (likely(!in_nmi()))
return 1;
tracing_off_permanent();
@@ -2519,6 +2519,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
if (dolock)
spin_lock(&cpu_buffer->reader_lock);
event = rb_buffer_peek(buffer, cpu, ts);
+	if (event && event->type_len == RINGBUF_TYPE_PADDING)
+		rb_advance_reader(cpu_buffer);
if (dolock)
spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);
@@ -2590,12 +2592,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
spin_lock(&cpu_buffer->reader_lock);
event = rb_buffer_peek(buffer, cpu, ts);
-	if (!event)
-		goto out_unlock;
+	if (event)
+		rb_advance_reader(cpu_buffer);

-	rb_advance_reader(cpu_buffer);
out_unlock:
if (dolock)
spin_unlock(&cpu_buffer->reader_lock);
local_irq_restore(flags);
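The last three hunks implement one behavioural rule: rb_buffer_peek() no longer consumes what it returns, callers advance the reader explicitly, and padding events are skipped by advancing without handing them out. A generic, non-kernel sketch of that peek/advance split (all names below are illustrative, not the ring buffer API):

#include <stddef.h>
#include <stdio.h>

enum { EV_PADDING, EV_DATA };

struct event { int type; int value; };

struct reader {
	const struct event *buf;
	size_t head, count;
};

/* Look at the next event without consuming it (rb_buffer_peek's new role). */
static const struct event *peek(struct reader *r)
{
	return r->head < r->count ? &r->buf[r->head] : NULL;
}

/* Consuming is now the caller's job: skip padding by advancing, and advance
 * past the event that is actually returned. */
static const struct event *consume(struct reader *r)
{
	const struct event *e;

	while ((e = peek(r)) && e->type == EV_PADDING)
		r->head++;			/* skip padding, keep looking */
	if (e)
		r->head++;			/* advance past the returned event */
	return e;
}

int main(void)
{
	const struct event evs[] = {
		{ EV_DATA, 1 }, { EV_PADDING, 0 }, { EV_DATA, 2 },
	};
	struct reader r = { evs, 0, 3 };
	const struct event *e;

	while ((e = consume(&r)))
		printf("event %d\n", e->value);
	return 0;
}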

View File

@@ -848,6 +848,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
}
+EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
int type,

View File

@@ -438,10 +438,6 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
int *ent_cpu, u64 *ent_ts);
-void tracing_generic_entry_update(struct trace_entry *entry,
-				  unsigned long flags,
-				  int pc);
void default_wait_pipe(struct trace_iterator *iter);
void poll_wait_pipe(struct trace_iterator *iter);

View File

@@ -14,7 +14,7 @@ int ftrace_profile_enable(int event_id)
mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) {
-		if (event->id == event_id) {
+		if (event->id == event_id && event->profile_enable) {
ret = event->profile_enable(event);
break;
}

View File

@@ -940,7 +940,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
entry = trace_create_file("enable", 0644, call->dir, call,
enable);
-	if (call->id)
+	if (call->id && call->profile_enable)
entry = trace_create_file("id", 0444, call->dir, call,
id);

View File

@@ -624,9 +624,6 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
return -ENOSPC;
}
-	filter->preds[filter->n_preds] = pred;
-	filter->n_preds++;
list_for_each_entry(call, &ftrace_events, list) {
if (!call->define_fields)
@@ -643,6 +640,9 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
}
replace_filter_string(call->filter, filter_string);
}
+	filter->preds[filter->n_preds] = pred;
+	filter->n_preds++;
out:
return err;
}
@@ -1029,12 +1029,17 @@ static int replace_preds(struct event_subsystem *system,
if (elt->op == OP_AND || elt->op == OP_OR) {
pred = create_logical_pred(elt->op);
if (!pred)
return -ENOMEM;
if (call) {
err = filter_add_pred(ps, call, pred);
filter_free_pred(pred);
-			} else
+			} else {
err = filter_add_subsystem_pred(ps, system,
pred, filter_string);
+				if (err)
+					filter_free_pred(pred);
+			}
if (err)
return err;
@@ -1048,12 +1053,17 @@ static int replace_preds(struct event_subsystem *system,
}
pred = create_pred(elt->op, operand1, operand2);
if (!pred)
return -ENOMEM;
if (call) {
err = filter_add_pred(ps, call, pred);
filter_free_pred(pred);
-		} else
+		} else {
err = filter_add_subsystem_pred(ps, system, pred,
filter_string);
+			if (err)
+				filter_free_pred(pred);
+		}
if (err)
return err;
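Both filter hunks follow the same ownership rule: filter_add_pred() leaves the predicate with the caller (it is freed unconditionally after the call), while filter_add_subsystem_pred() only takes ownership when it succeeds, so the caller now frees it on failure. A generic sketch of that rule, with illustrative names rather than the tracing API:

#include <stdlib.h>

struct pred { int dummy; };

/* Copies what it needs; the caller always keeps ownership of p. */
static int add_by_copy(const struct pred *p)
{
	(void)p;
	return 0;				/* pretend it succeeded */
}

/* Takes ownership of p only on success; on failure the caller still owns it. */
static int add_take_ownership(struct pred *p)
{
	(void)p;
	return -1;				/* pretend it failed */
}

static int install_pred(struct pred *p, int copying_path)
{
	int err;

	if (copying_path) {
		err = add_by_copy(p);
		free(p);			/* always ours to free */
	} else {
		err = add_take_ownership(p);
		if (err)
			free(p);		/* ours again only because it failed */
	}
	return err;
}

int main(void)
{
	struct pred *a = calloc(1, sizeof(*a));
	struct pred *b = calloc(1, sizeof(*b));

	if (!a || !b)
		return 1;
	install_pred(a, 1);
	install_pred(b, 0);
	return 0;
}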