Merge branch 'linus' into core/locking

Ingo Molnar
2008-11-12 12:39:21 +01:00
1471 changed files with 23750 additions and 14886 deletions

View File

@@ -13,7 +13,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
CFLAGS_REMOVE_sched.o = -mno-spe
ifdef CONFIG_FTRACE
ifdef CONFIG_FUNCTION_TRACER
# Do not trace debug files and internal ftrace files
CFLAGS_REMOVE_lockdep.o = -pg
CFLAGS_REMOVE_lockdep_proc.o = -pg
@@ -88,7 +88,7 @@ obj-$(CONFIG_MARKERS) += marker.o
obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
obj-$(CONFIG_FTRACE) += trace/
obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o

View File

@@ -2497,7 +2497,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
list_del(&cgrp->sibling);
spin_lock(&cgrp->dentry->d_lock);
d = dget(cgrp->dentry);
cgrp->dentry = NULL;
spin_unlock(&d->d_lock);
cgroup_d_remove_dir(d);

View File

@@ -162,9 +162,13 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
struct task_struct *task)
{
struct freezer *freezer;
int retval;
/* Anything frozen can't move or be moved to/from */
/*
* Anything frozen can't move or be moved to/from.
*
* Since orig_freezer->state == FROZEN means that @task has been
* frozen, so it's sufficient to check the latter condition.
*/
if (is_task_frozen_enough(task))
return -EBUSY;
@@ -173,13 +177,7 @@ static int freezer_can_attach(struct cgroup_subsys *ss,
if (freezer->state == CGROUP_FROZEN)
return -EBUSY;
retval = 0;
task_lock(task);
freezer = task_freezer(task);
if (freezer->state == CGROUP_FROZEN)
retval = -EBUSY;
task_unlock(task);
return retval;
return 0;
}
static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
@@ -190,8 +188,9 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
freezer = task_freezer(task);
task_unlock(task);
BUG_ON(freezer->state == CGROUP_FROZEN);
spin_lock_irq(&freezer->lock);
BUG_ON(freezer->state == CGROUP_FROZEN);
/* Locking avoids race with FREEZING -> THAWED transitions. */
if (freezer->state == CGROUP_FREEZING)
freeze_task(task, true);
@@ -276,25 +275,18 @@ static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
return num_cant_freeze_now ? -EBUSY : 0;
}
static int unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer)
{
struct cgroup_iter it;
struct task_struct *task;
cgroup_iter_start(cgroup, &it);
while ((task = cgroup_iter_next(cgroup, &it))) {
int do_wake;
task_lock(task);
do_wake = __thaw_process(task);
task_unlock(task);
if (do_wake)
wake_up_process(task);
thaw_process(task);
}
cgroup_iter_end(cgroup, &it);
freezer->state = CGROUP_THAWED;
return 0;
freezer->state = CGROUP_THAWED;
}
static int freezer_change_state(struct cgroup *cgroup,
@@ -304,27 +296,22 @@ static int freezer_change_state(struct cgroup *cgroup,
int retval = 0;
freezer = cgroup_freezer(cgroup);
spin_lock_irq(&freezer->lock);
update_freezer_state(cgroup, freezer);
if (goal_state == freezer->state)
goto out;
switch (freezer->state) {
switch (goal_state) {
case CGROUP_THAWED:
unfreeze_cgroup(cgroup, freezer);
break;
case CGROUP_FROZEN:
retval = try_to_freeze_cgroup(cgroup, freezer);
break;
case CGROUP_FREEZING:
if (goal_state == CGROUP_FROZEN) {
/* Userspace is retrying after
* "/bin/echo FROZEN > freezer.state" returned -EBUSY */
retval = try_to_freeze_cgroup(cgroup, freezer);
break;
}
/* state == FREEZING and goal_state == THAWED, so unfreeze */
case CGROUP_FROZEN:
retval = unfreeze_cgroup(cgroup, freezer);
break;
default:
break;
BUG();
}
out:
spin_unlock_irq(&freezer->lock);

View File

@@ -499,3 +499,6 @@ const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);

View File

@@ -141,6 +141,11 @@ static void __exit_signal(struct task_struct *tsk)
if (sig) {
flush_sigqueue(&sig->shared_pending);
taskstats_tgid_free(sig);
/*
* Make sure ->signal can't go away under rq->lock,
* see account_group_exec_runtime().
*/
task_rq_unlock_wait(tsk);
__cleanup_signal(sig);
}
}

View File

@@ -121,16 +121,7 @@ void cancel_freezing(struct task_struct *p)
}
}
/*
* Wake up a frozen process
*
* task_lock() is needed to prevent the race with refrigerator() which may
* occur if the freezing of tasks fails. Namely, without the lock, if the
* freezing of tasks failed, thaw_tasks() might have run before a task in
* refrigerator() could call frozen_process(), in which case the task would be
* frozen and no one would thaw it.
*/
int __thaw_process(struct task_struct *p)
static int __thaw_process(struct task_struct *p)
{
if (frozen(p)) {
p->flags &= ~PF_FROZEN;
@@ -140,6 +131,15 @@ int __thaw_process(struct task_struct *p)
return 0;
}
/*
* Wake up a frozen process
*
* task_lock() is needed to prevent the race with refrigerator() which may
* occur if the freezing of tasks fails. Namely, without the lock, if the
* freezing of tasks failed, thaw_tasks() might have run before a task in
* refrigerator() could call frozen_process(), in which case the task would be
* frozen and no one would thaw it.
*/
int thaw_process(struct task_struct *p)
{
task_lock(p);

View File

@@ -1209,6 +1209,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
enum hrtimer_restart (*fn)(struct hrtimer *);
struct hrtimer *timer;
int restart;
int emulate_hardirq_ctx = 0;
timer = list_entry(cpu_base->cb_pending.next,
struct hrtimer, cb_entry);
@@ -1217,10 +1218,24 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
timer_stats_account_hrtimer(timer);
fn = timer->function;
/*
* A timer might have been added to the cb_pending list
* when it was migrated during a cpu-offline operation.
* Emulate hardirq context for such timers.
*/
if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED)
emulate_hardirq_ctx = 1;
__remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
spin_unlock_irq(&cpu_base->lock);
restart = fn(timer);
if (unlikely(emulate_hardirq_ctx)) {
local_irq_disable();
restart = fn(timer);
local_irq_enable();
} else
restart = fn(timer);
spin_lock_irq(&cpu_base->lock);

View File

@@ -220,7 +220,7 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
}
}
void register_default_affinity_proc(void)
static void register_default_affinity_proc(void)
{
#ifdef CONFIG_SMP
proc_create("irq/default_smp_affinity", 0600, NULL,

View File

@@ -2173,12 +2173,11 @@ void early_boot_irqs_on(void)
/*
* Hardirqs will be enabled:
*/
void trace_hardirqs_on_caller(unsigned long a0)
void trace_hardirqs_on_caller(unsigned long ip)
{
struct task_struct *curr = current;
unsigned long ip;
time_hardirqs_on(CALLER_ADDR0, a0);
time_hardirqs_on(CALLER_ADDR0, ip);
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -2192,7 +2191,6 @@ void trace_hardirqs_on_caller(unsigned long a0)
}
/* we'll do an OFF -> ON transition: */
curr->hardirqs_enabled = 1;
ip = (unsigned long) __builtin_return_address(0);
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return;
@@ -2228,11 +2226,11 @@ EXPORT_SYMBOL(trace_hardirqs_on);
/*
* Hardirqs were disabled:
*/
void trace_hardirqs_off_caller(unsigned long a0)
void trace_hardirqs_off_caller(unsigned long ip)
{
struct task_struct *curr = current;
time_hardirqs_off(CALLER_ADDR0, a0);
time_hardirqs_off(CALLER_ADDR0, ip);
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -2245,7 +2243,7 @@ void trace_hardirqs_off_caller(unsigned long a0)
* We have done an ON -> OFF transition:
*/
curr->hardirqs_enabled = 0;
curr->hardirq_disable_ip = _RET_IP_;
curr->hardirq_disable_ip = ip;
curr->hardirq_disable_event = ++curr->irq_events;
debug_atomic_inc(&hardirqs_off_events);
} else
@@ -3426,9 +3424,10 @@ retry:
}
printk(" ignoring it.\n");
unlock = 0;
} else {
if (count != 10)
printk(KERN_CONT " locked it.\n");
}
if (count != 10)
printk(" locked it.\n");
do_each_thread(g, p) {
/*
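
The lockdep hunks above pass the caller address into trace_hardirqs_on_caller()/trace_hardirqs_off_caller() as ip instead of recomputing it with __builtin_return_address(0) inside the function. A minimal GCC/Clang sketch of that pattern (illustrative names only):

#include <stdio.h>

/* The annotation now receives the caller's address as a parameter ... */
static void trace_enable(unsigned long ip)
{
    printf("enabled at ip=%#lx\n", ip);
}

/* ... which the thin public wrapper captures exactly once. */
static void __attribute__((noinline)) api_entry(void)
{
    trace_enable((unsigned long)__builtin_return_address(0));
}

int main(void)
{
    api_entry();
    return 0;
}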

View File

@@ -96,7 +96,7 @@ config SUSPEND
config PM_TEST_SUSPEND
bool "Test suspend/resume and wakealarm during bootup"
depends on SUSPEND && PM_DEBUG && RTC_LIB=y
depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
---help---
This option will let you suspend your machine during bootup, and
make it wake up a few seconds later using an RTC wakeup alarm.

View File

@@ -232,45 +232,6 @@ static inline void boot_delay_msec(void)
}
#endif
/*
* Return the number of unread characters in the log buffer.
*/
static int log_buf_get_len(void)
{
return logged_chars;
}
/*
* Copy a range of characters from the log buffer.
*/
int log_buf_copy(char *dest, int idx, int len)
{
int ret, max;
bool took_lock = false;
if (!oops_in_progress) {
spin_lock_irq(&logbuf_lock);
took_lock = true;
}
max = log_buf_get_len();
if (idx < 0 || idx >= max) {
ret = -1;
} else {
if (len > max)
len = max;
ret = len;
idx += (log_end - max);
while (len-- > 0)
dest[len] = LOG_BUF(idx + len);
}
if (took_lock)
spin_unlock_irq(&logbuf_lock);
return ret;
}
/*
* Commands to do_syslog:
*

View File

@@ -102,7 +102,7 @@ int profile_setup(char *str)
__setup("profile=", profile_setup);
int profile_init(void)
int __ref profile_init(void)
{
int buffer_bytes;
if (!prof_on)

View File

@@ -17,6 +17,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/device.h>
#include <linux/pfn.h>
#include <asm/io.h>
@@ -522,7 +523,7 @@ static void __init __reserve_region_with_split(struct resource *root,
{
struct resource *parent = root;
struct resource *conflict;
struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
struct resource *res = kzalloc(sizeof(*res), GFP_ATOMIC);
if (!res)
return;
@@ -571,7 +572,7 @@ static void __init __reserve_region_with_split(struct resource *root,
}
void reserve_region_with_split(struct resource *root,
void __init reserve_region_with_split(struct resource *root,
resource_size_t start, resource_size_t end,
const char *name)
{
@@ -849,7 +850,8 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
continue;
if (p->end < addr)
continue;
if (p->start <= addr && (p->end >= addr + size - 1))
if (PFN_DOWN(p->start) <= PFN_DOWN(addr) &&
PFN_DOWN(p->end) >= PFN_DOWN(addr + size - 1))
continue;
printk(KERN_WARNING "resource map sanity check conflict: "
"0x%llx 0x%llx 0x%llx 0x%llx %s\n",

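The iomem_map_sanity_check() hunk above compares page frame numbers instead of byte addresses, so a mapping that stays within the same pages as a resource no longer trips the warning. A small userspace illustration of the rounding; the 4 KiB page size is an assumption for the demo:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT  12                          /* assumed 4 KiB pages */
#define PFN_DOWN(x) ((uint64_t)(x) >> PAGE_SHIFT)

/* Byte-exact containment, as the old check required. */
static int contains_bytes(uint64_t rs, uint64_t re, uint64_t addr, uint64_t size)
{
    return rs <= addr && re >= addr + size - 1;
}

/* Page-granular containment, as the patched check allows. */
static int contains_pages(uint64_t rs, uint64_t re, uint64_t addr, uint64_t size)
{
    return PFN_DOWN(rs) <= PFN_DOWN(addr) &&
           PFN_DOWN(re) >= PFN_DOWN(addr + size - 1);
}

int main(void)
{
    /* Resource covers part of a page; the mapping covers the whole page. */
    uint64_t rs = 0x1000, re = 0x17ff, addr = 0x1000, size = 0x1000;

    printf("byte check: %d, page check: %d\n",
           contains_bytes(rs, re, addr, size),
           contains_pages(rs, re, addr, size));
    return 0;
}
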
View File

@@ -386,7 +386,6 @@ struct cfs_rq {
u64 exec_clock;
u64 min_vruntime;
u64 pair_start;
struct rb_root tasks_timeline;
struct rb_node *rb_leftmost;
@@ -398,9 +397,9 @@ struct cfs_rq {
* 'curr' points to currently running entity on this cfs_rq.
* It is set to NULL otherwise (i.e when none are currently running).
*/
struct sched_entity *curr, *next;
struct sched_entity *curr, *next, *last;
unsigned long nr_spread_over;
unsigned int nr_spread_over;
#ifdef CONFIG_FAIR_GROUP_SCHED
struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
@@ -970,6 +969,14 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
}
}
void task_rq_unlock_wait(struct task_struct *p)
{
struct rq *rq = task_rq(p);
smp_mb(); /* spin-unlock-wait is not a full memory barrier */
spin_unlock_wait(&rq->lock);
}
static void __task_rq_unlock(struct rq *rq)
__releases(rq->lock)
{
@@ -1806,7 +1813,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
/*
* Buddy candidates are cache hot:
*/
if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next))
if (sched_feat(CACHE_HOT_BUDDY) &&
(&p->se == cfs_rq_of(&p->se)->next ||
&p->se == cfs_rq_of(&p->se)->last))
return 1;
if (p->sched_class != &fair_sched_class)
@@ -3344,7 +3353,7 @@ small_imbalance:
} else
this_load_per_task = cpu_avg_load_per_task(this_cpu);
if (max_load - this_load + 2*busiest_load_per_task >=
if (max_load - this_load + busiest_load_per_task >=
busiest_load_per_task * imbn) {
*imbalance = busiest_load_per_task;
return busiest;
@@ -6876,15 +6885,17 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
struct sched_domain *tmp;
/* Remove the sched domains which do not contribute to scheduling. */
for (tmp = sd; tmp; tmp = tmp->parent) {
for (tmp = sd; tmp; ) {
struct sched_domain *parent = tmp->parent;
if (!parent)
break;
if (sd_parent_degenerate(tmp, parent)) {
tmp->parent = parent->parent;
if (parent->parent)
parent->parent->child = tmp;
}
} else
tmp = tmp->parent;
}
if (sd && sd_degenerate(sd)) {
@@ -7673,6 +7684,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
error:
free_sched_groups(cpu_map, tmpmask);
SCHED_CPUMASK_FREE((void *)allmasks);
kfree(rd);
return -ENOMEM;
#endif
}
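
The cpu_attach_domain() loop above no longer advances past a level whose degenerate parent it just spliced out; it re-tests the same level against the new parent. A generic sketch of that splice-and-retry walk over a parent-linked chain (toy struct, not the scheduler's sched_domain):

#include <stdio.h>
#include <stddef.h>

struct node {
    int degenerate;                     /* this level adds nothing, remove it */
    struct node *parent;
    struct node *child;
};

/* Splice out degenerate parents; only advance when the parent survives,
 * so a run of degenerate parents collapses in a single pass. */
static void collapse(struct node *n)
{
    while (n) {
        struct node *parent = n->parent;

        if (!parent)
            break;
        if (parent->degenerate) {
            n->parent = parent->parent;
            if (parent->parent)
                parent->parent->child = n;
        } else {
            n = n->parent;
        }
    }
}

int main(void)
{
    struct node top  = { 0, NULL, NULL };
    struct node mid  = { 1, &top, NULL };       /* degenerate level */
    struct node leaf = { 0, &mid, NULL };

    top.child = &mid;
    mid.child = &leaf;

    collapse(&leaf);
    printf("leaf now attaches to %s\n", leaf.parent == &top ? "top" : "mid");
    return 0;
}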

View File

@@ -144,7 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
last = __pick_last_entity(cfs_rq);
if (last)
max_vruntime = last->vruntime;
min_vruntime = rq->cfs.min_vruntime;
min_vruntime = cfs_rq->min_vruntime;
rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime;
spin_unlock_irqrestore(&rq->lock, flags);
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
@@ -161,26 +161,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
SPLIT_NS(spread0));
SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_SCHEDSTATS
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
P(yld_exp_empty);
P(yld_act_empty);
P(yld_both_empty);
P(yld_count);
P(sched_switch);
P(sched_count);
P(sched_goidle);
P(ttwu_count);
P(ttwu_local);
P(bkl_count);
#undef P
#endif
SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over",
SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
cfs_rq->nr_spread_over);
#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SMP
@@ -260,6 +242,25 @@ static void print_cpu(struct seq_file *m, int cpu)
#undef P
#undef PN
#ifdef CONFIG_SCHEDSTATS
#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
P(yld_exp_empty);
P(yld_act_empty);
P(yld_both_empty);
P(yld_count);
P(sched_switch);
P(sched_count);
P(sched_goidle);
P(ttwu_count);
P(ttwu_local);
P(bkl_count);
#undef P
#endif
print_cfs_stats(m, cpu);
print_rt_stats(m, cpu);
@@ -319,7 +320,7 @@ static int __init init_sched_debug_procfs(void)
{
struct proc_dir_entry *pe;
pe = proc_create("sched_debug", 0644, NULL, &sched_debug_fops);
pe = proc_create("sched_debug", 0444, NULL, &sched_debug_fops);
if (!pe)
return -ENOMEM;
return 0;

View File

@@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
return se->parent;
}
/* return depth at which a sched entity is present in the hierarchy */
static inline int depth_se(struct sched_entity *se)
{
int depth = 0;
for_each_sched_entity(se)
depth++;
return depth;
}
static void
find_matching_se(struct sched_entity **se, struct sched_entity **pse)
{
int se_depth, pse_depth;
/*
* preemption test can be made between sibling entities who are in the
* same cfs_rq i.e who have a common parent. Walk up the hierarchy of
* both tasks until we find their ancestors who are siblings of common
* parent.
*/
/* First walk up until both entities are at same depth */
se_depth = depth_se(*se);
pse_depth = depth_se(*pse);
while (se_depth > pse_depth) {
se_depth--;
*se = parent_entity(*se);
}
while (pse_depth > se_depth) {
pse_depth--;
*pse = parent_entity(*pse);
}
while (!is_same_group(*se, *pse)) {
*se = parent_entity(*se);
*pse = parent_entity(*pse);
}
}
#else /* CONFIG_FAIR_GROUP_SCHED */
static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
@@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
return NULL;
}
static inline void
find_matching_se(struct sched_entity **se, struct sched_entity **pse)
{
}
#endif /* CONFIG_FAIR_GROUP_SCHED */
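
depth_se()/find_matching_se() above bring two entities to the same depth and then walk both up until they are comparable siblings. The same common-ancestor walk on a plain parent-linked hierarchy (illustrative types; the kernel compares cfs_rq membership rather than parents):

#include <stdio.h>
#include <stddef.h>

struct ent { struct ent *parent; const char *name; };

static int depth(struct ent *e)
{
    int d = 0;

    for (; e; e = e->parent)
        d++;
    return d;
}

/* Equalize depths, then climb both until the two entities share a parent. */
static void match(struct ent **a, struct ent **b)
{
    int da = depth(*a), db = depth(*b);

    while (da > db) { *a = (*a)->parent; da--; }
    while (db > da) { *b = (*b)->parent; db--; }
    while ((*a)->parent != (*b)->parent) {
        *a = (*a)->parent;
        *b = (*b)->parent;
    }
}

int main(void)
{
    struct ent root = { NULL, "root" };
    struct ent g1 = { &root, "g1" }, g2 = { &root, "g2" };
    struct ent t1 = { &g1, "t1" },   t2 = { &g2, "t2" };
    struct ent *a = &t1, *b = &t2;

    match(&a, &b);
    printf("comparable pair: %s vs %s\n", a->name, b->name);   /* g1 vs g2 */
    return 0;
}
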
@@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
return se->vruntime - cfs_rq->min_vruntime;
}
static void update_min_vruntime(struct cfs_rq *cfs_rq)
{
u64 vruntime = cfs_rq->min_vruntime;
if (cfs_rq->curr)
vruntime = cfs_rq->curr->vruntime;
if (cfs_rq->rb_leftmost) {
struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
struct sched_entity,
run_node);
if (vruntime == cfs_rq->min_vruntime)
vruntime = se->vruntime;
else
vruntime = min_vruntime(vruntime, se->vruntime);
}
cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
}
/*
* Enqueue an entity into the rb-tree:
*/
@@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
* Maintain a cache of leftmost tree entries (it is frequently
* used):
*/
if (leftmost) {
if (leftmost)
cfs_rq->rb_leftmost = &se->run_node;
/*
* maintain cfs_rq->min_vruntime to be a monotonic increasing
* value tracking the leftmost vruntime in the tree.
*/
cfs_rq->min_vruntime =
max_vruntime(cfs_rq->min_vruntime, se->vruntime);
}
rb_link_node(&se->run_node, parent, link);
rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -274,37 +336,25 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
if (cfs_rq->rb_leftmost == &se->run_node) {
struct rb_node *next_node;
struct sched_entity *next;
next_node = rb_next(&se->run_node);
cfs_rq->rb_leftmost = next_node;
if (next_node) {
next = rb_entry(next_node,
struct sched_entity, run_node);
cfs_rq->min_vruntime =
max_vruntime(cfs_rq->min_vruntime,
next->vruntime);
}
}
if (cfs_rq->next == se)
cfs_rq->next = NULL;
rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
}
static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
{
return cfs_rq->rb_leftmost;
}
static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
{
return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node);
struct rb_node *left = cfs_rq->rb_leftmost;
if (!left)
return NULL;
return rb_entry(left, struct sched_entity, run_node);
}
static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
@@ -424,6 +474,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
schedstat_add(cfs_rq, exec_clock, delta_exec);
delta_exec_weighted = calc_delta_fair(delta_exec, curr);
curr->vruntime += delta_exec_weighted;
update_min_vruntime(cfs_rq);
}
static void update_curr(struct cfs_rq *cfs_rq)
@@ -613,13 +664,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
static void
place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
{
u64 vruntime;
if (first_fair(cfs_rq)) {
vruntime = min_vruntime(cfs_rq->min_vruntime,
__pick_next_entity(cfs_rq)->vruntime);
} else
vruntime = cfs_rq->min_vruntime;
u64 vruntime = cfs_rq->min_vruntime;
/*
* The 'current' period is already promised to the current tasks,
@@ -671,6 +716,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
__enqueue_entity(cfs_rq, se);
}
static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
if (cfs_rq->last == se)
cfs_rq->last = NULL;
if (cfs_rq->next == se)
cfs_rq->next = NULL;
}
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
{
@@ -693,9 +747,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
#endif
}
clear_buddies(cfs_rq, se);
if (se != cfs_rq->curr)
__dequeue_entity(cfs_rq, se);
account_entity_dequeue(cfs_rq, se);
update_min_vruntime(cfs_rq);
}
/*
@@ -742,29 +799,18 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
se->prev_sum_exec_runtime = se->sum_exec_runtime;
}
static struct sched_entity *
pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
struct rq *rq = rq_of(cfs_rq);
u64 pair_slice = rq->clock - cfs_rq->pair_start;
if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
cfs_rq->pair_start = rq->clock;
return se;
}
return cfs_rq->next;
}
static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
struct sched_entity *se = NULL;
struct sched_entity *se = __pick_next_entity(cfs_rq);
if (first_fair(cfs_rq)) {
se = __pick_next_entity(cfs_rq);
se = pick_next(cfs_rq, se);
set_next_entity(cfs_rq, se);
}
if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1)
return cfs_rq->next;
if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1)
return cfs_rq->last;
return se;
}
@@ -936,6 +982,8 @@ static void yield_task_fair(struct rq *rq)
if (unlikely(cfs_rq->nr_running == 1))
return;
clear_buddies(cfs_rq, se);
if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
update_rq_clock(rq);
/*
@@ -1122,10 +1170,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
return 0;
if (!sync && sched_feat(SYNC_WAKEUPS) &&
curr->se.avg_overlap < sysctl_sched_migration_cost &&
p->se.avg_overlap < sysctl_sched_migration_cost)
sync = 1;
if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
p->se.avg_overlap > sysctl_sched_migration_cost))
sync = 0;
/*
* If sync wakeup then subtract the (maximum possible)
@@ -1244,33 +1291,88 @@ static unsigned long wakeup_gran(struct sched_entity *se)
* More easily preempt - nice tasks, while not making it harder for
* + nice tasks.
*/
if (sched_feat(ASYM_GRAN))
gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
return gran;
}
/*
* Should 'se' preempt 'curr'.
*
* |s1
* |s2
* |s3
* g
* |<--->|c
*
* w(c, s1) = -1
* w(c, s2) = 0
* w(c, s3) = 1
*
*/
static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
{
s64 gran, vdiff = curr->vruntime - se->vruntime;
if (vdiff <= 0)
return -1;
gran = wakeup_gran(curr);
if (vdiff > gran)
return 1;
return 0;
}
static void set_last_buddy(struct sched_entity *se)
{
for_each_sched_entity(se)
cfs_rq_of(se)->last = se;
}
static void set_next_buddy(struct sched_entity *se)
{
for_each_sched_entity(se)
cfs_rq_of(se)->next = se;
}
/*
* Preempt the current task with a newly woken task if needed:
*/
static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
{
struct task_struct *curr = rq->curr;
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
struct sched_entity *se = &curr->se, *pse = &p->se;
s64 delta_exec;
if (unlikely(rt_prio(p->prio))) {
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
update_rq_clock(rq);
update_curr(cfs_rq);
resched_task(curr);
return;
}
if (unlikely(p->sched_class != &fair_sched_class))
return;
if (unlikely(se == pse))
return;
cfs_rq_of(pse)->next = pse;
/*
* Only set the backward buddy when the current task is still on the
* rq. This can happen when a wakeup gets interleaved with schedule on
* the ->pre_schedule() or idle_balance() point, either of which can
* drop the rq lock.
*
* Also, during early boot the idle thread is in the fair class, for
* obvious reasons its a bad idea to schedule back to the idle thread.
*/
if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
set_last_buddy(se);
set_next_buddy(pse);
/*
* We can come here with TIF_NEED_RESCHED already set from new task
@@ -1296,9 +1398,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
return;
}
delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
if (delta_exec > wakeup_gran(pse))
resched_task(curr);
find_matching_se(&se, &pse);
while (se) {
BUG_ON(!pse);
if (wakeup_preempt_entity(se, pse) == 1) {
resched_task(curr);
break;
}
se = parent_entity(se);
pse = parent_entity(pse);
}
}
static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1312,6 +1424,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
do {
se = pick_next_entity(cfs_rq);
set_next_entity(cfs_rq, se);
cfs_rq = group_cfs_rq(se);
} while (cfs_rq);
@@ -1594,9 +1707,6 @@ static const struct sched_class fair_sched_class = {
.enqueue_task = enqueue_task_fair,
.dequeue_task = dequeue_task_fair,
.yield_task = yield_task_fair,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_fair,
#endif /* CONFIG_SMP */
.check_preempt_curr = check_preempt_wakeup,
@@ -1604,6 +1714,8 @@ static const struct sched_class fair_sched_class = {
.put_prev_task = put_prev_task_fair,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_fair,
.load_balance = load_balance_fair,
.move_one_task = move_one_task_fair,
#endif
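
wakeup_preempt_entity() above returns -1, 0 or 1 depending on where the woken entity's vruntime sits relative to the running one and the wakeup granularity, as the s1/s2/s3 diagram shows. A standalone sketch of just that comparison, using a fixed granularity instead of the load-scaled one:

#include <stdio.h>
#include <stdint.h>

/* Assumed granularity for the sketch; the kernel scales this by load. */
static const int64_t gran = 1000000;            /* 1 ms in ns */

/* -1: do not preempt, 0: within granularity, 1: preempt. */
static int wakeup_preempt(int64_t curr_vruntime, int64_t se_vruntime)
{
    int64_t vdiff = curr_vruntime - se_vruntime;

    if (vdiff <= 0)
        return -1;
    if (vdiff > gran)
        return 1;
    return 0;
}

int main(void)
{
    printf("%d %d %d\n",
           wakeup_preempt(1000, 2000),          /* woken task is behind: -1 */
           wakeup_preempt(2000, 1500),          /* inside granularity:    0 */
           wakeup_preempt(5000000, 1000));      /* far ahead, preempt:    1 */
    return 0;
}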

View File

@@ -12,3 +12,4 @@ SCHED_FEAT(LB_BIAS, 1)
SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
SCHED_FEAT(ASYM_EFF_LOAD, 1)
SCHED_FEAT(WAKEUP_OVERLAP, 0)
SCHED_FEAT(LAST_BUDDY, 1)

View File

@@ -105,9 +105,6 @@ static const struct sched_class idle_sched_class = {
/* dequeue is not valid, we print a debug message there: */
.dequeue_task = dequeue_task_idle,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_idle,
#endif /* CONFIG_SMP */
.check_preempt_curr = check_preempt_curr_idle,
@@ -115,6 +112,8 @@ static const struct sched_class idle_sched_class = {
.put_prev_task = put_prev_task_idle,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_idle,
.load_balance = load_balance_idle,
.move_one_task = move_one_task_idle,
#endif

View File

@@ -1504,9 +1504,6 @@ static const struct sched_class rt_sched_class = {
.enqueue_task = enqueue_task_rt,
.dequeue_task = dequeue_task_rt,
.yield_task = yield_task_rt,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_rt,
#endif /* CONFIG_SMP */
.check_preempt_curr = check_preempt_curr_rt,
@@ -1514,6 +1511,8 @@ static const struct sched_class rt_sched_class = {
.put_prev_task = put_prev_task_rt,
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_rt,
.load_balance = load_balance_rt,
.move_one_task = move_one_task_rt,
.set_cpus_allowed = set_cpus_allowed_rt,

View File

@@ -1144,7 +1144,8 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
struct task_struct * p;
for_each_process(p) {
if (p->pid > 1 && !same_thread_group(p, current)) {
if (task_pid_vnr(p) > 1 &&
!same_thread_group(p, current)) {
int err = group_send_sig_info(sig, info, p);
++count;
if (err != -EPERM)

View File

@@ -51,10 +51,6 @@ static void csd_flag_wait(struct call_single_data *data)
{
/* Wait for response */
do {
/*
* We need to see the flags store in the IPI handler
*/
smp_mb();
if (!(data->flags & CSD_FLAG_WAIT))
break;
cpu_relax();
@@ -76,6 +72,11 @@ static void generic_exec_single(int cpu, struct call_single_data *data)
list_add_tail(&data->list, &dst->list);
spin_unlock_irqrestore(&dst->lock, flags);
/*
* Make the list addition visible before sending the ipi.
*/
smp_mb();
if (ipi)
arch_send_call_function_single_ipi(cpu);
@@ -157,7 +158,7 @@ void generic_smp_call_function_single_interrupt(void)
* Need to see other stores to list head for checking whether
* list is empty without holding q->lock
*/
smp_mb();
smp_read_barrier_depends();
while (!list_empty(&q->list)) {
unsigned int data_flags;
@@ -191,7 +192,7 @@ void generic_smp_call_function_single_interrupt(void)
/*
* See comment on outer loop
*/
smp_mb();
smp_read_barrier_depends();
}
}
@@ -370,6 +371,11 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
list_add_tail_rcu(&data->csd.list, &call_function_queue);
spin_unlock_irqrestore(&call_function_lock, flags);
/*
* Make the list addition visible before sending the ipi.
*/
smp_mb();
/* Send a message to all CPUs in the map */
arch_send_call_function_ipi(mask);
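
The smp.c hunks above add a full barrier so the list addition is visible before the IPI is sent, and relax the reader side to a data-dependency barrier. A loose userspace analogue using C11 release/acquire ordering (not the kernel's smp_mb()/smp_read_barrier_depends()):

#include <stdio.h>
#include <pthread.h>
#include <stdatomic.h>

static int payload;                     /* the "list addition" */
static atomic_int posted = 0;           /* the "IPI" / notification flag */

static void *sender(void *arg)
{
    (void)arg;
    payload = 42;                               /* publish the work item */
    atomic_store_explicit(&posted, 1,           /* then signal: release */
                          memory_order_release);
    return NULL;
}

static void *receiver(void *arg)
{
    (void)arg;
    while (!atomic_load_explicit(&posted, memory_order_acquire))
        ;                                       /* spin until signalled */
    printf("payload = %d\n", payload);          /* guaranteed to see 42 */
    return NULL;
}

int main(void)
{
    pthread_t s, r;

    pthread_create(&r, NULL, receiver, NULL);
    pthread_create(&s, NULL, sender, NULL);
    pthread_join(s, NULL);
    pthread_join(r, NULL);
    return 0;
}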

View File

@@ -269,10 +269,11 @@ void irq_enter(void)
{
int cpu = smp_processor_id();
if (idle_cpu(cpu) && !in_interrupt())
if (idle_cpu(cpu) && !in_interrupt()) {
__irq_enter();
tick_check_idle(cpu);
__irq_enter();
} else
__irq_enter();
}
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED

View File

@@ -474,7 +474,7 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
#ifdef CONFIG_FTRACE
#ifdef CONFIG_FUNCTION_TRACER
{
.ctl_name = CTL_UNNUMBERED,
.procname = "ftrace_enabled",

View File

@@ -568,6 +568,9 @@ static void tick_nohz_switch_to_nohz(void)
*/
static void tick_nohz_kick_tick(int cpu)
{
#if 0
/* Switch back to 2.6.27 behaviour */
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
ktime_t delta, now;
@@ -584,6 +587,7 @@ static void tick_nohz_kick_tick(int cpu)
return;
tick_nohz_restart(ts, now);
#endif
}
#else

View File

@@ -112,6 +112,44 @@ timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
tbase_get_deferrable(timer->base));
}
static unsigned long round_jiffies_common(unsigned long j, int cpu,
bool force_up)
{
int rem;
unsigned long original = j;
/*
* We don't want all cpus firing their timers at once hitting the
* same lock or cachelines, so we skew each extra cpu with an extra
* 3 jiffies. This 3 jiffies came originally from the mm/ code which
* already did this.
* The skew is done by adding 3*cpunr, then round, then subtract this
* extra offset again.
*/
j += cpu * 3;
rem = j % HZ;
/*
* If the target jiffie is just after a whole second (which can happen
* due to delays of the timer irq, long irq off times etc etc) then
* we should round down to the whole second, not up. Use 1/4th second
* as cutoff for this rounding as an extreme upper bound for this.
* But never round down if @force_up is set.
*/
if (rem < HZ/4 && !force_up) /* round down */
j = j - rem;
else /* round up */
j = j - rem + HZ;
/* now that we have rounded, subtract the extra skew again */
j -= cpu * 3;
if (j <= jiffies) /* rounding ate our timeout entirely; */
return original;
return j;
}
/**
* __round_jiffies - function to round jiffies to a full second
* @j: the time in (absolute) jiffies that should be rounded
@@ -134,38 +172,7 @@ timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
*/
unsigned long __round_jiffies(unsigned long j, int cpu)
{
int rem;
unsigned long original = j;
/*
* We don't want all cpus firing their timers at once hitting the
* same lock or cachelines, so we skew each extra cpu with an extra
* 3 jiffies. This 3 jiffies came originally from the mm/ code which
* already did this.
* The skew is done by adding 3*cpunr, then round, then subtract this
* extra offset again.
*/
j += cpu * 3;
rem = j % HZ;
/*
* If the target jiffie is just after a whole second (which can happen
* due to delays of the timer irq, long irq off times etc etc) then
* we should round down to the whole second, not up. Use 1/4th second
* as cutoff for this rounding as an extreme upper bound for this.
*/
if (rem < HZ/4) /* round down */
j = j - rem;
else /* round up */
j = j - rem + HZ;
/* now that we have rounded, subtract the extra skew again */
j -= cpu * 3;
if (j <= jiffies) /* rounding ate our timeout entirely; */
return original;
return j;
return round_jiffies_common(j, cpu, false);
}
EXPORT_SYMBOL_GPL(__round_jiffies);
@@ -191,13 +198,10 @@ EXPORT_SYMBOL_GPL(__round_jiffies);
*/
unsigned long __round_jiffies_relative(unsigned long j, int cpu)
{
/*
* In theory the following code can skip a jiffy in case jiffies
* increments right between the addition and the later subtraction.
* However since the entire point of this function is to use approximate
* timeouts, it's entirely ok to not handle that.
*/
return __round_jiffies(j + jiffies, cpu) - jiffies;
unsigned long j0 = jiffies;
/* Use j0 because jiffies might change while we run */
return round_jiffies_common(j + j0, cpu, false) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_relative);
@@ -218,7 +222,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative);
*/
unsigned long round_jiffies(unsigned long j)
{
return __round_jiffies(j, raw_smp_processor_id());
return round_jiffies_common(j, raw_smp_processor_id(), false);
}
EXPORT_SYMBOL_GPL(round_jiffies);
@@ -243,6 +247,71 @@ unsigned long round_jiffies_relative(unsigned long j)
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);
/**
* __round_jiffies_up - function to round jiffies up to a full second
* @j: the time in (absolute) jiffies that should be rounded
* @cpu: the processor number on which the timeout will happen
*
* This is the same as __round_jiffies() except that it will never
* round down. This is useful for timeouts for which the exact time
* of firing does not matter too much, as long as they don't fire too
* early.
*/
unsigned long __round_jiffies_up(unsigned long j, int cpu)
{
return round_jiffies_common(j, cpu, true);
}
EXPORT_SYMBOL_GPL(__round_jiffies_up);
/**
* __round_jiffies_up_relative - function to round jiffies up to a full second
* @j: the time in (relative) jiffies that should be rounded
* @cpu: the processor number on which the timeout will happen
*
* This is the same as __round_jiffies_relative() except that it will never
* round down. This is useful for timeouts for which the exact time
* of firing does not matter too much, as long as they don't fire too
* early.
*/
unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
{
unsigned long j0 = jiffies;
/* Use j0 because jiffies might change while we run */
return round_jiffies_common(j + j0, cpu, true) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
/**
* round_jiffies_up - function to round jiffies up to a full second
* @j: the time in (absolute) jiffies that should be rounded
*
* This is the same as round_jiffies() except that it will never
* round down. This is useful for timeouts for which the exact time
* of firing does not matter too much, as long as they don't fire too
* early.
*/
unsigned long round_jiffies_up(unsigned long j)
{
return round_jiffies_common(j, raw_smp_processor_id(), true);
}
EXPORT_SYMBOL_GPL(round_jiffies_up);
/**
* round_jiffies_up_relative - function to round jiffies up to a full second
* @j: the time in (relative) jiffies that should be rounded
*
* This is the same as round_jiffies_relative() except that it will never
* round down. This is useful for timeouts for which the exact time
* of firing does not matter too much, as long as they don't fire too
* early.
*/
unsigned long round_jiffies_up_relative(unsigned long j)
{
return __round_jiffies_up_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
static inline void set_running_timer(struct tvec_base *base,
struct timer_list *timer)
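
round_jiffies_common() above skews each CPU by three jiffies, rounds to the nearest whole second (never down when force_up is set), then removes the skew and refuses to round a timeout into the past. A userspace rendition of the arithmetic; HZ and the current jiffies value are assumptions for the demo:

#include <stdio.h>
#include <stdbool.h>

#define HZ 1000                                 /* assumed tick rate */
static unsigned long jiffies = 100000;          /* assumed current time */

static unsigned long round_common(unsigned long j, int cpu, bool force_up)
{
    unsigned long original = j;
    int rem;

    j += cpu * 3;                               /* per-CPU skew */
    rem = j % HZ;
    if (rem < HZ / 4 && !force_up)              /* just past a second: round down */
        j -= rem;
    else                                        /* otherwise round up */
        j = j - rem + HZ;
    j -= cpu * 3;                               /* undo the skew */

    return j <= jiffies ? original : j;
}

int main(void)
{
    unsigned long j = jiffies + HZ + HZ / 8;    /* just past a whole second */

    printf("round:    %lu\n", round_common(j, 0, false));
    printf("round up: %lu\n", round_common(j, 0, true));
    return 0;
}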

View File

@@ -1,13 +1,13 @@
#
# Architectures that offer an FTRACE implementation should select HAVE_FTRACE:
# Architectures that offer an FUNCTION_TRACER implementation should
# select HAVE_FUNCTION_TRACER:
#
config NOP_TRACER
bool
config HAVE_FTRACE
config HAVE_FUNCTION_TRACER
bool
select NOP_TRACER
config HAVE_DYNAMIC_FTRACE
bool
@@ -25,12 +25,15 @@ config TRACING
bool
select DEBUG_FS
select RING_BUFFER
select STACKTRACE
select STACKTRACE if STACKTRACE_SUPPORT
select TRACEPOINTS
select NOP_TRACER
config FTRACE
menu "Tracers"
config FUNCTION_TRACER
bool "Kernel Function Tracer"
depends on HAVE_FTRACE
depends on HAVE_FUNCTION_TRACER
depends on DEBUG_KERNEL
select FRAME_POINTER
select TRACING
@@ -49,7 +52,6 @@ config IRQSOFF_TRACER
default n
depends on TRACE_IRQFLAGS_SUPPORT
depends on GENERIC_TIME
depends on HAVE_FTRACE
depends on DEBUG_KERNEL
select TRACE_IRQFLAGS
select TRACING
@@ -73,7 +75,6 @@ config PREEMPT_TRACER
default n
depends on GENERIC_TIME
depends on PREEMPT
depends on HAVE_FTRACE
depends on DEBUG_KERNEL
select TRACING
select TRACER_MAX_TRACE
@@ -101,7 +102,6 @@ config SYSPROF_TRACER
config SCHED_TRACER
bool "Scheduling Latency Tracer"
depends on HAVE_FTRACE
depends on DEBUG_KERNEL
select TRACING
select CONTEXT_SWITCH_TRACER
@@ -112,7 +112,6 @@ config SCHED_TRACER
config CONTEXT_SWITCH_TRACER
bool "Trace process context switches"
depends on HAVE_FTRACE
depends on DEBUG_KERNEL
select TRACING
select MARKERS
@@ -122,9 +121,9 @@ config CONTEXT_SWITCH_TRACER
config BOOT_TRACER
bool "Trace boot initcalls"
depends on HAVE_FTRACE
depends on DEBUG_KERNEL
select TRACING
select CONTEXT_SWITCH_TRACER
help
This tracer helps developers to optimize boot times: it records
the timings of the initcalls and traces key events and the identity
@@ -141,9 +140,9 @@ config BOOT_TRACER
config STACK_TRACER
bool "Trace max stack"
depends on HAVE_FTRACE
depends on HAVE_FUNCTION_TRACER
depends on DEBUG_KERNEL
select FTRACE
select FUNCTION_TRACER
select STACKTRACE
help
This special tracer records the maximum stack footprint of the
@@ -160,7 +159,7 @@ config STACK_TRACER
config DYNAMIC_FTRACE
bool "enable/disable ftrace tracepoints dynamically"
depends on FTRACE
depends on FUNCTION_TRACER
depends on HAVE_DYNAMIC_FTRACE
depends on DEBUG_KERNEL
default y
@@ -170,7 +169,7 @@ config DYNAMIC_FTRACE
with a No-Op instruction) as they are called. A table is
created to dynamically enable them again.
This way a CONFIG_FTRACE kernel is slightly larger, but otherwise
This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but otherwise
has native performance as long as no tracing is active.
The changes to the code are done by a kernel thread that
@@ -195,3 +194,5 @@ config FTRACE_STARTUP_TEST
a series of tests are made to verify that the tracer is
functioning properly. It will do tests on all the configured
tracers of ftrace.
endmenu

View File

@@ -1,7 +1,7 @@
# Do not instrument the tracer itself:
ifdef CONFIG_FTRACE
ifdef CONFIG_FUNCTION_TRACER
ORIG_CFLAGS := $(KBUILD_CFLAGS)
KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS))
@@ -10,13 +10,13 @@ CFLAGS_trace_selftest_dynamic.o = -pg
obj-y += trace_selftest_dynamic.o
endif
obj-$(CONFIG_FTRACE) += libftrace.o
obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
obj-$(CONFIG_TRACING) += trace.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
obj-$(CONFIG_FTRACE) += trace_functions.o
obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o

View File

@@ -25,13 +25,24 @@
#include <linux/ftrace.h>
#include <linux/sysctl.h>
#include <linux/ctype.h>
#include <linux/hash.h>
#include <linux/list.h>
#include <asm/ftrace.h>
#include "trace.h"
#define FTRACE_WARN_ON(cond) \
do { \
if (WARN_ON(cond)) \
ftrace_kill(); \
} while (0)
#define FTRACE_WARN_ON_ONCE(cond) \
do { \
if (WARN_ON_ONCE(cond)) \
ftrace_kill(); \
} while (0)
/* ftrace_enabled is a method to turn ftrace on or off */
int ftrace_enabled __read_mostly;
static int last_ftrace_enabled;
@@ -153,21 +164,8 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
}
#ifdef CONFIG_DYNAMIC_FTRACE
#ifndef CONFIG_FTRACE_MCOUNT_RECORD
/*
* The hash lock is only needed when the recording of the mcount
* callers are dynamic. That is, by the caller themselves and
* not recorded via the compilation.
*/
static DEFINE_SPINLOCK(ftrace_hash_lock);
#define ftrace_hash_lock(flags) spin_lock_irqsave(&ftrace_hash_lock, flags)
#define ftrace_hash_unlock(flags) \
spin_unlock_irqrestore(&ftrace_hash_lock, flags)
#else
/* This is protected via the ftrace_lock with MCOUNT_RECORD. */
#define ftrace_hash_lock(flags) do { (void)(flags); } while (0)
#define ftrace_hash_unlock(flags) do { } while(0)
# error Dynamic ftrace depends on MCOUNT_RECORD
#endif
/*
@@ -178,8 +176,6 @@ static DEFINE_SPINLOCK(ftrace_hash_lock);
*/
static unsigned long mcount_addr = MCOUNT_ADDR;
static struct task_struct *ftraced_task;
enum {
FTRACE_ENABLE_CALLS = (1 << 0),
FTRACE_DISABLE_CALLS = (1 << 1),
@@ -190,13 +186,9 @@ enum {
static int ftrace_filtered;
static int tracing_on;
static int frozen_record_count;
static struct hlist_head ftrace_hash[FTRACE_HASHSIZE];
static LIST_HEAD(ftrace_new_addrs);
static DEFINE_PER_CPU(int, ftrace_shutdown_disable_cpu);
static DEFINE_MUTEX(ftraced_lock);
static DEFINE_MUTEX(ftrace_regex_lock);
struct ftrace_page {
@@ -214,16 +206,13 @@ struct ftrace_page {
static struct ftrace_page *ftrace_pages_start;
static struct ftrace_page *ftrace_pages;
static int ftraced_trigger;
static int ftraced_suspend;
static int ftraced_stop;
static int ftrace_record_suspend;
static struct dyn_ftrace *ftrace_free_records;
#ifdef CONFIG_KPROBES
static int frozen_record_count;
static inline void freeze_record(struct dyn_ftrace *rec)
{
if (!(rec->flags & FTRACE_FL_FROZEN)) {
@@ -250,72 +239,6 @@ static inline int record_frozen(struct dyn_ftrace *rec)
# define record_frozen(rec) ({ 0; })
#endif /* CONFIG_KPROBES */
int skip_trace(unsigned long ip)
{
unsigned long fl;
struct dyn_ftrace *rec;
struct hlist_node *t;
struct hlist_head *head;
if (frozen_record_count == 0)
return 0;
head = &ftrace_hash[hash_long(ip, FTRACE_HASHBITS)];
hlist_for_each_entry_rcu(rec, t, head, node) {
if (rec->ip == ip) {
if (record_frozen(rec)) {
if (rec->flags & FTRACE_FL_FAILED)
return 1;
if (!(rec->flags & FTRACE_FL_CONVERTED))
return 1;
if (!tracing_on || !ftrace_enabled)
return 1;
if (ftrace_filtered) {
fl = rec->flags & (FTRACE_FL_FILTER |
FTRACE_FL_NOTRACE);
if (!fl || (fl & FTRACE_FL_NOTRACE))
return 1;
}
}
break;
}
}
return 0;
}
static inline int
ftrace_ip_in_hash(unsigned long ip, unsigned long key)
{
struct dyn_ftrace *p;
struct hlist_node *t;
int found = 0;
hlist_for_each_entry_rcu(p, t, &ftrace_hash[key], node) {
if (p->ip == ip) {
found = 1;
break;
}
}
return found;
}
static inline void
ftrace_add_hash(struct dyn_ftrace *node, unsigned long key)
{
hlist_add_head_rcu(&node->node, &ftrace_hash[key]);
}
/* called from kstop_machine */
static inline void ftrace_del_hash(struct dyn_ftrace *node)
{
hlist_del(&node->node);
}
static void ftrace_free_rec(struct dyn_ftrace *rec)
{
rec->ip = (unsigned long)ftrace_free_records;
@@ -346,7 +269,6 @@ void ftrace_release(void *start, unsigned long size)
}
}
spin_unlock(&ftrace_lock);
}
static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
@@ -358,10 +280,8 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
rec = ftrace_free_records;
if (unlikely(!(rec->flags & FTRACE_FL_FREE))) {
WARN_ON_ONCE(1);
FTRACE_WARN_ON_ONCE(1);
ftrace_free_records = NULL;
ftrace_disabled = 1;
ftrace_enabled = 0;
return NULL;
}
@@ -371,76 +291,36 @@ static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
}
if (ftrace_pages->index == ENTRIES_PER_PAGE) {
if (!ftrace_pages->next)
return NULL;
if (!ftrace_pages->next) {
/* allocate another page */
ftrace_pages->next =
(void *)get_zeroed_page(GFP_KERNEL);
if (!ftrace_pages->next)
return NULL;
}
ftrace_pages = ftrace_pages->next;
}
return &ftrace_pages->records[ftrace_pages->index++];
}
static void
static struct dyn_ftrace *
ftrace_record_ip(unsigned long ip)
{
struct dyn_ftrace *node;
unsigned long flags;
unsigned long key;
int resched;
int cpu;
struct dyn_ftrace *rec;
if (!ftrace_enabled || ftrace_disabled)
return;
return NULL;
resched = need_resched();
preempt_disable_notrace();
rec = ftrace_alloc_dyn_node(ip);
if (!rec)
return NULL;
/*
* We simply need to protect against recursion.
* Use the the raw version of smp_processor_id and not
* __get_cpu_var which can call debug hooks that can
* cause a recursive crash here.
*/
cpu = raw_smp_processor_id();
per_cpu(ftrace_shutdown_disable_cpu, cpu)++;
if (per_cpu(ftrace_shutdown_disable_cpu, cpu) != 1)
goto out;
rec->ip = ip;
if (unlikely(ftrace_record_suspend))
goto out;
list_add(&rec->list, &ftrace_new_addrs);
key = hash_long(ip, FTRACE_HASHBITS);
WARN_ON_ONCE(key >= FTRACE_HASHSIZE);
if (ftrace_ip_in_hash(ip, key))
goto out;
ftrace_hash_lock(flags);
/* This ip may have hit the hash before the lock */
if (ftrace_ip_in_hash(ip, key))
goto out_unlock;
node = ftrace_alloc_dyn_node(ip);
if (!node)
goto out_unlock;
node->ip = ip;
ftrace_add_hash(node, key);
ftraced_trigger = 1;
out_unlock:
ftrace_hash_unlock(flags);
out:
per_cpu(ftrace_shutdown_disable_cpu, cpu)--;
/* prevent recursion with scheduler */
if (resched)
preempt_enable_no_resched_notrace();
else
preempt_enable_notrace();
return rec;
}
#define FTRACE_ADDR ((long)(ftrace_caller))
@@ -559,7 +439,6 @@ static void ftrace_replace_code(int enable)
rec->flags |= FTRACE_FL_FAILED;
if ((system_state == SYSTEM_BOOTING) ||
!core_kernel_text(rec->ip)) {
ftrace_del_hash(rec);
ftrace_free_rec(rec);
}
}
@@ -567,15 +446,6 @@ static void ftrace_replace_code(int enable)
}
}
static void ftrace_shutdown_replenish(void)
{
if (ftrace_pages->next)
return;
/* allocate another page */
ftrace_pages->next = (void *)get_zeroed_page(GFP_KERNEL);
}
static void print_ip_ins(const char *fmt, unsigned char *p)
{
int i;
@@ -591,23 +461,23 @@ ftrace_code_disable(struct dyn_ftrace *rec)
{
unsigned long ip;
unsigned char *nop, *call;
int failed;
int ret;
ip = rec->ip;
nop = ftrace_nop_replace();
call = ftrace_call_replace(ip, mcount_addr);
failed = ftrace_modify_code(ip, call, nop);
if (failed) {
switch (failed) {
case 1:
WARN_ON_ONCE(1);
ret = ftrace_modify_code(ip, call, nop);
if (ret) {
switch (ret) {
case -EFAULT:
FTRACE_WARN_ON_ONCE(1);
pr_info("ftrace faulted on modifying ");
print_ip_sym(ip);
break;
case 2:
WARN_ON_ONCE(1);
case -EINVAL:
FTRACE_WARN_ON_ONCE(1);
pr_info("ftrace failed to modify ");
print_ip_sym(ip);
print_ip_ins(" expected: ", call);
@@ -615,6 +485,15 @@ ftrace_code_disable(struct dyn_ftrace *rec)
print_ip_ins(" replace: ", nop);
printk(KERN_CONT "\n");
break;
case -EPERM:
FTRACE_WARN_ON_ONCE(1);
pr_info("ftrace faulted on writing ");
print_ip_sym(ip);
break;
default:
FTRACE_WARN_ON_ONCE(1);
pr_info("ftrace faulted on unknown error ");
print_ip_sym(ip);
}
rec->flags |= FTRACE_FL_FAILED;
@@ -623,19 +502,11 @@ ftrace_code_disable(struct dyn_ftrace *rec)
return 1;
}
static int __ftrace_update_code(void *ignore);
static int __ftrace_modify_code(void *data)
{
unsigned long addr;
int *command = data;
if (*command & FTRACE_ENABLE_CALLS) {
/*
* Update any recorded ips now that we have the
* machine stopped
*/
__ftrace_update_code(NULL);
ftrace_replace_code(1);
tracing_on = 1;
} else if (*command & FTRACE_DISABLE_CALLS) {
@@ -646,14 +517,6 @@ static int __ftrace_modify_code(void *data)
if (*command & FTRACE_UPDATE_TRACE_FUNC)
ftrace_update_ftrace_func(ftrace_trace_function);
if (*command & FTRACE_ENABLE_MCOUNT) {
addr = (unsigned long)ftrace_record_ip;
ftrace_mcount_set(&addr);
} else if (*command & FTRACE_DISABLE_MCOUNT) {
addr = (unsigned long)ftrace_stub;
ftrace_mcount_set(&addr);
}
return 0;
}
@@ -662,26 +525,9 @@ static void ftrace_run_update_code(int command)
stop_machine(__ftrace_modify_code, &command, NULL);
}
void ftrace_disable_daemon(void)
{
/* Stop the daemon from calling kstop_machine */
mutex_lock(&ftraced_lock);
ftraced_stop = 1;
mutex_unlock(&ftraced_lock);
ftrace_force_update();
}
void ftrace_enable_daemon(void)
{
mutex_lock(&ftraced_lock);
ftraced_stop = 0;
mutex_unlock(&ftraced_lock);
ftrace_force_update();
}
static ftrace_func_t saved_ftrace_func;
static int ftrace_start;
static DEFINE_MUTEX(ftrace_start_lock);
static void ftrace_startup(void)
{
@@ -690,9 +536,9 @@ static void ftrace_startup(void)
if (unlikely(ftrace_disabled))
return;
mutex_lock(&ftraced_lock);
ftraced_suspend++;
if (ftraced_suspend == 1)
mutex_lock(&ftrace_start_lock);
ftrace_start++;
if (ftrace_start == 1)
command |= FTRACE_ENABLE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
@@ -705,7 +551,7 @@ static void ftrace_startup(void)
ftrace_run_update_code(command);
out:
mutex_unlock(&ftraced_lock);
mutex_unlock(&ftrace_start_lock);
}
static void ftrace_shutdown(void)
@@ -715,9 +561,9 @@ static void ftrace_shutdown(void)
if (unlikely(ftrace_disabled))
return;
mutex_lock(&ftraced_lock);
ftraced_suspend--;
if (!ftraced_suspend)
mutex_lock(&ftrace_start_lock);
ftrace_start--;
if (!ftrace_start)
command |= FTRACE_DISABLE_CALLS;
if (saved_ftrace_func != ftrace_trace_function) {
@@ -730,7 +576,7 @@ static void ftrace_shutdown(void)
ftrace_run_update_code(command);
out:
mutex_unlock(&ftraced_lock);
mutex_unlock(&ftrace_start_lock);
}
static void ftrace_startup_sysctl(void)
@@ -740,15 +586,15 @@ static void ftrace_startup_sysctl(void)
if (unlikely(ftrace_disabled))
return;
mutex_lock(&ftraced_lock);
mutex_lock(&ftrace_start_lock);
/* Force update next time */
saved_ftrace_func = NULL;
/* ftraced_suspend is true if we want ftrace running */
if (ftraced_suspend)
/* ftrace_start is true if we want ftrace running */
if (ftrace_start)
command |= FTRACE_ENABLE_CALLS;
ftrace_run_update_code(command);
mutex_unlock(&ftraced_lock);
mutex_unlock(&ftrace_start_lock);
}
static void ftrace_shutdown_sysctl(void)
@@ -758,112 +604,50 @@ static void ftrace_shutdown_sysctl(void)
if (unlikely(ftrace_disabled))
return;
mutex_lock(&ftraced_lock);
/* ftraced_suspend is true if ftrace is running */
if (ftraced_suspend)
mutex_lock(&ftrace_start_lock);
/* ftrace_start is true if ftrace is running */
if (ftrace_start)
command |= FTRACE_DISABLE_CALLS;
ftrace_run_update_code(command);
mutex_unlock(&ftraced_lock);
mutex_unlock(&ftrace_start_lock);
}
static cycle_t ftrace_update_time;
static unsigned long ftrace_update_cnt;
unsigned long ftrace_update_tot_cnt;
static int __ftrace_update_code(void *ignore)
static int ftrace_update_code(void)
{
int i, save_ftrace_enabled;
struct dyn_ftrace *p, *t;
cycle_t start, stop;
struct dyn_ftrace *p;
struct hlist_node *t, *n;
struct hlist_head *head, temp_list;
/* Don't be recording funcs now */
ftrace_record_suspend++;
save_ftrace_enabled = ftrace_enabled;
ftrace_enabled = 0;
start = ftrace_now(raw_smp_processor_id());
ftrace_update_cnt = 0;
/* No locks needed, the machine is stopped! */
for (i = 0; i < FTRACE_HASHSIZE; i++) {
INIT_HLIST_HEAD(&temp_list);
head = &ftrace_hash[i];
list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) {
/* all CPUS are stopped, we are safe to modify code */
hlist_for_each_entry_safe(p, t, n, head, node) {
/* Skip over failed records which have not been
* freed. */
if (p->flags & FTRACE_FL_FAILED)
continue;
/* If something went wrong, bail without enabling anything */
if (unlikely(ftrace_disabled))
return -1;
/* Unconverted records are always at the head of the
* hash bucket. Once we encounter a converted record,
* simply skip over to the next bucket. Saves ftraced
* some processor cycles (ftrace does its bid for
* global warming :-p ). */
if (p->flags & (FTRACE_FL_CONVERTED))
break;
list_del_init(&p->list);
/* Ignore updates to this record's mcount site.
* Reintroduce this record at the head of this
* bucket to attempt to "convert" it again if
* the kprobe on it is unregistered before the
* next run. */
if (get_kprobe((void *)p->ip)) {
ftrace_del_hash(p);
INIT_HLIST_NODE(&p->node);
hlist_add_head(&p->node, &temp_list);
freeze_record(p);
continue;
} else {
unfreeze_record(p);
}
/* convert record (i.e, patch mcount-call with NOP) */
if (ftrace_code_disable(p)) {
p->flags |= FTRACE_FL_CONVERTED;
ftrace_update_cnt++;
} else {
if ((system_state == SYSTEM_BOOTING) ||
!core_kernel_text(p->ip)) {
ftrace_del_hash(p);
ftrace_free_rec(p);
}
}
}
hlist_for_each_entry_safe(p, t, n, &temp_list, node) {
hlist_del(&p->node);
INIT_HLIST_NODE(&p->node);
hlist_add_head(&p->node, head);
}
/* convert record (i.e, patch mcount-call with NOP) */
if (ftrace_code_disable(p)) {
p->flags |= FTRACE_FL_CONVERTED;
ftrace_update_cnt++;
} else
ftrace_free_rec(p);
}
stop = ftrace_now(raw_smp_processor_id());
ftrace_update_time = stop - start;
ftrace_update_tot_cnt += ftrace_update_cnt;
ftraced_trigger = 0;
ftrace_enabled = save_ftrace_enabled;
ftrace_record_suspend--;
return 0;
}
static int ftrace_update_code(void)
{
if (unlikely(ftrace_disabled) ||
!ftrace_enabled || !ftraced_trigger)
return 0;
stop_machine(__ftrace_update_code, NULL, NULL);
return 1;
}
static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
{
struct ftrace_page *pg;
@@ -892,7 +676,7 @@ static int __init ftrace_dyn_table_alloc(unsigned long num_to_init)
pg = ftrace_pages = ftrace_pages_start;
cnt = num_to_init / ENTRIES_PER_PAGE;
pr_info("ftrace: allocating %ld hash entries in %d pages\n",
pr_info("ftrace: allocating %ld entries in %d pages\n",
num_to_init, cnt);
for (i = 0; i < cnt; i++) {
@@ -1401,10 +1185,10 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable)
}
mutex_lock(&ftrace_sysctl_lock);
mutex_lock(&ftraced_lock);
if (iter->filtered && ftraced_suspend && ftrace_enabled)
mutex_lock(&ftrace_start_lock);
if (iter->filtered && ftrace_start && ftrace_enabled)
ftrace_run_update_code(FTRACE_ENABLE_CALLS);
mutex_unlock(&ftraced_lock);
mutex_unlock(&ftrace_start_lock);
mutex_unlock(&ftrace_sysctl_lock);
kfree(iter);
@@ -1424,55 +1208,6 @@ ftrace_notrace_release(struct inode *inode, struct file *file)
return ftrace_regex_release(inode, file, 0);
}
static ssize_t
ftraced_read(struct file *filp, char __user *ubuf,
size_t cnt, loff_t *ppos)
{
/* don't worry about races */
char *buf = ftraced_stop ? "disabled\n" : "enabled\n";
int r = strlen(buf);
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}
static ssize_t
ftraced_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
char buf[64];
long val;
int ret;
if (cnt >= sizeof(buf))
return -EINVAL;
if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;
if (strncmp(buf, "enable", 6) == 0)
val = 1;
else if (strncmp(buf, "disable", 7) == 0)
val = 0;
else {
buf[cnt] = 0;
ret = strict_strtoul(buf, 10, &val);
if (ret < 0)
return ret;
val = !!val;
}
if (val)
ftrace_enable_daemon();
else
ftrace_disable_daemon();
filp->f_pos += cnt;
return cnt;
}
static struct file_operations ftrace_avail_fops = {
.open = ftrace_avail_open,
.read = seq_read,
@@ -1503,54 +1238,6 @@ static struct file_operations ftrace_notrace_fops = {
.release = ftrace_notrace_release,
};
static struct file_operations ftraced_fops = {
.open = tracing_open_generic,
.read = ftraced_read,
.write = ftraced_write,
};
/**
* ftrace_force_update - force an update to all recording ftrace functions
*/
int ftrace_force_update(void)
{
int ret = 0;
if (unlikely(ftrace_disabled))
return -ENODEV;
mutex_lock(&ftrace_sysctl_lock);
mutex_lock(&ftraced_lock);
/*
* If ftraced_trigger is not set, then there is nothing
* to update.
*/
if (ftraced_trigger && !ftrace_update_code())
ret = -EBUSY;
mutex_unlock(&ftraced_lock);
mutex_unlock(&ftrace_sysctl_lock);
return ret;
}
static void ftrace_force_shutdown(void)
{
struct task_struct *task;
int command = FTRACE_DISABLE_CALLS | FTRACE_UPDATE_TRACE_FUNC;
mutex_lock(&ftraced_lock);
task = ftraced_task;
ftraced_task = NULL;
ftraced_suspend = -1;
ftrace_run_update_code(command);
mutex_unlock(&ftraced_lock);
if (task)
kthread_stop(task);
}
static __init int ftrace_init_debugfs(void)
{
struct dentry *d_tracer;
@@ -1581,17 +1268,11 @@ static __init int ftrace_init_debugfs(void)
pr_warning("Could not create debugfs "
"'set_ftrace_notrace' entry\n");
entry = debugfs_create_file("ftraced_enabled", 0644, d_tracer,
NULL, &ftraced_fops);
if (!entry)
pr_warning("Could not create debugfs "
"'ftraced_enabled' entry\n");
return 0;
}
fs_initcall(ftrace_init_debugfs);
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
static int ftrace_convert_nops(unsigned long *start,
unsigned long *end)
{
@@ -1599,20 +1280,18 @@ static int ftrace_convert_nops(unsigned long *start,
unsigned long addr;
unsigned long flags;
mutex_lock(&ftrace_start_lock);
p = start;
while (p < end) {
addr = ftrace_call_adjust(*p++);
/* should not be called from interrupt context */
spin_lock(&ftrace_lock);
ftrace_record_ip(addr);
spin_unlock(&ftrace_lock);
ftrace_shutdown_replenish();
}
/* p is ignored */
/* disable interrupts to prevent kstop machine */
local_irq_save(flags);
__ftrace_update_code(p);
ftrace_update_code();
local_irq_restore(flags);
mutex_unlock(&ftrace_start_lock);
return 0;
}
@@ -1658,130 +1337,34 @@ void __init ftrace_init(void)
failed:
ftrace_disabled = 1;
}
#else /* CONFIG_FTRACE_MCOUNT_RECORD */
static int ftraced(void *ignore)
{
unsigned long usecs;
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
/* check once a second */
schedule_timeout(HZ);
if (unlikely(ftrace_disabled))
continue;
mutex_lock(&ftrace_sysctl_lock);
mutex_lock(&ftraced_lock);
if (!ftraced_suspend && !ftraced_stop &&
ftrace_update_code()) {
usecs = nsecs_to_usecs(ftrace_update_time);
if (ftrace_update_tot_cnt > 100000) {
ftrace_update_tot_cnt = 0;
pr_info("hm, dftrace overflow: %lu change%s"
" (%lu total) in %lu usec%s\n",
ftrace_update_cnt,
ftrace_update_cnt != 1 ? "s" : "",
ftrace_update_tot_cnt,
usecs, usecs != 1 ? "s" : "");
ftrace_disabled = 1;
WARN_ON_ONCE(1);
}
}
mutex_unlock(&ftraced_lock);
mutex_unlock(&ftrace_sysctl_lock);
ftrace_shutdown_replenish();
}
__set_current_state(TASK_RUNNING);
return 0;
}
static int __init ftrace_dynamic_init(void)
{
struct task_struct *p;
unsigned long addr;
int ret;
addr = (unsigned long)ftrace_record_ip;
stop_machine(ftrace_dyn_arch_init, &addr, NULL);
/* ftrace_dyn_arch_init places the return code in addr */
if (addr) {
ret = (int)addr;
goto failed;
}
ret = ftrace_dyn_table_alloc(NR_TO_INIT);
if (ret)
goto failed;
p = kthread_run(ftraced, NULL, "ftraced");
if (IS_ERR(p)) {
ret = -1;
goto failed;
}
last_ftrace_enabled = ftrace_enabled = 1;
ftraced_task = p;
return 0;
failed:
ftrace_disabled = 1;
return ret;
}
core_initcall(ftrace_dynamic_init);
#endif /* CONFIG_FTRACE_MCOUNT_RECORD */
#else
static int __init ftrace_nodyn_init(void)
{
ftrace_enabled = 1;
return 0;
}
device_initcall(ftrace_nodyn_init);
# define ftrace_startup() do { } while (0)
# define ftrace_shutdown() do { } while (0)
# define ftrace_startup_sysctl() do { } while (0)
# define ftrace_shutdown_sysctl() do { } while (0)
# define ftrace_force_shutdown() do { } while (0)
#endif /* CONFIG_DYNAMIC_FTRACE */
/**
* ftrace_kill_atomic - kill ftrace from critical sections
* ftrace_kill - kill ftrace
*
* This function should be used by panic code. It stops ftrace
* but in a not so nice way. If you need to simply kill ftrace
* from a non-atomic section, use ftrace_kill.
*/
void ftrace_kill_atomic(void)
{
ftrace_disabled = 1;
ftrace_enabled = 0;
#ifdef CONFIG_DYNAMIC_FTRACE
ftraced_suspend = -1;
#endif
clear_ftrace_function();
}
/**
* ftrace_kill - totally shutdown ftrace
*
* This is a safety measure. If something was detected that seems
* wrong, calling this function will keep ftrace from doing
* any more modifications or updates. It is used when
* something went wrong.
*/
void ftrace_kill(void)
{
mutex_lock(&ftrace_sysctl_lock);
ftrace_disabled = 1;
ftrace_enabled = 0;
clear_ftrace_function();
mutex_unlock(&ftrace_sysctl_lock);
/* Try to totally disable ftrace */
ftrace_force_shutdown();
}
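The two kill paths above are being folded together; judging by the surrounding hunks, the surviving ftrace_kill() keeps only the lock-free body so panic and oops code can call it from any context. A hedged sketch of what the merged function presumably looks like (an assumption, not quoted from the tree):

/* Assumed result of the merge: a lock-free ftrace_kill() that is safe
 * to call from atomic/panic context. */
void ftrace_kill(void)
{
	ftrace_disabled = 1;		/* forbid any further code patching */
	ftrace_enabled = 0;		/* stop handing out the trace function */
	clear_ftrace_function();	/* fall back to the stub callback */
}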
/**
@@ -1870,3 +1453,4 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
mutex_unlock(&ftrace_sysctl_lock);
return ret;
}

View File

@@ -130,7 +130,7 @@ struct buffer_page {
static inline void free_buffer_page(struct buffer_page *bpage)
{
if (bpage->page)
__free_page(bpage->page);
free_page((unsigned long)bpage->page);
kfree(bpage);
}
@@ -966,7 +966,9 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
if (unlikely(*delta > (1ULL << 59) && !once++)) {
printk(KERN_WARNING "Delta way too big! %llu"
" ts=%llu write stamp = %llu\n",
*delta, *ts, cpu_buffer->write_stamp);
(unsigned long long)*delta,
(unsigned long long)*ts,
(unsigned long long)cpu_buffer->write_stamp);
WARN_ON(1);
}
@@ -1020,8 +1022,23 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
struct ring_buffer_event *event;
u64 ts, delta;
int commit = 0;
int nr_loops = 0;
again:
/*
* We allow for interrupts to reenter here and do a trace.
* If one does, it will cause this original code to loop
* back here. Even with heavy interrupts happening, this
* should only happen a few times in a row. If this happens
* 1000 times in a row, there must be either an interrupt
* storm or we have something buggy.
* Bail!
*/
if (unlikely(++nr_loops > 1000)) {
RB_WARN_ON(cpu_buffer, 1);
return NULL;
}
ts = ring_buffer_time_stamp(cpu_buffer->cpu);
/*
@@ -1043,7 +1060,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
/* Did the write stamp get updated already? */
if (unlikely(ts < cpu_buffer->write_stamp))
goto again;
delta = 0;
if (test_time_stamp(delta)) {
@@ -1530,10 +1547,23 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
{
struct buffer_page *reader = NULL;
unsigned long flags;
int nr_loops = 0;
spin_lock_irqsave(&cpu_buffer->lock, flags);
again:
/*
* This should normally only loop twice. But because the
* start of the reader inserts an empty page, it causes
* a case where we will loop three times. There should be no
* reason to loop four times (that I know of).
*/
if (unlikely(++nr_loops > 3)) {
RB_WARN_ON(cpu_buffer, 1);
reader = NULL;
goto out;
}
reader = cpu_buffer->reader_page;
/* If there's more to read, return this page */
@@ -1663,6 +1693,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
struct buffer_page *reader;
int nr_loops = 0;
if (!cpu_isset(cpu, buffer->cpumask))
return NULL;
@@ -1670,6 +1701,19 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
cpu_buffer = buffer->buffers[cpu];
again:
/*
* We repeat when a timestamp is encountered. It is possible
* to get multiple timestamps from an interrupt entering just
* as one timestamp is about to be written. The maximum number of
* times this can happen is the number of nested interrupts we
* can have. Nesting interrupts 10 deep is clearly
* an anomaly.
*/
if (unlikely(++nr_loops > 10)) {
RB_WARN_ON(cpu_buffer, 1);
return NULL;
}
reader = rb_get_reader_page(cpu_buffer);
if (!reader)
return NULL;
@@ -1720,6 +1764,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
struct ring_buffer *buffer;
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
int nr_loops = 0;
if (ring_buffer_iter_empty(iter))
return NULL;
@@ -1728,6 +1773,19 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
buffer = cpu_buffer->buffer;
again:
/*
* We repeat when a timestamp is encountered. It is possible
* to get multiple timestamps from an interrupt entering just
* as one timestamp is about to be written. The maximum number of
* times this can happen is the number of nested interrupts we
* can have. Nesting interrupts 10 deep is clearly
* an anomaly.
*/
if (unlikely(++nr_loops > 10)) {
RB_WARN_ON(cpu_buffer, 1);
return NULL;
}
if (rb_per_cpu_empty(cpu_buffer))
return NULL;
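All of the nr_loops guards added in this file follow one pattern: an open-ended retry loop gets a generous bound, and exceeding it trips RB_WARN_ON() and bails out instead of spinning forever. A stand-alone sketch of the pattern (try_once() and MAX_RETRIES are placeholder names, not symbols from ring_buffer.c):

/* Illustrative bounded-retry guard; placeholder names throughout. */
#define MAX_RETRIES 10

static struct ring_buffer_event *
bounded_retry(struct ring_buffer_per_cpu *cpu_buffer)
{
	struct ring_buffer_event *event;
	int nr_loops = 0;

again:
	/*
	 * Far more retries than any plausible interrupt nesting can
	 * explain means an interrupt storm or a bug: warn and bail.
	 */
	if (unlikely(++nr_loops > MAX_RETRIES)) {
		RB_WARN_ON(cpu_buffer, 1);
		return NULL;
	}

	event = try_once(cpu_buffer);
	if (!event)
		goto again;	/* raced with an interrupt; retry */

	return event;
}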

View File

@@ -34,6 +34,7 @@
#include <linux/stacktrace.h>
#include <linux/ring_buffer.h>
#include <linux/irqflags.h>
#include "trace.h"
@@ -655,7 +656,11 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
entry->preempt_count = pc & 0xff;
entry->pid = (tsk) ? tsk->pid : 0;
entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
#else
TRACE_FLAG_IRQS_NOSUPPORT |
#endif
((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
(need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
@@ -700,6 +705,7 @@ static void ftrace_trace_stack(struct trace_array *tr,
unsigned long flags,
int skip, int pc)
{
#ifdef CONFIG_STACKTRACE
struct ring_buffer_event *event;
struct stack_entry *entry;
struct stack_trace trace;
@@ -725,6 +731,7 @@ static void ftrace_trace_stack(struct trace_array *tr,
save_stack_trace(&trace);
ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
#endif
}
void __trace_stack(struct trace_array *tr,
@@ -851,7 +858,7 @@ ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
preempt_enable_notrace();
}
#ifdef CONFIG_FTRACE
#ifdef CONFIG_FUNCTION_TRACER
static void
function_trace_call(unsigned long ip, unsigned long parent_ip)
{
@@ -865,9 +872,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
if (unlikely(!ftrace_function_enabled))
return;
if (skip_trace(ip))
return;
pc = preempt_count();
resched = need_resched();
preempt_disable_notrace();
@@ -1084,17 +1088,20 @@ static void s_stop(struct seq_file *m, void *p)
mutex_unlock(&trace_types_lock);
}
#define KRETPROBE_MSG "[unknown/kretprobe'd]"
#ifdef CONFIG_KRETPROBES
static inline int kretprobed(unsigned long addr)
static inline const char *kretprobed(const char *name)
{
return addr == (unsigned long)kretprobe_trampoline;
static const char tramp_name[] = "kretprobe_trampoline";
int size = sizeof(tramp_name);
if (strncmp(tramp_name, name, size) == 0)
return "[unknown/kretprobe'd]";
return name;
}
#else
static inline int kretprobed(unsigned long addr)
static inline const char *kretprobed(const char *name)
{
return 0;
return name;
}
#endif /* CONFIG_KRETPROBES */
@@ -1103,10 +1110,13 @@ seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
{
#ifdef CONFIG_KALLSYMS
char str[KSYM_SYMBOL_LEN];
const char *name;
kallsyms_lookup(address, NULL, NULL, NULL, str);
return trace_seq_printf(s, fmt, str);
name = kretprobed(str);
return trace_seq_printf(s, fmt, name);
#endif
return 1;
}
@@ -1117,9 +1127,12 @@ seq_print_sym_offset(struct trace_seq *s, const char *fmt,
{
#ifdef CONFIG_KALLSYMS
char str[KSYM_SYMBOL_LEN];
const char *name;
sprint_symbol(str, address);
return trace_seq_printf(s, fmt, str);
name = kretprobed(str);
return trace_seq_printf(s, fmt, name);
#endif
return 1;
}
@@ -1246,7 +1259,8 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
trace_seq_printf(s, "%3d", cpu);
trace_seq_printf(s, "%c%c",
(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
(entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
(entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.',
((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
@@ -1372,10 +1386,7 @@ print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
seq_print_ip_sym(s, field->ip, sym_flags);
trace_seq_puts(s, " (");
if (kretprobed(field->parent_ip))
trace_seq_puts(s, KRETPROBE_MSG);
else
seq_print_ip_sym(s, field->parent_ip, sym_flags);
seq_print_ip_sym(s, field->parent_ip, sym_flags);
trace_seq_puts(s, ")\n");
break;
}
@@ -1491,12 +1502,9 @@ static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
ret = trace_seq_printf(s, " <-");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
if (kretprobed(field->parent_ip))
ret = trace_seq_puts(s, KRETPROBE_MSG);
else
ret = seq_print_ip_sym(s,
field->parent_ip,
sym_flags);
ret = seq_print_ip_sym(s,
field->parent_ip,
sym_flags);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
}
@@ -1747,7 +1755,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
return TRACE_TYPE_HANDLED;
SEQ_PUT_FIELD_RET(s, entry->pid);
SEQ_PUT_FIELD_RET(s, iter->cpu);
SEQ_PUT_FIELD_RET(s, entry->cpu);
SEQ_PUT_FIELD_RET(s, iter->ts);
switch (entry->type) {
@@ -2379,9 +2387,10 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
int i;
size_t ret;
ret = cnt;
if (cnt > max_tracer_type_len)
cnt = max_tracer_type_len;
ret = cnt;
if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;
@@ -2414,8 +2423,8 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
out:
mutex_unlock(&trace_types_lock);
if (ret == cnt)
filp->f_pos += cnt;
if (ret > 0)
filp->f_pos += ret;
return ret;
}
@@ -2667,7 +2676,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
{
unsigned long val;
char buf[64];
int ret;
int ret, cpu;
struct trace_array *tr = filp->private_data;
if (cnt >= sizeof(buf))
@@ -2695,6 +2704,14 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
goto out;
}
/* disable all cpu buffers */
for_each_tracing_cpu(cpu) {
if (global_trace.data[cpu])
atomic_inc(&global_trace.data[cpu]->disabled);
if (max_tr.data[cpu])
atomic_inc(&max_tr.data[cpu]->disabled);
}
if (val != global_trace.entries) {
ret = ring_buffer_resize(global_trace.buffer, val);
if (ret < 0) {
@@ -2726,6 +2743,13 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
if (tracing_disabled)
cnt = -ENOMEM;
out:
for_each_tracing_cpu(cpu) {
if (global_trace.data[cpu])
atomic_dec(&global_trace.data[cpu]->disabled);
if (max_tr.data[cpu])
atomic_dec(&max_tr.data[cpu]->disabled);
}
max_tr.entries = global_trace.entries;
mutex_unlock(&trace_types_lock);
@@ -3097,7 +3121,7 @@ void ftrace_dump(void)
dump_ran = 1;
/* No turning back! */
ftrace_kill_atomic();
ftrace_kill();
for_each_tracing_cpu(cpu) {
atomic_inc(&global_trace.data[cpu]->disabled);

View File

@@ -120,18 +120,20 @@ struct trace_boot {
/*
* trace_flag_type is an enumeration that holds different
* states when a trace occurs. These are:
* IRQS_OFF - interrupts were disabled
* NEED_RESCHED - reschedule is requested
* HARDIRQ - inside an interrupt handler
* SOFTIRQ - inside a softirq handler
* CONT - multiple entries hold the trace item
* IRQS_OFF - interrupts were disabled
* IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
* NEED_RESCHED - reschedule is requested
* HARDIRQ - inside an interrupt handler
* SOFTIRQ - inside a softirq handler
* CONT - multiple entries hold the trace item
*/
enum trace_flag_type {
TRACE_FLAG_IRQS_OFF = 0x01,
TRACE_FLAG_NEED_RESCHED = 0x02,
TRACE_FLAG_HARDIRQ = 0x04,
TRACE_FLAG_SOFTIRQ = 0x08,
TRACE_FLAG_CONT = 0x10,
TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
TRACE_FLAG_NEED_RESCHED = 0x04,
TRACE_FLAG_HARDIRQ = 0x08,
TRACE_FLAG_SOFTIRQ = 0x10,
TRACE_FLAG_CONT = 0x20,
};
#define TRACE_BUF_SIZE 1024
@@ -335,7 +337,7 @@ void update_max_tr_single(struct trace_array *tr,
extern cycle_t ftrace_now(int cpu);
#ifdef CONFIG_FTRACE
#ifdef CONFIG_FUNCTION_TRACER
void tracing_start_function_trace(void);
void tracing_stop_function_trace(void);
#else

View File

@@ -64,7 +64,7 @@ static void function_trace_ctrl_update(struct trace_array *tr)
static struct tracer function_trace __read_mostly =
{
.name = "ftrace",
.name = "function",
.init = function_trace_init,
.reset = function_trace_reset,
.ctrl_update = function_trace_ctrl_update,

View File

@@ -63,7 +63,7 @@ irq_trace(void)
*/
static __cacheline_aligned_in_smp unsigned long max_sequence;
#ifdef CONFIG_FTRACE
#ifdef CONFIG_FUNCTION_TRACER
/*
* irqsoff uses its own tracer function to keep the overhead down:
*/
@@ -104,7 +104,7 @@ static struct ftrace_ops trace_ops __read_mostly =
{
.func = irqsoff_tracer_call,
};
#endif /* CONFIG_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
/*
* Should this new latency be reported/recorded?

View File

@@ -31,7 +31,7 @@ static raw_spinlock_t wakeup_lock =
static void __wakeup_reset(struct trace_array *tr);
#ifdef CONFIG_FTRACE
#ifdef CONFIG_FUNCTION_TRACER
/*
* wakeup uses its own tracer function to keep the overhead down:
*/
@@ -96,7 +96,7 @@ static struct ftrace_ops trace_ops __read_mostly =
{
.func = wakeup_tracer_call,
};
#endif /* CONFIG_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
/*
* Should this new latency be reported/recorded?

View File

@@ -70,7 +70,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
return ret;
}
#ifdef CONFIG_FTRACE
#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -99,13 +99,6 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
/* passed in by parameter to fool gcc from optimizing */
func();
/* update the records */
ret = ftrace_force_update();
if (ret) {
printk(KERN_CONT ".. ftraced failed .. ");
return ret;
}
/*
* Some archs *cough*PowerPC*cough* add characters to the
* start of the function names. We simply put a '*' to
@@ -183,13 +176,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
/* make sure msleep has been recorded */
msleep(1);
/* force the recorded functions to be traced */
ret = ftrace_force_update();
if (ret) {
printk(KERN_CONT ".. ftraced failed .. ");
return ret;
}
/* start the tracing */
ftrace_enabled = 1;
tracer_enabled = 1;
@@ -226,7 +212,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
return ret;
}
#endif /* CONFIG_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_IRQSOFF_TRACER
int

View File

@@ -44,6 +44,10 @@ static inline void check_stack(void)
if (this_size <= max_stack_size)
return;
/* we do not handle interrupt stacks yet */
if (!object_is_on_stack(&this_size))
return;
raw_local_irq_save(flags);
__raw_spin_lock(&max_stack_lock);
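The new object_is_on_stack() test makes the stack tracer ignore samples taken while running on a separate interrupt stack. As a rough sketch of what that helper checks (its real definition lives elsewhere in the tree and may differ in detail), it simply tests whether the address lies within the current task's stack:

/* Sketch (assumption): roughly what object_is_on_stack() verifies. */
static inline int object_is_on_stack_sketch(void *obj)
{
	void *stack = task_stack_page(current);

	return obj >= stack && obj < stack + THREAD_SIZE;
}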

View File

@@ -131,6 +131,9 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe)
old = entry->funcs;
if (!old)
return NULL;
debug_print_probes(entry);
/* (N -> M), (N > 1, M >= 0) probes */
for (nr_probes = 0; old[nr_probes]; nr_probes++) {
@@ -388,6 +391,11 @@ int tracepoint_probe_unregister(const char *name, void *probe)
if (entry->rcu_pending)
rcu_barrier_sched();
old = tracepoint_entry_remove_probe(entry, probe);
if (!old) {
printk(KERN_WARNING "Warning: Trying to unregister a probe "
"that doesn't exist\n");
goto end;
}
mutex_unlock(&tracepoints_mutex);
tracepoint_update_probes(); /* may update entry */
mutex_lock(&tracepoints_mutex);
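With the added check, removing a probe that was never registered now produces a warning rather than silently falling through; callers should still check the return value. A hypothetical caller (the tracepoint name and probe below are placeholders for this sketch):

/* Hypothetical usage; "my_tracepoint" and my_probe are placeholders. */
static void my_probe(int arg)
{
	/* probe body */
}

static void example_unregister(void)
{
	int ret = tracepoint_probe_unregister("my_tracepoint", (void *)my_probe);

	if (ret)
		printk(KERN_WARNING "probe removal failed: %d\n", ret);
}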

View File

@@ -970,6 +970,51 @@ undo:
return ret;
}
#ifdef CONFIG_SMP
struct work_for_cpu {
struct work_struct work;
long (*fn)(void *);
void *arg;
long ret;
};
static void do_work_for_cpu(struct work_struct *w)
{
struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
wfc->ret = wfc->fn(wfc->arg);
}
/**
* work_on_cpu - run a function in user context on a particular cpu
* @cpu: the cpu to run on
* @fn: the function to run
* @arg: the function arg
*
* This will return -EINVAL if the cpu is not online, or the return value
* of @fn otherwise.
*/
long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
{
struct work_for_cpu wfc;
INIT_WORK(&wfc.work, do_work_for_cpu);
wfc.fn = fn;
wfc.arg = arg;
get_online_cpus();
if (unlikely(!cpu_online(cpu)))
wfc.ret = -EINVAL;
else {
schedule_work_on(cpu, &wfc.work);
flush_work(&wfc.work);
}
put_online_cpus();
return wfc.ret;
}
EXPORT_SYMBOL_GPL(work_on_cpu);
#endif /* CONFIG_SMP */
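work_on_cpu() gives callers a synchronous way to run a function in process context on a chosen CPU without fiddling with set_cpus_allowed(). A usage sketch (the helper names below are made up for illustration):

/* Hypothetical per-cpu query built on work_on_cpu(); names are
 * placeholders. my_cpu_fn() runs in process context on the target cpu. */
static long my_cpu_fn(void *arg)
{
	return *(long *)arg + 1;
}

static long example_query(unsigned int cpu)
{
	long val = 41;

	/* -EINVAL if @cpu is offline, otherwise my_cpu_fn()'s return value. */
	return work_on_cpu(cpu, my_cpu_fn, &val);
}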
void __init init_workqueues(void)
{
cpu_populated_map = cpu_online_map;