Merge commit '8700c95adb03' into timers/nohz
The full dynticks tree needs the latest RCU and sched upstream updates in order
to fix some dependencies. Merge a common upstream merge point that has these
updates.

Conflicts:
	include/linux/perf_event.h
	kernel/rcutree.h
	kernel/rcutree_plugin.h

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
kernel/.gitignore (vendored) | 1
@@ -4,3 +4,4 @@
 config_data.h
 config_data.gz
 timeconst.h
+hz.bc
@@ -24,6 +24,7 @@ endif
 
 obj-y += sched/
 obj-y += power/
+obj-y += cpu/
 
 obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
 obj-$(CONFIG_FREEZER) += freezer.o
@@ -73,7 +73,7 @@ struct async_entry {
 	struct list_head	global_list;
 	struct work_struct	work;
 	async_cookie_t		cookie;
-	async_func_ptr		*func;
+	async_func_t		func;
 	void			*data;
 	struct async_domain	*domain;
 };
@@ -84,24 +84,20 @@ static atomic_t entry_count;
 
 static async_cookie_t lowest_in_progress(struct async_domain *domain)
 {
-	struct async_entry *first = NULL;
+	struct list_head *pending;
 	async_cookie_t ret = ASYNC_COOKIE_MAX;
 	unsigned long flags;
 
 	spin_lock_irqsave(&async_lock, flags);
 
-	if (domain) {
-		if (!list_empty(&domain->pending))
-			first = list_first_entry(&domain->pending,
-					struct async_entry, domain_list);
-	} else {
-		if (!list_empty(&async_global_pending))
-			first = list_first_entry(&async_global_pending,
-					struct async_entry, global_list);
-	}
+	if (domain)
+		pending = &domain->pending;
+	else
+		pending = &async_global_pending;
 
-	if (first)
-		ret = first->cookie;
+	if (!list_empty(pending))
+		ret = list_first_entry(pending, struct async_entry,
+				       domain_list)->cookie;
 
 	spin_unlock_irqrestore(&async_lock, flags);
 	return ret;
@@ -149,7 +145,7 @@ static void async_run_entry_fn(struct work_struct *work)
 	wake_up(&async_done);
 }
 
-static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct async_domain *domain)
+static async_cookie_t __async_schedule(async_func_t func, void *data, struct async_domain *domain)
 {
 	struct async_entry *entry;
 	unsigned long flags;
@@ -169,13 +165,13 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
 		spin_unlock_irqrestore(&async_lock, flags);
 
 		/* low on memory.. run synchronously */
-		ptr(data, newcookie);
+		func(data, newcookie);
 		return newcookie;
 	}
 	INIT_LIST_HEAD(&entry->domain_list);
 	INIT_LIST_HEAD(&entry->global_list);
 	INIT_WORK(&entry->work, async_run_entry_fn);
-	entry->func = ptr;
+	entry->func = func;
 	entry->data = data;
 	entry->domain = domain;
 
@@ -202,21 +198,21 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
 
 /**
  * async_schedule - schedule a function for asynchronous execution
- * @ptr: function to execute asynchronously
+ * @func: function to execute asynchronously
  * @data: data pointer to pass to the function
  *
  * Returns an async_cookie_t that may be used for checkpointing later.
  * Note: This function may be called from atomic or non-atomic contexts.
  */
-async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
+async_cookie_t async_schedule(async_func_t func, void *data)
 {
-	return __async_schedule(ptr, data, &async_dfl_domain);
+	return __async_schedule(func, data, &async_dfl_domain);
 }
 EXPORT_SYMBOL_GPL(async_schedule);
 
 /**
  * async_schedule_domain - schedule a function for asynchronous execution within a certain domain
- * @ptr: function to execute asynchronously
+ * @func: function to execute asynchronously
  * @data: data pointer to pass to the function
  * @domain: the domain
  *
@@ -226,10 +222,10 @@ EXPORT_SYMBOL_GPL(async_schedule);
  * synchronization domain is specified via @domain. Note: This function
  * may be called from atomic or non-atomic contexts.
  */
-async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data,
+async_cookie_t async_schedule_domain(async_func_t func, void *data,
 				     struct async_domain *domain)
 {
-	return __async_schedule(ptr, data, domain);
+	return __async_schedule(func, data, domain);
 }
 EXPORT_SYMBOL_GPL(async_schedule_domain);
@@ -660,14 +660,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
|
||||
|
||||
/* As soon as there's any sign of userspace auditd,
|
||||
* start kauditd to talk to it */
|
||||
if (!kauditd_task)
|
||||
if (!kauditd_task) {
|
||||
kauditd_task = kthread_run(kauditd_thread, NULL, "kauditd");
|
||||
if (IS_ERR(kauditd_task)) {
|
||||
err = PTR_ERR(kauditd_task);
|
||||
kauditd_task = NULL;
|
||||
return err;
|
||||
if (IS_ERR(kauditd_task)) {
|
||||
err = PTR_ERR(kauditd_task);
|
||||
kauditd_task = NULL;
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
loginuid = audit_get_loginuid(current);
|
||||
sessionid = audit_get_sessionid(current);
|
||||
security_task_getsecid(current, &sid);
|
||||
|
@@ -59,10 +59,7 @@ struct audit_entry {
|
||||
struct audit_krule rule;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_AUDIT
|
||||
extern int audit_enabled;
|
||||
extern int audit_ever_enabled;
|
||||
#endif
|
||||
|
||||
extern int audit_pid;
|
||||
|
||||
|
@@ -617,9 +617,9 @@ void audit_trim_trees(void)
|
||||
}
|
||||
spin_unlock(&hash_lock);
|
||||
trim_marked(tree);
|
||||
put_tree(tree);
|
||||
drop_collected_mounts(root_mnt);
|
||||
skip_it:
|
||||
put_tree(tree);
|
||||
mutex_lock(&audit_filter_mutex);
|
||||
}
|
||||
list_del(&cursor);
|
||||
|
@@ -594,6 +594,10 @@ exit_nofree:
|
||||
return entry;
|
||||
|
||||
exit_free:
|
||||
if (entry->rule.watch)
|
||||
audit_put_watch(entry->rule.watch); /* matches initial get */
|
||||
if (entry->rule.tree)
|
||||
audit_put_tree(entry->rule.tree); /* that's the temporary one */
|
||||
audit_free_rule(entry);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
@@ -1034,21 +1034,15 @@ static inline void audit_free_aux(struct audit_context *context)
|
||||
}
|
||||
}
|
||||
|
||||
static inline void audit_zero_context(struct audit_context *context,
|
||||
enum audit_state state)
|
||||
{
|
||||
memset(context, 0, sizeof(*context));
|
||||
context->state = state;
|
||||
context->prio = state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
|
||||
}
|
||||
|
||||
static inline struct audit_context *audit_alloc_context(enum audit_state state)
|
||||
{
|
||||
struct audit_context *context;
|
||||
|
||||
if (!(context = kmalloc(sizeof(*context), GFP_KERNEL)))
|
||||
context = kzalloc(sizeof(*context), GFP_KERNEL);
|
||||
if (!context)
|
||||
return NULL;
|
||||
audit_zero_context(context, state);
|
||||
context->state = state;
|
||||
context->prio = state == AUDIT_RECORD_CONTEXT ? ~0ULL : 0;
|
||||
INIT_LIST_HEAD(&context->killed_trees);
|
||||
INIT_LIST_HEAD(&context->names_list);
|
||||
return context;
|
||||
|
@@ -392,6 +392,30 @@ bool ns_capable(struct user_namespace *ns, int cap)
|
||||
}
|
||||
EXPORT_SYMBOL(ns_capable);
|
||||
|
||||
/**
|
||||
* file_ns_capable - Determine if the file's opener had a capability in effect
|
||||
* @file: The file we want to check
|
||||
* @ns: The usernamespace we want the capability in
|
||||
* @cap: The capability to be tested for
|
||||
*
|
||||
* Return true if task that opened the file had a capability in effect
|
||||
* when the file was opened.
|
||||
*
|
||||
* This does not set PF_SUPERPRIV because the caller may not
|
||||
* actually be privileged.
|
||||
*/
|
||||
bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap)
|
||||
{
|
||||
if (WARN_ON_ONCE(!cap_valid(cap)))
|
||||
return false;
|
||||
|
||||
if (security_capable(file->f_cred, ns, cap) == 0)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
EXPORT_SYMBOL(file_ns_capable);
|
||||
|
||||
/**
|
||||
* capable - Determine if the current task has a superior capability in effect
|
||||
* @cap: The capability to be tested for
|
||||
|
kernel/cgroup.c | 780 (diff suppressed because it is too large)
kernel/cpu/Makefile (new file) | 1
@@ -0,0 +1 @@
+obj-y = idle.o
kernel/cpu/idle.c (new file) | 107
@@ -0,0 +1,107 @@
+/*
+ * Generic entry point for the idle threads
+ */
+#include <linux/sched.h>
+#include <linux/cpu.h>
+#include <linux/tick.h>
+#include <linux/mm.h>
+
+#include <asm/tlb.h>
+
+#include <trace/events/power.h>
+
+static int __read_mostly cpu_idle_force_poll;
+
+void cpu_idle_poll_ctrl(bool enable)
+{
+	if (enable) {
+		cpu_idle_force_poll++;
+	} else {
+		cpu_idle_force_poll--;
+		WARN_ON_ONCE(cpu_idle_force_poll < 0);
+	}
+}
+
+#ifdef CONFIG_GENERIC_IDLE_POLL_SETUP
+static int __init cpu_idle_poll_setup(char *__unused)
+{
+	cpu_idle_force_poll = 1;
+	return 1;
+}
+__setup("nohlt", cpu_idle_poll_setup);
+
+static int __init cpu_idle_nopoll_setup(char *__unused)
+{
+	cpu_idle_force_poll = 0;
+	return 1;
+}
+__setup("hlt", cpu_idle_nopoll_setup);
+#endif
+
+static inline int cpu_idle_poll(void)
+{
+	trace_cpu_idle_rcuidle(0, smp_processor_id());
+	local_irq_enable();
+	while (!need_resched())
+		cpu_relax();
+	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
+	return 1;
+}
+
+/* Weak implementations for optional arch specific functions */
+void __weak arch_cpu_idle_prepare(void) { }
+void __weak arch_cpu_idle_enter(void) { }
+void __weak arch_cpu_idle_exit(void) { }
+void __weak arch_cpu_idle_dead(void) { }
+void __weak arch_cpu_idle(void)
+{
+	cpu_idle_force_poll = 1;
+}
+
+/*
+ * Generic idle loop implementation
+ */
+static void cpu_idle_loop(void)
+{
+	while (1) {
+		tick_nohz_idle_enter();
+
+		while (!need_resched()) {
+			check_pgt_cache();
+			rmb();
+
+			if (cpu_is_offline(smp_processor_id()))
+				arch_cpu_idle_dead();
+
+			local_irq_disable();
+			arch_cpu_idle_enter();
+
+			if (cpu_idle_force_poll) {
+				cpu_idle_poll();
+			} else {
+				current_clr_polling();
+				if (!need_resched()) {
+					stop_critical_timings();
+					rcu_idle_enter();
+					arch_cpu_idle();
+					WARN_ON_ONCE(irqs_disabled());
+					rcu_idle_exit();
+					start_critical_timings();
+				} else {
+					local_irq_enable();
+				}
+				current_set_polling();
+			}
+			arch_cpu_idle_exit();
+		}
+		tick_nohz_idle_exit();
+		schedule_preempt_disabled();
+	}
+}
+
+void cpu_startup_entry(enum cpuhp_state state)
+{
+	current_set_polling();
+	arch_cpu_idle_prepare();
+	cpu_idle_loop();
+}
kernel/cpuset.c | 143
@@ -264,17 +264,6 @@ static struct cpuset top_cpuset = {
|
||||
static DEFINE_MUTEX(cpuset_mutex);
|
||||
static DEFINE_MUTEX(callback_mutex);
|
||||
|
||||
/*
|
||||
* cpuset_buffer_lock protects both the cpuset_name and cpuset_nodelist
|
||||
* buffers. They are statically allocated to prevent using excess stack
|
||||
* when calling cpuset_print_task_mems_allowed().
|
||||
*/
|
||||
#define CPUSET_NAME_LEN (128)
|
||||
#define CPUSET_NODELIST_LEN (256)
|
||||
static char cpuset_name[CPUSET_NAME_LEN];
|
||||
static char cpuset_nodelist[CPUSET_NODELIST_LEN];
|
||||
static DEFINE_SPINLOCK(cpuset_buffer_lock);
|
||||
|
||||
/*
|
||||
* CPU / memory hotplug is handled asynchronously.
|
||||
*/
|
||||
@@ -780,25 +769,26 @@ static void rebuild_sched_domains_locked(void)
|
||||
lockdep_assert_held(&cpuset_mutex);
|
||||
get_online_cpus();
|
||||
|
||||
/*
|
||||
* We have raced with CPU hotplug. Don't do anything to avoid
|
||||
* passing doms with offlined cpu to partition_sched_domains().
|
||||
* Anyways, hotplug work item will rebuild sched domains.
|
||||
*/
|
||||
if (!cpumask_equal(top_cpuset.cpus_allowed, cpu_active_mask))
|
||||
goto out;
|
||||
|
||||
/* Generate domain masks and attrs */
|
||||
ndoms = generate_sched_domains(&doms, &attr);
|
||||
|
||||
/* Have scheduler rebuild the domains */
|
||||
partition_sched_domains(ndoms, doms, attr);
|
||||
|
||||
out:
|
||||
put_online_cpus();
|
||||
}
|
||||
#else /* !CONFIG_SMP */
|
||||
static void rebuild_sched_domains_locked(void)
|
||||
{
|
||||
}
|
||||
|
||||
static int generate_sched_domains(cpumask_var_t **domains,
|
||||
struct sched_domain_attr **attributes)
|
||||
{
|
||||
*domains = NULL;
|
||||
return 1;
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
void rebuild_sched_domains(void)
|
||||
@@ -1388,16 +1378,16 @@ static int cpuset_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
|
||||
|
||||
cgroup_taskset_for_each(task, cgrp, tset) {
|
||||
/*
|
||||
* Kthreads bound to specific cpus cannot be moved to a new
|
||||
* cpuset; we cannot change their cpu affinity and
|
||||
* isolating such threads by their set of allowed nodes is
|
||||
* unnecessary. Thus, cpusets are not applicable for such
|
||||
* threads. This prevents checking for success of
|
||||
* set_cpus_allowed_ptr() on all attached tasks before
|
||||
* cpus_allowed may be changed.
|
||||
* Kthreads which disallow setaffinity shouldn't be moved
|
||||
* to a new cpuset; we don't want to change their cpu
|
||||
* affinity and isolating such threads by their set of
|
||||
* allowed nodes is unnecessary. Thus, cpusets are not
|
||||
* applicable for such threads. This prevents checking for
|
||||
* success of set_cpus_allowed_ptr() on all attached tasks
|
||||
* before cpus_allowed may be changed.
|
||||
*/
|
||||
ret = -EINVAL;
|
||||
if (task->flags & PF_THREAD_BOUND)
|
||||
if (task->flags & PF_NO_SETAFFINITY)
|
||||
goto out_unlock;
|
||||
ret = security_task_setscheduler(task);
|
||||
if (ret)
|
||||
@@ -2005,50 +1995,6 @@ int __init cpuset_init(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* cpuset_do_move_task - move a given task to another cpuset
|
||||
* @tsk: pointer to task_struct the task to move
|
||||
* @scan: struct cgroup_scanner contained in its struct cpuset_hotplug_scanner
|
||||
*
|
||||
* Called by cgroup_scan_tasks() for each task in a cgroup.
|
||||
* Return nonzero to stop the walk through the tasks.
|
||||
*/
|
||||
static void cpuset_do_move_task(struct task_struct *tsk,
|
||||
struct cgroup_scanner *scan)
|
||||
{
|
||||
struct cgroup *new_cgroup = scan->data;
|
||||
|
||||
cgroup_lock();
|
||||
cgroup_attach_task(new_cgroup, tsk);
|
||||
cgroup_unlock();
|
||||
}
|
||||
|
||||
/**
|
||||
* move_member_tasks_to_cpuset - move tasks from one cpuset to another
|
||||
* @from: cpuset in which the tasks currently reside
|
||||
* @to: cpuset to which the tasks will be moved
|
||||
*
|
||||
* Called with cpuset_mutex held
|
||||
* callback_mutex must not be held, as cpuset_attach() will take it.
|
||||
*
|
||||
* The cgroup_scan_tasks() function will scan all the tasks in a cgroup,
|
||||
* calling callback functions for each.
|
||||
*/
|
||||
static void move_member_tasks_to_cpuset(struct cpuset *from, struct cpuset *to)
|
||||
{
|
||||
struct cgroup_scanner scan;
|
||||
|
||||
scan.cg = from->css.cgroup;
|
||||
scan.test_task = NULL; /* select all tasks in cgroup */
|
||||
scan.process_task = cpuset_do_move_task;
|
||||
scan.heap = NULL;
|
||||
scan.data = to->css.cgroup;
|
||||
|
||||
if (cgroup_scan_tasks(&scan))
|
||||
printk(KERN_ERR "move_member_tasks_to_cpuset: "
|
||||
"cgroup_scan_tasks failed\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* If CPU and/or memory hotplug handlers, below, unplug any CPUs
|
||||
* or memory nodes, we need to walk over the cpuset hierarchy,
|
||||
@@ -2069,7 +2015,12 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
|
||||
nodes_empty(parent->mems_allowed))
|
||||
parent = parent_cs(parent);
|
||||
|
||||
move_member_tasks_to_cpuset(cs, parent);
|
||||
if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
|
||||
rcu_read_lock();
|
||||
printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n",
|
||||
cgroup_name(cs->css.cgroup));
|
||||
rcu_read_unlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -2222,17 +2173,8 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
|
||||
flush_workqueue(cpuset_propagate_hotplug_wq);
|
||||
|
||||
/* rebuild sched domains if cpus_allowed has changed */
|
||||
if (cpus_updated) {
|
||||
struct sched_domain_attr *attr;
|
||||
cpumask_var_t *doms;
|
||||
int ndoms;
|
||||
|
||||
mutex_lock(&cpuset_mutex);
|
||||
ndoms = generate_sched_domains(&doms, &attr);
|
||||
mutex_unlock(&cpuset_mutex);
|
||||
|
||||
partition_sched_domains(ndoms, doms, attr);
|
||||
}
|
||||
if (cpus_updated)
|
||||
rebuild_sched_domains();
|
||||
}
|
||||
|
||||
void cpuset_update_active_cpus(bool cpu_online)
|
||||
@@ -2251,7 +2193,6 @@ void cpuset_update_active_cpus(bool cpu_online)
|
||||
schedule_work(&cpuset_hotplug_work);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
/*
|
||||
* Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY].
|
||||
* Call this routine anytime after node_states[N_MEMORY] changes.
|
||||
@@ -2263,20 +2204,23 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
|
||||
schedule_work(&cpuset_hotplug_work);
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
static struct notifier_block cpuset_track_online_nodes_nb = {
|
||||
.notifier_call = cpuset_track_online_nodes,
|
||||
.priority = 10, /* ??! */
|
||||
};
|
||||
|
||||
/**
|
||||
* cpuset_init_smp - initialize cpus_allowed
|
||||
*
|
||||
* Description: Finish top cpuset after cpu, node maps are initialized
|
||||
**/
|
||||
|
||||
*/
|
||||
void __init cpuset_init_smp(void)
|
||||
{
|
||||
cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
|
||||
top_cpuset.mems_allowed = node_states[N_MEMORY];
|
||||
|
||||
hotplug_memory_notifier(cpuset_track_online_nodes, 10);
|
||||
register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
|
||||
|
||||
cpuset_propagate_hotplug_wq =
|
||||
alloc_ordered_workqueue("cpuset_hotplug", 0);
|
||||
@@ -2592,6 +2536,8 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
|
||||
return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
|
||||
}
|
||||
|
||||
#define CPUSET_NODELIST_LEN (256)
|
||||
|
||||
/**
|
||||
* cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
|
||||
* @task: pointer to task_struct of some task.
|
||||
@@ -2602,25 +2548,22 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
|
||||
*/
|
||||
void cpuset_print_task_mems_allowed(struct task_struct *tsk)
|
||||
{
|
||||
struct dentry *dentry;
|
||||
/* Statically allocated to prevent using excess stack. */
|
||||
static char cpuset_nodelist[CPUSET_NODELIST_LEN];
|
||||
static DEFINE_SPINLOCK(cpuset_buffer_lock);
|
||||
|
||||
dentry = task_cs(tsk)->css.cgroup->dentry;
|
||||
struct cgroup *cgrp = task_cs(tsk)->css.cgroup;
|
||||
|
||||
rcu_read_lock();
|
||||
spin_lock(&cpuset_buffer_lock);
|
||||
|
||||
if (!dentry) {
|
||||
strcpy(cpuset_name, "/");
|
||||
} else {
|
||||
spin_lock(&dentry->d_lock);
|
||||
strlcpy(cpuset_name, (const char *)dentry->d_name.name,
|
||||
CPUSET_NAME_LEN);
|
||||
spin_unlock(&dentry->d_lock);
|
||||
}
|
||||
|
||||
nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
|
||||
tsk->mems_allowed);
|
||||
printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
|
||||
tsk->comm, cpuset_name, cpuset_nodelist);
|
||||
tsk->comm, cgroup_name(cgrp), cpuset_nodelist);
|
||||
|
||||
spin_unlock(&cpuset_buffer_lock);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -38,6 +38,7 @@
|
||||
#include <linux/ftrace_event.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/cgroup.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@@ -234,6 +235,20 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
|
||||
|
||||
#ifdef CONFIG_CGROUP_PERF
|
||||
|
||||
/*
|
||||
* perf_cgroup_info keeps track of time_enabled for a cgroup.
|
||||
* This is a per-cpu dynamically allocated data structure.
|
||||
*/
|
||||
struct perf_cgroup_info {
|
||||
u64 time;
|
||||
u64 timestamp;
|
||||
};
|
||||
|
||||
struct perf_cgroup {
|
||||
struct cgroup_subsys_state css;
|
||||
struct perf_cgroup_info __percpu *info;
|
||||
};
|
||||
|
||||
/*
|
||||
* Must ensure cgroup is pinned (css_get) before calling
|
||||
* this function. In other words, we cannot call this function
|
||||
@@ -252,7 +267,22 @@ perf_cgroup_match(struct perf_event *event)
|
||||
struct perf_event_context *ctx = event->ctx;
|
||||
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
|
||||
|
||||
return !event->cgrp || event->cgrp == cpuctx->cgrp;
|
||||
/* @event doesn't care about cgroup */
|
||||
if (!event->cgrp)
|
||||
return true;
|
||||
|
||||
/* wants specific cgroup scope but @cpuctx isn't associated with any */
|
||||
if (!cpuctx->cgrp)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Cgroup scoping is recursive. An event enabled for a cgroup is
|
||||
* also enabled for all its descendant cgroups. If @cpuctx's
|
||||
* cgroup is a descendant of @event's (the test covers identity
|
||||
* case), it's a match.
|
||||
*/
|
||||
return cgroup_is_descendant(cpuctx->cgrp->css.cgroup,
|
||||
event->cgrp->css.cgroup);
|
||||
}
|
||||
|
||||
static inline bool perf_tryget_cgroup(struct perf_event *event)
|
||||
@@ -966,9 +996,15 @@ static void perf_event__header_size(struct perf_event *event)
|
||||
if (sample_type & PERF_SAMPLE_PERIOD)
|
||||
size += sizeof(data->period);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT)
|
||||
size += sizeof(data->weight);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_READ)
|
||||
size += event->read_size;
|
||||
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC)
|
||||
size += sizeof(data->data_src.val);
|
||||
|
||||
event->header_size = size;
|
||||
}
|
||||
|
||||
@@ -4193,6 +4229,12 @@ void perf_output_sample(struct perf_output_handle *handle,
|
||||
perf_output_sample_ustack(handle,
|
||||
data->stack_user_size,
|
||||
data->regs_user.regs);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_WEIGHT)
|
||||
perf_output_put(handle, data->weight);
|
||||
|
||||
if (sample_type & PERF_SAMPLE_DATA_SRC)
|
||||
perf_output_put(handle, data->data_src.val);
|
||||
}
|
||||
|
||||
void perf_prepare_sample(struct perf_event_header *header,
|
||||
@@ -4449,12 +4491,15 @@ static void perf_event_task_event(struct perf_task_event *task_event)
|
||||
if (ctxn < 0)
|
||||
goto next;
|
||||
ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
|
||||
if (ctx)
|
||||
perf_event_task_ctx(ctx, task_event);
|
||||
}
|
||||
if (ctx)
|
||||
perf_event_task_ctx(ctx, task_event);
|
||||
next:
|
||||
put_cpu_ptr(pmu->pmu_cpu_context);
|
||||
}
|
||||
if (task_event->task_ctx)
|
||||
perf_event_task_ctx(task_event->task_ctx, task_event);
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
@@ -4608,6 +4653,7 @@ void perf_event_comm(struct task_struct *task)
|
||||
struct perf_event_context *ctx;
|
||||
int ctxn;
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_task_context_nr(ctxn) {
|
||||
ctx = task->perf_event_ctxp[ctxn];
|
||||
if (!ctx)
|
||||
@@ -4615,6 +4661,7 @@ void perf_event_comm(struct task_struct *task)
|
||||
|
||||
perf_event_enable_on_exec(ctx);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
if (!atomic_read(&nr_comm_events))
|
||||
return;
|
||||
@@ -4749,7 +4796,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
|
||||
} else {
|
||||
if (arch_vma_name(mmap_event->vma)) {
|
||||
name = strncpy(tmp, arch_vma_name(mmap_event->vma),
|
||||
sizeof(tmp));
|
||||
sizeof(tmp) - 1);
|
||||
tmp[sizeof(tmp) - 1] = '\0';
|
||||
goto got_name;
|
||||
}
|
||||
|
||||
@@ -4776,6 +4824,9 @@ got_name:
|
||||
mmap_event->file_name = name;
|
||||
mmap_event->file_size = size;
|
||||
|
||||
if (!(vma->vm_flags & VM_EXEC))
|
||||
mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
|
||||
|
||||
mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
|
||||
|
||||
rcu_read_lock();
|
||||
@@ -5342,7 +5393,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
|
||||
|
||||
static int perf_swevent_init(struct perf_event *event)
|
||||
{
|
||||
int event_id = event->attr.config;
|
||||
u64 event_id = event->attr.config;
|
||||
|
||||
if (event->attr.type != PERF_TYPE_SOFTWARE)
|
||||
return -ENOENT;
|
||||
@@ -5662,6 +5713,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
|
||||
event->attr.sample_period = NSEC_PER_SEC / freq;
|
||||
hwc->sample_period = event->attr.sample_period;
|
||||
local64_set(&hwc->period_left, hwc->sample_period);
|
||||
hwc->last_period = hwc->sample_period;
|
||||
event->attr.freq = 0;
|
||||
}
|
||||
}
|
||||
@@ -5997,6 +6049,7 @@ skip_type:
|
||||
if (pmu->pmu_cpu_context)
|
||||
goto got_cpu_context;
|
||||
|
||||
ret = -ENOMEM;
|
||||
pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
|
||||
if (!pmu->pmu_cpu_context)
|
||||
goto free_dev;
|
||||
@@ -7524,12 +7577,5 @@ struct cgroup_subsys perf_subsys = {
|
||||
.css_free = perf_cgroup_css_free,
|
||||
.exit = perf_cgroup_exit,
|
||||
.attach = perf_cgroup_attach,
|
||||
|
||||
/*
|
||||
* perf_event cgroup doesn't handle nesting correctly.
|
||||
* ctx->nr_cgroups adjustments should be propagated through the
|
||||
* cgroup hierarchy. Fix it and remove the following.
|
||||
*/
|
||||
.broken_hierarchy = true,
|
||||
};
|
||||
#endif /* CONFIG_CGROUP_PERF */
|
||||
|
@@ -16,7 +16,7 @@ struct ring_buffer {
|
||||
int page_order; /* allocation order */
|
||||
#endif
|
||||
int nr_pages; /* nr of data pages */
|
||||
int writable; /* are we writable */
|
||||
int overwrite; /* can overwrite itself */
|
||||
|
||||
atomic_t poll; /* POLL_ for wakeups */
|
||||
|
||||
|
@@ -18,12 +18,24 @@
|
||||
static bool perf_output_space(struct ring_buffer *rb, unsigned long tail,
|
||||
unsigned long offset, unsigned long head)
|
||||
{
|
||||
unsigned long mask;
|
||||
unsigned long sz = perf_data_size(rb);
|
||||
unsigned long mask = sz - 1;
|
||||
|
||||
if (!rb->writable)
|
||||
/*
|
||||
* check if user-writable
|
||||
* overwrite : over-write its own tail
|
||||
* !overwrite: buffer possibly drops events.
|
||||
*/
|
||||
if (rb->overwrite)
|
||||
return true;
|
||||
|
||||
mask = perf_data_size(rb) - 1;
|
||||
/*
|
||||
* verify that payload is not bigger than buffer
|
||||
* otherwise masking logic may fail to detect
|
||||
* the "not enough space" condition
|
||||
*/
|
||||
if ((head - offset) > sz)
|
||||
return false;
|
||||
|
||||
offset = (offset - tail) & mask;
|
||||
head = (head - tail) & mask;
|
||||
@@ -212,7 +224,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
|
||||
rb->watermark = max_size / 2;
|
||||
|
||||
if (flags & RING_BUFFER_WRITABLE)
|
||||
rb->writable = 1;
|
||||
rb->overwrite = 0;
|
||||
else
|
||||
rb->overwrite = 1;
|
||||
|
||||
atomic_set(&rb->refcount, 1);
|
||||
|
||||
|
@@ -75,6 +75,15 @@ struct uprobe {
|
||||
struct arch_uprobe arch;
|
||||
};
|
||||
|
||||
struct return_instance {
|
||||
struct uprobe *uprobe;
|
||||
unsigned long func;
|
||||
unsigned long orig_ret_vaddr; /* original return address */
|
||||
bool chained; /* true, if instance is nested */
|
||||
|
||||
struct return_instance *next; /* keep as stack */
|
||||
};
|
||||
|
||||
/*
|
||||
* valid_vma: Verify if the specified vma is an executable vma
|
||||
* Relax restrictions while unregistering: vm_flags might have
|
||||
@@ -173,10 +182,31 @@ bool __weak is_swbp_insn(uprobe_opcode_t *insn)
|
||||
return *insn == UPROBE_SWBP_INSN;
|
||||
}
|
||||
|
||||
static void copy_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *opcode)
|
||||
/**
|
||||
* is_trap_insn - check if instruction is breakpoint instruction.
|
||||
* @insn: instruction to be checked.
|
||||
* Default implementation of is_trap_insn
|
||||
* Returns true if @insn is a breakpoint instruction.
|
||||
*
|
||||
* This function is needed for the case where an architecture has multiple
|
||||
* trap instructions (like powerpc).
|
||||
*/
|
||||
bool __weak is_trap_insn(uprobe_opcode_t *insn)
|
||||
{
|
||||
return is_swbp_insn(insn);
|
||||
}
|
||||
|
||||
static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len)
|
||||
{
|
||||
void *kaddr = kmap_atomic(page);
|
||||
memcpy(opcode, kaddr + (vaddr & ~PAGE_MASK), UPROBE_SWBP_INSN_SIZE);
|
||||
memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len);
|
||||
kunmap_atomic(kaddr);
|
||||
}
|
||||
|
||||
static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len)
|
||||
{
|
||||
void *kaddr = kmap_atomic(page);
|
||||
memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len);
|
||||
kunmap_atomic(kaddr);
|
||||
}
|
||||
|
||||
@@ -185,7 +215,16 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
|
||||
uprobe_opcode_t old_opcode;
|
||||
bool is_swbp;
|
||||
|
||||
copy_opcode(page, vaddr, &old_opcode);
|
||||
/*
|
||||
* Note: We only check if the old_opcode is UPROBE_SWBP_INSN here.
|
||||
* We do not check if it is any other 'trap variant' which could
|
||||
* be conditional trap instruction such as the one powerpc supports.
|
||||
*
|
||||
* The logic is that we do not care if the underlying instruction
|
||||
* is a trap variant; uprobes always wins over any other (gdb)
|
||||
* breakpoint.
|
||||
*/
|
||||
copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE);
|
||||
is_swbp = is_swbp_insn(&old_opcode);
|
||||
|
||||
if (is_swbp_insn(new_opcode)) {
|
||||
@@ -204,7 +243,7 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
|
||||
* Expect the breakpoint instruction to be the smallest size instruction for
|
||||
* the architecture. If an arch has variable length instruction and the
|
||||
* breakpoint instruction is not of the smallest length instruction
|
||||
* supported by that architecture then we need to modify is_swbp_at_addr and
|
||||
* supported by that architecture then we need to modify is_trap_at_addr and
|
||||
* write_opcode accordingly. This would never be a problem for archs that
|
||||
* have fixed length instructions.
|
||||
*/
|
||||
@@ -225,7 +264,6 @@ static int write_opcode(struct mm_struct *mm, unsigned long vaddr,
|
||||
uprobe_opcode_t opcode)
|
||||
{
|
||||
struct page *old_page, *new_page;
|
||||
void *vaddr_old, *vaddr_new;
|
||||
struct vm_area_struct *vma;
|
||||
int ret;
|
||||
|
||||
@@ -246,15 +284,8 @@ retry:
|
||||
|
||||
__SetPageUptodate(new_page);
|
||||
|
||||
/* copy the page now that we've got it stable */
|
||||
vaddr_old = kmap_atomic(old_page);
|
||||
vaddr_new = kmap_atomic(new_page);
|
||||
|
||||
memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
|
||||
memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);
|
||||
|
||||
kunmap_atomic(vaddr_new);
|
||||
kunmap_atomic(vaddr_old);
|
||||
copy_highpage(new_page, old_page);
|
||||
copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
|
||||
|
||||
ret = anon_vma_prepare(vma);
|
||||
if (ret)
|
||||
@@ -477,30 +508,18 @@ __copy_insn(struct address_space *mapping, struct file *filp, char *insn,
|
||||
unsigned long nbytes, loff_t offset)
|
||||
{
|
||||
struct page *page;
|
||||
void *vaddr;
|
||||
unsigned long off;
|
||||
pgoff_t idx;
|
||||
|
||||
if (!filp)
|
||||
return -EINVAL;
|
||||
|
||||
if (!mapping->a_ops->readpage)
|
||||
return -EIO;
|
||||
|
||||
idx = offset >> PAGE_CACHE_SHIFT;
|
||||
off = offset & ~PAGE_MASK;
|
||||
|
||||
/*
|
||||
* Ensure that the page that has the original instruction is
|
||||
* populated and in page-cache.
|
||||
*/
|
||||
page = read_mapping_page(mapping, idx, filp);
|
||||
page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
|
||||
if (IS_ERR(page))
|
||||
return PTR_ERR(page);
|
||||
|
||||
vaddr = kmap_atomic(page);
|
||||
memcpy(insn, vaddr + off, nbytes);
|
||||
kunmap_atomic(vaddr);
|
||||
copy_from_page(page, offset, insn, nbytes);
|
||||
page_cache_release(page);
|
||||
|
||||
return 0;
|
||||
@@ -550,7 +569,7 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
|
||||
goto out;
|
||||
|
||||
ret = -ENOTSUPP;
|
||||
if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
|
||||
if (is_trap_insn((uprobe_opcode_t *)uprobe->arch.insn))
|
||||
goto out;
|
||||
|
||||
ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
|
||||
@@ -758,7 +777,7 @@ register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
|
||||
down_write(&mm->mmap_sem);
|
||||
vma = find_vma(mm, info->vaddr);
|
||||
if (!vma || !valid_vma(vma, is_register) ||
|
||||
vma->vm_file->f_mapping->host != uprobe->inode)
|
||||
file_inode(vma->vm_file) != uprobe->inode)
|
||||
goto unlock;
|
||||
|
||||
if (vma->vm_start > info->vaddr ||
|
||||
@@ -828,6 +847,10 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
|
||||
struct uprobe *uprobe;
|
||||
int ret;
|
||||
|
||||
/* Uprobe must have at least one set consumer */
|
||||
if (!uc->handler && !uc->ret_handler)
|
||||
return -EINVAL;
|
||||
|
||||
/* Racy, just to catch the obvious mistakes */
|
||||
if (offset > i_size_read(inode))
|
||||
return -EINVAL;
|
||||
@@ -917,7 +940,7 @@ static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
|
||||
loff_t offset;
|
||||
|
||||
if (!valid_vma(vma, false) ||
|
||||
vma->vm_file->f_mapping->host != uprobe->inode)
|
||||
file_inode(vma->vm_file) != uprobe->inode)
|
||||
continue;
|
||||
|
||||
offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
|
||||
@@ -1010,7 +1033,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
|
||||
if (no_uprobe_events() || !valid_vma(vma, true))
|
||||
return 0;
|
||||
|
||||
inode = vma->vm_file->f_mapping->host;
|
||||
inode = file_inode(vma->vm_file);
|
||||
if (!inode)
|
||||
return 0;
|
||||
|
||||
@@ -1041,7 +1064,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e
|
||||
struct inode *inode;
|
||||
struct rb_node *n;
|
||||
|
||||
inode = vma->vm_file->f_mapping->host;
|
||||
inode = file_inode(vma->vm_file);
|
||||
|
||||
min = vaddr_to_offset(vma, start);
|
||||
max = min + (end - start) - 1;
|
||||
@@ -1114,6 +1137,7 @@ static struct xol_area *get_xol_area(void)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct xol_area *area;
|
||||
uprobe_opcode_t insn = UPROBE_SWBP_INSN;
|
||||
|
||||
area = mm->uprobes_state.xol_area;
|
||||
if (area)
|
||||
@@ -1131,7 +1155,12 @@ static struct xol_area *get_xol_area(void)
|
||||
if (!area->page)
|
||||
goto free_bitmap;
|
||||
|
||||
/* allocate first slot of task's xol_area for the return probes */
|
||||
set_bit(0, area->bitmap);
|
||||
copy_to_page(area->page, 0, &insn, UPROBE_SWBP_INSN_SIZE);
|
||||
atomic_set(&area->slot_count, 1);
|
||||
init_waitqueue_head(&area->wq);
|
||||
|
||||
if (!xol_add_vma(area))
|
||||
return area;
|
||||
|
||||
@@ -1216,9 +1245,7 @@ static unsigned long xol_take_insn_slot(struct xol_area *area)
|
||||
static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
|
||||
{
|
||||
struct xol_area *area;
|
||||
unsigned long offset;
|
||||
unsigned long xol_vaddr;
|
||||
void *vaddr;
|
||||
|
||||
area = get_xol_area();
|
||||
if (!area)
|
||||
@@ -1229,10 +1256,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
|
||||
return 0;
|
||||
|
||||
/* Initialize the slot */
|
||||
offset = xol_vaddr & ~PAGE_MASK;
|
||||
vaddr = kmap_atomic(area->page);
|
||||
memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
|
||||
kunmap_atomic(vaddr);
|
||||
copy_to_page(area->page, xol_vaddr, uprobe->arch.insn, MAX_UINSN_BYTES);
|
||||
/*
|
||||
* We probably need flush_icache_user_range() but it needs vma.
|
||||
* This should work on supported architectures too.
|
||||
@@ -1298,6 +1322,7 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
|
||||
void uprobe_free_utask(struct task_struct *t)
|
||||
{
|
||||
struct uprobe_task *utask = t->utask;
|
||||
struct return_instance *ri, *tmp;
|
||||
|
||||
if (!utask)
|
||||
return;
|
||||
@@ -1305,6 +1330,15 @@ void uprobe_free_utask(struct task_struct *t)
|
||||
if (utask->active_uprobe)
|
||||
put_uprobe(utask->active_uprobe);
|
||||
|
||||
ri = utask->return_instances;
|
||||
while (ri) {
|
||||
tmp = ri;
|
||||
ri = ri->next;
|
||||
|
||||
put_uprobe(tmp->uprobe);
|
||||
kfree(tmp);
|
||||
}
|
||||
|
||||
xol_free_insn_slot(t);
|
||||
kfree(utask);
|
||||
t->utask = NULL;
|
||||
@@ -1333,6 +1367,93 @@ static struct uprobe_task *get_utask(void)
|
||||
return current->utask;
|
||||
}
|
||||
|
||||
/*
|
||||
* Current area->vaddr notion assume the trampoline address is always
|
||||
* equal area->vaddr.
|
||||
*
|
||||
* Returns -1 in case the xol_area is not allocated.
|
||||
*/
|
||||
static unsigned long get_trampoline_vaddr(void)
|
||||
{
|
||||
struct xol_area *area;
|
||||
unsigned long trampoline_vaddr = -1;
|
||||
|
||||
area = current->mm->uprobes_state.xol_area;
|
||||
smp_read_barrier_depends();
|
||||
if (area)
|
||||
trampoline_vaddr = area->vaddr;
|
||||
|
||||
return trampoline_vaddr;
|
||||
}
|
||||
|
||||
static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
{
|
||||
struct return_instance *ri;
|
||||
struct uprobe_task *utask;
|
||||
unsigned long orig_ret_vaddr, trampoline_vaddr;
|
||||
bool chained = false;
|
||||
|
||||
if (!get_xol_area())
|
||||
return;
|
||||
|
||||
utask = get_utask();
|
||||
if (!utask)
|
||||
return;
|
||||
|
||||
if (utask->depth >= MAX_URETPROBE_DEPTH) {
|
||||
printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
|
||||
" nestedness limit pid/tgid=%d/%d\n",
|
||||
current->pid, current->tgid);
|
||||
return;
|
||||
}
|
||||
|
||||
ri = kzalloc(sizeof(struct return_instance), GFP_KERNEL);
|
||||
if (!ri)
|
||||
goto fail;
|
||||
|
||||
trampoline_vaddr = get_trampoline_vaddr();
|
||||
orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
|
||||
if (orig_ret_vaddr == -1)
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* We don't want to keep trampoline address in stack, rather keep the
|
||||
* original return address of first caller thru all the consequent
|
||||
* instances. This also makes breakpoint unwrapping easier.
|
||||
*/
|
||||
if (orig_ret_vaddr == trampoline_vaddr) {
|
||||
if (!utask->return_instances) {
|
||||
/*
|
||||
* This situation is not possible. Likely we have an
|
||||
* attack from user-space.
|
||||
*/
|
||||
pr_warn("uprobe: unable to set uretprobe pid/tgid=%d/%d\n",
|
||||
current->pid, current->tgid);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
chained = true;
|
||||
orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
|
||||
}
|
||||
|
||||
atomic_inc(&uprobe->ref);
|
||||
ri->uprobe = uprobe;
|
||||
ri->func = instruction_pointer(regs);
|
||||
ri->orig_ret_vaddr = orig_ret_vaddr;
|
||||
ri->chained = chained;
|
||||
|
||||
utask->depth++;
|
||||
|
||||
/* add instance to the stack */
|
||||
ri->next = utask->return_instances;
|
||||
utask->return_instances = ri;
|
||||
|
||||
return;
|
||||
|
||||
fail:
|
||||
kfree(ri);
|
||||
}
|
||||
|
||||
/* Prepare to single-step probed instruction out of line. */
|
||||
static int
|
||||
pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
|
||||
@@ -1431,7 +1552,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)
|
||||
clear_bit(MMF_HAS_UPROBES, &mm->flags);
|
||||
}
|
||||
|
||||
static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
|
||||
static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
|
||||
{
|
||||
struct page *page;
|
||||
uprobe_opcode_t opcode;
|
||||
@@ -1449,10 +1570,11 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
|
||||
if (result < 0)
|
||||
return result;
|
||||
|
||||
copy_opcode(page, vaddr, &opcode);
|
||||
copy_from_page(page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
|
||||
put_page(page);
|
||||
out:
|
||||
return is_swbp_insn(&opcode);
|
||||
/* This needs to return true for any variant of the trap insn */
|
||||
return is_trap_insn(&opcode);
|
||||
}
|
||||
|
||||
static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
|
||||
@@ -1465,14 +1587,14 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
|
||||
vma = find_vma(mm, bp_vaddr);
|
||||
if (vma && vma->vm_start <= bp_vaddr) {
|
||||
if (valid_vma(vma, false)) {
|
||||
struct inode *inode = vma->vm_file->f_mapping->host;
|
||||
struct inode *inode = file_inode(vma->vm_file);
|
||||
loff_t offset = vaddr_to_offset(vma, bp_vaddr);
|
||||
|
||||
uprobe = find_uprobe(inode, offset);
|
||||
}
|
||||
|
||||
if (!uprobe)
|
||||
*is_swbp = is_swbp_at_addr(mm, bp_vaddr);
|
||||
*is_swbp = is_trap_at_addr(mm, bp_vaddr);
|
||||
} else {
|
||||
*is_swbp = -EFAULT;
|
||||
}
|
||||
@@ -1488,16 +1610,27 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe_consumer *uc;
|
||||
int remove = UPROBE_HANDLER_REMOVE;
|
||||
bool need_prep = false; /* prepare return uprobe, when needed */
|
||||
|
||||
down_read(&uprobe->register_rwsem);
|
||||
for (uc = uprobe->consumers; uc; uc = uc->next) {
|
||||
int rc = uc->handler(uc, regs);
|
||||
int rc = 0;
|
||||
|
||||
if (uc->handler) {
|
||||
rc = uc->handler(uc, regs);
|
||||
WARN(rc & ~UPROBE_HANDLER_MASK,
|
||||
"bad rc=0x%x from %pf()\n", rc, uc->handler);
|
||||
}
|
||||
|
||||
if (uc->ret_handler)
|
||||
need_prep = true;
|
||||
|
||||
WARN(rc & ~UPROBE_HANDLER_MASK,
|
||||
"bad rc=0x%x from %pf()\n", rc, uc->handler);
|
||||
remove &= rc;
|
||||
}
|
||||
|
||||
if (need_prep && !remove)
|
||||
prepare_uretprobe(uprobe, regs); /* put bp at return */
|
||||
|
||||
if (remove && uprobe->consumers) {
|
||||
WARN_ON(!uprobe_is_active(uprobe));
|
||||
unapply_uprobe(uprobe, current->mm);
|
||||
@@ -1505,6 +1638,64 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
|
||||
up_read(&uprobe->register_rwsem);
|
||||
}
|
||||
|
||||
static void
|
||||
handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe *uprobe = ri->uprobe;
|
||||
struct uprobe_consumer *uc;
|
||||
|
||||
down_read(&uprobe->register_rwsem);
|
||||
for (uc = uprobe->consumers; uc; uc = uc->next) {
|
||||
if (uc->ret_handler)
|
||||
uc->ret_handler(uc, ri->func, regs);
|
||||
}
|
||||
up_read(&uprobe->register_rwsem);
|
||||
}
|
||||
|
||||
static bool handle_trampoline(struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe_task *utask;
|
||||
struct return_instance *ri, *tmp;
|
||||
bool chained;
|
||||
|
||||
utask = current->utask;
|
||||
if (!utask)
|
||||
return false;
|
||||
|
||||
ri = utask->return_instances;
|
||||
if (!ri)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* TODO: we should throw out return_instance's invalidated by
|
||||
* longjmp(), currently we assume that the probed function always
|
||||
* returns.
|
||||
*/
|
||||
instruction_pointer_set(regs, ri->orig_ret_vaddr);
|
||||
|
||||
for (;;) {
|
||||
handle_uretprobe_chain(ri, regs);
|
||||
|
||||
chained = ri->chained;
|
||||
put_uprobe(ri->uprobe);
|
||||
|
||||
tmp = ri;
|
||||
ri = ri->next;
|
||||
kfree(tmp);
|
||||
|
||||
if (!chained)
|
||||
break;
|
||||
|
||||
utask->depth--;
|
||||
|
||||
BUG_ON(!ri);
|
||||
}
|
||||
|
||||
utask->return_instances = ri;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Run handler and ask thread to singlestep.
|
||||
* Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
|
||||
@@ -1516,8 +1707,15 @@ static void handle_swbp(struct pt_regs *regs)
|
||||
int uninitialized_var(is_swbp);
|
||||
|
||||
bp_vaddr = uprobe_get_swbp_addr(regs);
|
||||
uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
|
||||
if (bp_vaddr == get_trampoline_vaddr()) {
|
||||
if (handle_trampoline(regs))
|
||||
return;
|
||||
|
||||
pr_warn("uprobe: unable to handle uretprobe pid/tgid=%d/%d\n",
|
||||
current->pid, current->tgid);
|
||||
}
|
||||
|
||||
uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
|
||||
if (!uprobe) {
|
||||
if (is_swbp > 0) {
|
||||
/* No matching uprobe; signal SIGTRAP. */
|
||||
@@ -1616,7 +1814,11 @@ void uprobe_notify_resume(struct pt_regs *regs)
|
||||
*/
|
||||
int uprobe_pre_sstep_notifier(struct pt_regs *regs)
|
||||
{
|
||||
if (!current->mm || !test_bit(MMF_HAS_UPROBES, ¤t->mm->flags))
|
||||
if (!current->mm)
|
||||
return 0;
|
||||
|
||||
if (!test_bit(MMF_HAS_UPROBES, ¤t->mm->flags) &&
|
||||
(!current->utask || !current->utask->return_instances))
|
||||
return 0;
|
||||
|
||||
set_thread_flag(TIF_UPROBE);
|
||||
|
@@ -835,7 +835,7 @@ void do_exit(long code)
|
||||
/*
|
||||
* Make sure we are holding no locks:
|
||||
*/
|
||||
debug_check_no_locks_held();
|
||||
debug_check_no_locks_held(tsk);
|
||||
/*
|
||||
* We can do this unlocked here. The futex code uses this flag
|
||||
* just to verify whether the pi state cleanup has been done
|
||||
|
@@ -1141,6 +1141,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
/*
|
||||
* Thread groups must share signals as well, and detached threads
|
||||
* can only be started up within the thread group.
|
||||
@@ -1807,7 +1810,7 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
|
||||
* If unsharing a user namespace must also unshare the thread.
|
||||
*/
|
||||
if (unshare_flags & CLONE_NEWUSER)
|
||||
unshare_flags |= CLONE_THREAD;
|
||||
unshare_flags |= CLONE_THREAD | CLONE_FS;
|
||||
/*
|
||||
* If unsharing a pid namespace must also unshare the thread.
|
||||
*/
|
||||
|
@@ -223,7 +223,8 @@ static void drop_futex_key_refs(union futex_key *key)
|
||||
* @rw: mapping needs to be read/write (values: VERIFY_READ,
|
||||
* VERIFY_WRITE)
|
||||
*
|
||||
* Returns a negative error code or 0
|
||||
* Return: a negative error code or 0
|
||||
*
|
||||
* The key words are stored in *key on success.
|
||||
*
|
||||
* For shared mappings, it's (page->index, file_inode(vma->vm_file),
|
||||
@@ -705,9 +706,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
|
||||
* be "current" except in the case of requeue pi.
|
||||
* @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
|
||||
*
|
||||
* Returns:
|
||||
* 0 - ready to wait
|
||||
* 1 - acquired the lock
|
||||
* Return:
|
||||
* 0 - ready to wait;
|
||||
* 1 - acquired the lock;
|
||||
* <0 - error
|
||||
*
|
||||
* The hb->lock and futex_key refs shall be held by the caller.
|
||||
@@ -1191,9 +1192,9 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
|
||||
* then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
|
||||
* hb1 and hb2 must be held by the caller.
|
||||
*
|
||||
* Returns:
|
||||
* 0 - failed to acquire the lock atomicly
|
||||
* 1 - acquired the lock
|
||||
* Return:
|
||||
* 0 - failed to acquire the lock atomically;
|
||||
* 1 - acquired the lock;
|
||||
* <0 - error
|
||||
*/
|
||||
static int futex_proxy_trylock_atomic(u32 __user *pifutex,
|
||||
@@ -1254,8 +1255,8 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
|
||||
* Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
|
||||
* uaddr2 atomically on behalf of the top waiter.
|
||||
*
|
||||
* Returns:
|
||||
* >=0 - on success, the number of tasks requeued or woken
|
||||
* Return:
|
||||
* >=0 - on success, the number of tasks requeued or woken;
|
||||
* <0 - on error
|
||||
*/
|
||||
static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
|
||||
@@ -1536,8 +1537,8 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
|
||||
* The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
|
||||
* be paired with exactly one earlier call to queue_me().
|
||||
*
|
||||
* Returns:
|
||||
* 1 - if the futex_q was still queued (and we removed unqueued it)
|
||||
* Return:
|
||||
* 1 - if the futex_q was still queued (and we removed unqueued it);
|
||||
* 0 - if the futex_q was already removed by the waking thread
|
||||
*/
|
||||
static int unqueue_me(struct futex_q *q)
|
||||
@@ -1707,9 +1708,9 @@ static long futex_wait_restart(struct restart_block *restart);
|
||||
* the pi_state owner as well as handle race conditions that may allow us to
|
||||
* acquire the lock. Must be called with the hb lock held.
|
||||
*
|
||||
* Returns:
|
||||
* 1 - success, lock taken
|
||||
* 0 - success, lock not taken
|
||||
* Return:
|
||||
* 1 - success, lock taken;
|
||||
* 0 - success, lock not taken;
|
||||
* <0 - on error (-EFAULT)
|
||||
*/
|
||||
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
|
||||
@@ -1824,8 +1825,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
|
||||
* Return with the hb lock held and a q.key reference on success, and unlocked
|
||||
* with no q.key reference on failure.
|
||||
*
|
||||
* Returns:
|
||||
* 0 - uaddr contains val and hb has been locked
|
||||
* Return:
|
||||
* 0 - uaddr contains val and hb has been locked;
|
||||
* <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
|
||||
*/
|
||||
static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
|
||||
@@ -2203,9 +2204,9 @@ pi_faulted:
|
||||
* the wakeup and return the appropriate error code to the caller. Must be
|
||||
* called with the hb lock held.
|
||||
*
|
||||
* Returns
|
||||
* 0 - no early wakeup detected
|
||||
* <0 - -ETIMEDOUT or -ERESTARTNOINTR
|
||||
* Return:
|
||||
* 0 = no early wakeup detected;
|
||||
* <0 = -ETIMEDOUT or -ERESTARTNOINTR
|
||||
*/
|
||||
static inline
|
||||
int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
|
||||
@@ -2247,7 +2248,6 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
|
||||
* @val: the expected value of uaddr
|
||||
* @abs_time: absolute timeout
|
||||
* @bitset: 32 bit wakeup bitset set by userspace, defaults to all
|
||||
* @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
|
||||
* @uaddr2: the pi futex we will take prior to returning to user-space
|
||||
*
|
||||
* The caller will wait on uaddr and will be requeued by futex_requeue() to
|
||||
@@ -2258,7 +2258,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
|
||||
* there was a need to.
|
||||
*
|
||||
* We call schedule in futex_wait_queue_me() when we enqueue and return there
|
||||
* via the following:
|
||||
* via the following--
|
||||
* 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
|
||||
* 2) wakeup on uaddr2 after a requeue
|
||||
* 3) signal
|
||||
@@ -2276,8 +2276,8 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
|
||||
*
|
||||
* If 4 or 7, we cleanup and return with -ETIMEDOUT.
|
||||
*
|
||||
* Returns:
|
||||
* 0 - On success
|
||||
* Return:
|
||||
* 0 - On success;
|
||||
* <0 - On error
|
||||
*/
|
||||
static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
|
||||
|
@@ -63,6 +63,7 @@
|
||||
DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
|
||||
{
|
||||
|
||||
.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
|
||||
.clock_base =
|
||||
{
|
||||
{
|
||||
@@ -1642,8 +1643,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
|
||||
struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
|
||||
int i;
|
||||
|
||||
raw_spin_lock_init(&cpu_base->lock);
|
||||
|
||||
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
|
||||
cpu_base->clock_base[i].cpu_base = cpu_base;
|
||||
timerqueue_init_head(&cpu_base->clock_base[i].active);
|
||||
|
kernel/kexec.c | 131
@@ -55,7 +55,7 @@ struct resource crashk_res = {
|
||||
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
||||
};
|
||||
struct resource crashk_low_res = {
|
||||
.name = "Crash kernel low",
|
||||
.name = "Crash kernel",
|
||||
.start = 0,
|
||||
.end = 0,
|
||||
.flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
||||
@@ -1118,12 +1118,8 @@ void __weak crash_free_reserved_phys_range(unsigned long begin,
|
||||
{
|
||||
unsigned long addr;
|
||||
|
||||
for (addr = begin; addr < end; addr += PAGE_SIZE) {
|
||||
ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
|
||||
init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
|
||||
free_page((unsigned long)__va(addr));
|
||||
totalram_pages++;
|
||||
}
|
||||
for (addr = begin; addr < end; addr += PAGE_SIZE)
|
||||
free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
|
||||
}
|
||||
|
||||
int crash_shrink_memory(unsigned long new_size)
|
||||
@@ -1368,35 +1364,114 @@ static int __init parse_crashkernel_simple(char *cmdline,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define SUFFIX_HIGH 0
|
||||
#define SUFFIX_LOW 1
|
||||
#define SUFFIX_NULL 2
|
||||
static __initdata char *suffix_tbl[] = {
|
||||
[SUFFIX_HIGH] = ",high",
|
||||
[SUFFIX_LOW] = ",low",
|
||||
[SUFFIX_NULL] = NULL,
|
||||
};
|
||||
|
||||
/*
|
||||
* That function is the entry point for command line parsing and should be
|
||||
* called from the arch-specific code.
|
||||
* That function parses "suffix" crashkernel command lines like
|
||||
*
|
||||
* crashkernel=size,[high|low]
|
||||
*
|
||||
* It returns 0 on success and -EINVAL on failure.
|
||||
*/
|
||||
static int __init parse_crashkernel_suffix(char *cmdline,
|
||||
unsigned long long *crash_size,
|
||||
unsigned long long *crash_base,
|
||||
const char *suffix)
|
||||
{
|
||||
char *cur = cmdline;
|
||||
|
||||
*crash_size = memparse(cmdline, &cur);
|
||||
if (cmdline == cur) {
|
||||
pr_warn("crashkernel: memory value expected\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* check with suffix */
|
||||
if (strncmp(cur, suffix, strlen(suffix))) {
|
||||
pr_warn("crashkernel: unrecognized char\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
cur += strlen(suffix);
|
||||
if (*cur != ' ' && *cur != '\0') {
|
||||
pr_warn("crashkernel: unrecognized char\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __init char *get_last_crashkernel(char *cmdline,
|
||||
const char *name,
|
||||
const char *suffix)
|
||||
{
|
||||
char *p = cmdline, *ck_cmdline = NULL;
|
||||
|
||||
/* find crashkernel and use the last one if there are more */
|
||||
p = strstr(p, name);
|
||||
while (p) {
|
||||
char *end_p = strchr(p, ' ');
|
||||
char *q;
|
||||
|
||||
if (!end_p)
|
||||
end_p = p + strlen(p);
|
||||
|
||||
if (!suffix) {
|
||||
int i;
|
||||
|
||||
/* skip the one with any known suffix */
|
||||
for (i = 0; suffix_tbl[i]; i++) {
|
||||
q = end_p - strlen(suffix_tbl[i]);
|
||||
if (!strncmp(q, suffix_tbl[i],
|
||||
strlen(suffix_tbl[i])))
|
||||
goto next;
|
||||
}
|
||||
ck_cmdline = p;
|
||||
} else {
|
||||
q = end_p - strlen(suffix);
|
||||
if (!strncmp(q, suffix, strlen(suffix)))
|
||||
ck_cmdline = p;
|
||||
}
|
||||
next:
|
||||
p = strstr(p+1, name);
|
||||
}
|
||||
|
||||
if (!ck_cmdline)
|
||||
return NULL;
|
||||
|
||||
return ck_cmdline;
|
||||
}
|
||||
|
||||
static int __init __parse_crashkernel(char *cmdline,
|
||||
unsigned long long system_ram,
|
||||
unsigned long long *crash_size,
|
||||
unsigned long long *crash_base,
|
||||
const char *name)
|
||||
const char *name,
|
||||
const char *suffix)
|
||||
{
|
||||
char *p = cmdline, *ck_cmdline = NULL;
|
||||
char *first_colon, *first_space;
|
||||
char *ck_cmdline;
|
||||
|
||||
BUG_ON(!crash_size || !crash_base);
|
||||
*crash_size = 0;
|
||||
*crash_base = 0;
|
||||
|
||||
/* find crashkernel and use the last one if there are more */
|
||||
p = strstr(p, name);
|
||||
while (p) {
|
||||
ck_cmdline = p;
|
||||
p = strstr(p+1, name);
|
||||
}
|
||||
ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
|
||||
|
||||
if (!ck_cmdline)
|
||||
return -EINVAL;
|
||||
|
||||
ck_cmdline += strlen(name);
|
||||
|
||||
if (suffix)
|
||||
return parse_crashkernel_suffix(ck_cmdline, crash_size,
|
||||
crash_base, suffix);
|
||||
/*
|
||||
* if the commandline contains a ':', then that's the extended
|
||||
* syntax -- if not, it must be the classic syntax
|
||||
@@ -1413,13 +1488,26 @@ static int __init __parse_crashkernel(char *cmdline,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* That function is the entry point for command line parsing and should be
|
||||
* called from the arch-specific code.
|
||||
*/
|
||||
int __init parse_crashkernel(char *cmdline,
|
||||
unsigned long long system_ram,
|
||||
unsigned long long *crash_size,
|
||||
unsigned long long *crash_base)
|
||||
{
|
||||
return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
|
||||
"crashkernel=");
|
||||
"crashkernel=", NULL);
|
||||
}
|
||||
|
||||
int __init parse_crashkernel_high(char *cmdline,
|
||||
unsigned long long system_ram,
|
||||
unsigned long long *crash_size,
|
||||
unsigned long long *crash_base)
|
||||
{
|
||||
return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
|
||||
"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
|
||||
}
|
||||
|
||||
int __init parse_crashkernel_low(char *cmdline,
|
||||
@@ -1428,7 +1516,7 @@ int __init parse_crashkernel_low(char *cmdline,
|
||||
unsigned long long *crash_base)
|
||||
{
|
||||
return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
|
||||
"crashkernel_low=");
|
||||
"crashkernel=", suffix_tbl[SUFFIX_LOW]);
|
||||
}
|
||||
|
||||
static void update_vmcoreinfo_note(void)
|
||||
@@ -1489,7 +1577,7 @@ static int __init crash_save_vmcoreinfo_init(void)
|
||||
VMCOREINFO_SYMBOL(swapper_pg_dir);
|
||||
#endif
|
||||
VMCOREINFO_SYMBOL(_stext);
|
||||
VMCOREINFO_SYMBOL(vmlist);
|
||||
VMCOREINFO_SYMBOL(vmap_area_list);
|
||||
|
||||
#ifndef CONFIG_NEED_MULTIPLE_NODES
|
||||
VMCOREINFO_SYMBOL(mem_map);
|
||||
@@ -1527,7 +1615,8 @@ static int __init crash_save_vmcoreinfo_init(void)
|
||||
VMCOREINFO_OFFSET(free_area, free_list);
|
||||
VMCOREINFO_OFFSET(list_head, next);
|
||||
VMCOREINFO_OFFSET(list_head, prev);
|
||||
VMCOREINFO_OFFSET(vm_struct, addr);
|
||||
VMCOREINFO_OFFSET(vmap_area, va_start);
|
||||
VMCOREINFO_OFFSET(vmap_area, list);
|
||||
VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
|
||||
log_buf_kexec_setup();
|
||||
VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
|
||||
|
@@ -794,16 +794,16 @@ out:
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
/* This should be called with kprobe_mutex locked */
|
||||
static void __kprobes optimize_all_kprobes(void)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct kprobe *p;
|
||||
unsigned int i;
|
||||
|
||||
mutex_lock(&kprobe_mutex);
|
||||
/* If optimization is already allowed, just return */
|
||||
if (kprobes_allow_optimization)
|
||||
return;
|
||||
goto out;
|
||||
|
||||
kprobes_allow_optimization = true;
|
||||
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
|
||||
@@ -813,18 +813,22 @@ static void __kprobes optimize_all_kprobes(void)
|
||||
optimize_kprobe(p);
|
||||
}
|
||||
printk(KERN_INFO "Kprobes globally optimized\n");
|
||||
out:
|
||||
mutex_unlock(&kprobe_mutex);
|
||||
}
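
The hunk above pulls the kprobe_mutex acquisition into optimize_all_kprobes() itself, turning the early return into a goto to a single unlock site. A hedged user-space analog of that refactor pattern, with pthreads and invented names standing in for the kernel mutex and kprobe state:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative analogs of kprobe_mutex and kprobes_allow_optimization. */
static pthread_mutex_t opt_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool optimization_allowed;

/*
 * Before: callers had to hold opt_mutex around this call.
 * After (as in the diff): the function takes the lock itself and the
 * early return becomes a goto to the single unlock site.
 */
static void optimize_all_demo(void)
{
	pthread_mutex_lock(&opt_mutex);
	if (optimization_allowed)	/* already enabled, nothing to do */
		goto out;

	optimization_allowed = true;
	printf("globally optimized\n");
out:
	pthread_mutex_unlock(&opt_mutex);
}

int main(void)
{
	optimize_all_demo();
	optimize_all_demo();	/* second call hits the early-out path */
	return 0;
}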
|
||||
|
||||
/* This should be called with kprobe_mutex locked */
|
||||
static void __kprobes unoptimize_all_kprobes(void)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct kprobe *p;
|
||||
unsigned int i;
|
||||
|
||||
mutex_lock(&kprobe_mutex);
|
||||
/* If optimization is already prohibited, just return */
|
||||
if (!kprobes_allow_optimization)
|
||||
if (!kprobes_allow_optimization) {
|
||||
mutex_unlock(&kprobe_mutex);
|
||||
return;
|
||||
}
|
||||
|
||||
kprobes_allow_optimization = false;
|
||||
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
|
||||
@@ -834,11 +838,14 @@ static void __kprobes unoptimize_all_kprobes(void)
|
||||
unoptimize_kprobe(p, false);
|
||||
}
|
||||
}
|
||||
mutex_unlock(&kprobe_mutex);
|
||||
|
||||
/* Wait for unoptimizing completion */
|
||||
wait_for_kprobe_optimizer();
|
||||
printk(KERN_INFO "Kprobes globally unoptimized\n");
|
||||
}
|
||||
|
||||
static DEFINE_MUTEX(kprobe_sysctl_mutex);
|
||||
int sysctl_kprobes_optimization;
|
||||
int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *length,
|
||||
@@ -846,7 +853,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&kprobe_mutex);
|
||||
mutex_lock(&kprobe_sysctl_mutex);
|
||||
sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
|
||||
ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
|
||||
|
||||
@@ -854,7 +861,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
|
||||
optimize_all_kprobes();
|
||||
else
|
||||
unoptimize_all_kprobes();
|
||||
mutex_unlock(&kprobe_mutex);
|
||||
mutex_unlock(&kprobe_sysctl_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@@ -52,8 +52,21 @@ enum KTHREAD_BITS {
	KTHREAD_IS_PARKED,
};

#define to_kthread(tsk)	\
	container_of((tsk)->vfork_done, struct kthread, exited)
#define __to_kthread(vfork)	\
	container_of(vfork, struct kthread, exited)

static inline struct kthread *to_kthread(struct task_struct *k)
{
	return __to_kthread(k->vfork_done);
}

static struct kthread *to_live_kthread(struct task_struct *k)
{
	struct completion *vfork = ACCESS_ONCE(k->vfork_done);
	if (likely(vfork))
		return __to_kthread(vfork);
	return NULL;
}
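
The old to_kthread() macro is replaced by inline helpers so that to_live_kthread() can read vfork_done exactly once and then convert that pointer back to its enclosing struct kthread. A minimal user-space sketch of the container_of pattern behind this, with the _demo types invented for illustration:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct completion { int done; };

struct kthread_demo {
	unsigned long flags;
	struct completion exited;
};

/* Convert a pointer to the embedded completion back to its kthread. */
static inline struct kthread_demo *to_kthread_demo(struct completion *vfork)
{
	return container_of(vfork, struct kthread_demo, exited);
}

/* Read the (possibly NULL) pointer once, then convert it. */
static struct kthread_demo *to_live_kthread_demo(struct completion *vfork_done)
{
	struct completion *vfork = vfork_done;	/* single load, as in the diff */

	if (vfork)
		return to_kthread_demo(vfork);
	return NULL;
}

int main(void)
{
	struct kthread_demo kt = { .flags = 1 };

	printf("flags = %lu\n", to_live_kthread_demo(&kt.exited)->flags);
	return 0;
}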
|
||||
|
||||
/**
|
||||
* kthread_should_stop - should this kthread return now?
|
||||
@@ -124,12 +137,12 @@ void *kthread_data(struct task_struct *task)
|
||||
|
||||
static void __kthread_parkme(struct kthread *self)
|
||||
{
|
||||
__set_current_state(TASK_INTERRUPTIBLE);
|
||||
__set_current_state(TASK_PARKED);
|
||||
while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
|
||||
if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
|
||||
complete(&self->parked);
|
||||
schedule();
|
||||
__set_current_state(TASK_INTERRUPTIBLE);
|
||||
__set_current_state(TASK_PARKED);
|
||||
}
|
||||
clear_bit(KTHREAD_IS_PARKED, &self->flags);
|
||||
__set_current_state(TASK_RUNNING);
|
||||
@@ -256,11 +269,16 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_create_on_node);
|
||||
|
||||
static void __kthread_bind(struct task_struct *p, unsigned int cpu)
|
||||
static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
|
||||
{
|
||||
/* Must have done schedule() in kthread() before we set_task_cpu */
|
||||
if (!wait_task_inactive(p, state)) {
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
/* It's safe because the task is inactive. */
|
||||
do_set_cpus_allowed(p, cpumask_of(cpu));
|
||||
p->flags |= PF_THREAD_BOUND;
|
||||
p->flags |= PF_NO_SETAFFINITY;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -274,12 +292,7 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu)
|
||||
*/
|
||||
void kthread_bind(struct task_struct *p, unsigned int cpu)
|
||||
{
|
||||
/* Must have done schedule() in kthread() before we set_task_cpu */
|
||||
if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
|
||||
WARN_ON(1);
|
||||
return;
|
||||
}
|
||||
__kthread_bind(p, cpu);
|
||||
__kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_bind);
|
||||
|
||||
@@ -311,17 +324,20 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
|
||||
return p;
|
||||
}
|
||||
|
||||
static struct kthread *task_get_live_kthread(struct task_struct *k)
|
||||
static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
|
||||
{
|
||||
struct kthread *kthread;
|
||||
|
||||
get_task_struct(k);
|
||||
kthread = to_kthread(k);
|
||||
/* It might have exited */
|
||||
barrier();
|
||||
if (k->vfork_done != NULL)
|
||||
return kthread;
|
||||
return NULL;
|
||||
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
|
||||
/*
|
||||
* We clear the IS_PARKED bit here as we don't wait
|
||||
* until the task has left the park code. So if we'd
|
||||
* park before that happens we'd see the IS_PARKED bit
|
||||
* which might be about to be cleared.
|
||||
*/
|
||||
if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
|
||||
if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
|
||||
__kthread_bind(k, kthread->cpu, TASK_PARKED);
|
||||
wake_up_state(k, TASK_PARKED);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -334,23 +350,10 @@ static struct kthread *task_get_live_kthread(struct task_struct *k)
|
||||
*/
|
||||
void kthread_unpark(struct task_struct *k)
|
||||
{
|
||||
struct kthread *kthread = task_get_live_kthread(k);
|
||||
struct kthread *kthread = to_live_kthread(k);
|
||||
|
||||
if (kthread) {
|
||||
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
|
||||
/*
|
||||
* We clear the IS_PARKED bit here as we don't wait
|
||||
* until the task has left the park code. So if we'd
|
||||
* park before that happens we'd see the IS_PARKED bit
|
||||
* which might be about to be cleared.
|
||||
*/
|
||||
if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
|
||||
if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
|
||||
__kthread_bind(k, kthread->cpu);
|
||||
wake_up_process(k);
|
||||
}
|
||||
}
|
||||
put_task_struct(k);
|
||||
if (kthread)
|
||||
__kthread_unpark(k, kthread);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -367,7 +370,7 @@ void kthread_unpark(struct task_struct *k)
|
||||
*/
|
||||
int kthread_park(struct task_struct *k)
|
||||
{
|
||||
struct kthread *kthread = task_get_live_kthread(k);
|
||||
struct kthread *kthread = to_live_kthread(k);
|
||||
int ret = -ENOSYS;
|
||||
|
||||
if (kthread) {
|
||||
@@ -380,7 +383,6 @@ int kthread_park(struct task_struct *k)
|
||||
}
|
||||
ret = 0;
|
||||
}
|
||||
put_task_struct(k);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -401,21 +403,23 @@ int kthread_park(struct task_struct *k)
|
||||
*/
|
||||
int kthread_stop(struct task_struct *k)
|
||||
{
|
||||
struct kthread *kthread = task_get_live_kthread(k);
|
||||
struct kthread *kthread;
|
||||
int ret;
|
||||
|
||||
trace_sched_kthread_stop(k);
|
||||
|
||||
get_task_struct(k);
|
||||
kthread = to_live_kthread(k);
|
||||
if (kthread) {
|
||||
set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
|
||||
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
|
||||
__kthread_unpark(k, kthread);
|
||||
wake_up_process(k);
|
||||
wait_for_completion(&kthread->exited);
|
||||
}
|
||||
ret = k->exit_code;
|
||||
|
||||
put_task_struct(k);
|
||||
trace_sched_kthread_stop_ret(ret);
|
||||
|
||||
trace_sched_kthread_stop_ret(ret);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(kthread_stop);
|
||||
|
@@ -380,6 +380,13 @@ static int verbose(struct lock_class *class)
|
||||
unsigned long nr_stack_trace_entries;
|
||||
static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
|
||||
|
||||
static void print_lockdep_off(const char *bug_msg)
|
||||
{
|
||||
printk(KERN_DEBUG "%s\n", bug_msg);
|
||||
printk(KERN_DEBUG "turning off the locking correctness validator.\n");
|
||||
printk(KERN_DEBUG "Please attach the output of /proc/lock_stat to the bug report\n");
|
||||
}
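
print_lockdep_off() simply folds the three diagnostic lines that every "table too low" path used to repeat into one helper, which the later hunks then call. A tiny user-space analog of the same consolidation, with fprintf standing in for printk:

#include <stdio.h>

/* One helper instead of repeating the same three diagnostic lines. */
static void print_lockdep_off_demo(const char *bug_msg)
{
	fprintf(stderr, "%s\n", bug_msg);
	fprintf(stderr, "turning off the locking correctness validator.\n");
	fprintf(stderr,
		"Please attach the output of /proc/lock_stat to the bug report\n");
}

int main(void)
{
	print_lockdep_off_demo("BUG: MAX_LOCKDEP_ENTRIES too low!");
	return 0;
}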
|
||||
|
||||
static int save_trace(struct stack_trace *trace)
|
||||
{
|
||||
trace->nr_entries = 0;
|
||||
@@ -409,8 +416,7 @@ static int save_trace(struct stack_trace *trace)
|
||||
if (!debug_locks_off_graph_unlock())
|
||||
return 0;
|
||||
|
||||
printk("BUG: MAX_STACK_TRACE_ENTRIES too low!\n");
|
||||
printk("turning off the locking correctness validator.\n");
|
||||
print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
|
||||
dump_stack();
|
||||
|
||||
return 0;
|
||||
@@ -763,8 +769,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
|
||||
}
|
||||
raw_local_irq_restore(flags);
|
||||
|
||||
printk("BUG: MAX_LOCKDEP_KEYS too low!\n");
|
||||
printk("turning off the locking correctness validator.\n");
|
||||
print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!");
|
||||
dump_stack();
|
||||
return NULL;
|
||||
}
|
||||
@@ -834,8 +839,7 @@ static struct lock_list *alloc_list_entry(void)
|
||||
if (!debug_locks_off_graph_unlock())
|
||||
return NULL;
|
||||
|
||||
printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n");
|
||||
printk("turning off the locking correctness validator.\n");
|
||||
print_lockdep_off("BUG: MAX_LOCKDEP_ENTRIES too low!");
|
||||
dump_stack();
|
||||
return NULL;
|
||||
}
|
||||
@@ -2000,7 +2004,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
|
||||
struct lock_class *class = hlock_class(hlock);
|
||||
struct list_head *hash_head = chainhashentry(chain_key);
|
||||
struct lock_chain *chain;
|
||||
struct held_lock *hlock_curr, *hlock_next;
|
||||
struct held_lock *hlock_curr;
|
||||
int i, j;
|
||||
|
||||
/*
|
||||
@@ -2048,8 +2052,7 @@ cache_hit:
|
||||
if (!debug_locks_off_graph_unlock())
|
||||
return 0;
|
||||
|
||||
printk("BUG: MAX_LOCKDEP_CHAINS too low!\n");
|
||||
printk("turning off the locking correctness validator.\n");
|
||||
print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!");
|
||||
dump_stack();
|
||||
return 0;
|
||||
}
|
||||
@@ -2057,12 +2060,10 @@ cache_hit:
|
||||
chain->chain_key = chain_key;
|
||||
chain->irq_context = hlock->irq_context;
|
||||
/* Find the first held_lock of current chain */
|
||||
hlock_next = hlock;
|
||||
for (i = curr->lockdep_depth - 1; i >= 0; i--) {
|
||||
hlock_curr = curr->held_locks + i;
|
||||
if (hlock_curr->irq_context != hlock_next->irq_context)
|
||||
if (hlock_curr->irq_context != hlock->irq_context)
|
||||
break;
|
||||
hlock_next = hlock;
|
||||
}
|
||||
i++;
|
||||
chain->depth = curr->lockdep_depth + 1 - i;
|
||||
@@ -3190,9 +3191,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
|
||||
#endif
|
||||
if (unlikely(curr->lockdep_depth >= MAX_LOCK_DEPTH)) {
|
||||
debug_locks_off();
|
||||
printk("BUG: MAX_LOCK_DEPTH too low, depth: %i max: %lu!\n",
|
||||
print_lockdep_off("BUG: MAX_LOCK_DEPTH too low!");
|
||||
printk(KERN_DEBUG "depth: %i max: %lu!\n",
|
||||
curr->lockdep_depth, MAX_LOCK_DEPTH);
|
||||
printk("turning off the locking correctness validator.\n");
|
||||
|
||||
lockdep_print_held_locks(current);
|
||||
debug_show_all_locks();
|
||||
@@ -4088,7 +4089,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(debug_check_no_locks_freed);
|
||||
|
||||
static void print_held_locks_bug(void)
|
||||
static void print_held_locks_bug(struct task_struct *curr)
|
||||
{
|
||||
if (!debug_locks_off())
|
||||
return;
|
||||
@@ -4097,21 +4098,22 @@ static void print_held_locks_bug(void)
|
||||
|
||||
printk("\n");
|
||||
printk("=====================================\n");
|
||||
printk("[ BUG: %s/%d still has locks held! ]\n",
|
||||
current->comm, task_pid_nr(current));
|
||||
printk("[ BUG: lock held at task exit time! ]\n");
|
||||
print_kernel_ident();
|
||||
printk("-------------------------------------\n");
|
||||
lockdep_print_held_locks(current);
|
||||
printk("%s/%d is exiting with locks still held!\n",
|
||||
curr->comm, task_pid_nr(curr));
|
||||
lockdep_print_held_locks(curr);
|
||||
|
||||
printk("\nstack backtrace:\n");
|
||||
dump_stack();
|
||||
}
|
||||
|
||||
void debug_check_no_locks_held(void)
|
||||
void debug_check_no_locks_held(struct task_struct *task)
|
||||
{
|
||||
if (unlikely(current->lockdep_depth > 0))
|
||||
print_held_locks_bug();
|
||||
if (unlikely(task->lockdep_depth > 0))
|
||||
print_held_locks_bug(task);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(debug_check_no_locks_held);
|
||||
|
||||
void debug_show_all_locks(void)
|
||||
{
|
||||
|
kernel/mutex.c
@@ -37,6 +37,12 @@
|
||||
# include <asm/mutex.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* A negative mutex count indicates that waiters are sleeping waiting for the
|
||||
* mutex.
|
||||
*/
|
||||
#define MUTEX_SHOW_NO_WAITER(mutex) (atomic_read(&(mutex)->count) >= 0)
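
MUTEX_SHOW_NO_WAITER() lets the slowpath test the counter with a plain read before paying for an atomic xchg, so the lock's cache line is not bounced when waiters are already queued. A hedged C11 sketch of that "check before the atomic op" pattern (the names and counter semantics are illustrative, not the kernel API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* 1 = unlocked, 0 = locked with no waiters, negative = waiters queued. */
static atomic_int count = 1;

static inline bool show_no_waiter(void)
{
	return atomic_load_explicit(&count, memory_order_relaxed) >= 0;
}

static bool try_take_slowpath(void)
{
	/*
	 * Only attempt the expensive atomic exchange when the cheap read
	 * says there may be nothing to wait for, as in the diff's
	 * MUTEX_SHOW_NO_WAITER() && atomic_xchg(...) checks.
	 */
	if (show_no_waiter() && atomic_exchange(&count, -1) == 1)
		return true;
	return false;
}

int main(void)
{
	printf("acquired: %d\n", try_take_slowpath());
	printf("acquired again: %d\n", try_take_slowpath());
	return 0;
}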
|
||||
|
||||
void
|
||||
__mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
|
||||
{
|
||||
@@ -44,6 +50,9 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
|
||||
spin_lock_init(&lock->wait_lock);
|
||||
INIT_LIST_HEAD(&lock->wait_list);
|
||||
mutex_clear_owner(lock);
|
||||
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
|
||||
lock->spin_mlock = NULL;
|
||||
#endif
|
||||
|
||||
debug_mutex_init(lock, name, key);
|
||||
}
|
||||
@@ -95,6 +104,124 @@ void __sched mutex_lock(struct mutex *lock)
|
||||
EXPORT_SYMBOL(mutex_lock);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
/*
 * In order to avoid a stampede of mutex spinners from acquiring the mutex
 * more or less simultaneously, the spinners need to acquire a MCS lock
 * first before spinning on the owner field.
 *
 * We don't inline mspin_lock() so that perf can correctly account for the
 * time spent in this lock function.
 */
struct mspin_node {
	struct mspin_node *next;
	int		  locked;	/* 1 if lock acquired */
};
#define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock))

static noinline
void mspin_lock(struct mspin_node **lock, struct mspin_node *node)
{
	struct mspin_node *prev;

	/* Init node */
	node->locked = 0;
	node->next   = NULL;

	prev = xchg(lock, node);
	if (likely(prev == NULL)) {
		/* Lock acquired */
		node->locked = 1;
		return;
	}
	ACCESS_ONCE(prev->next) = node;
	smp_wmb();
	/* Wait until the lock holder passes the lock down */
	while (!ACCESS_ONCE(node->locked))
		arch_mutex_cpu_relax();
}

static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node)
{
	struct mspin_node *next = ACCESS_ONCE(node->next);

	if (likely(!next)) {
		/*
		 * Release the lock by setting it to NULL
		 */
		if (cmpxchg(lock, node, NULL) == node)
			return;
		/* Wait until the next pointer is set */
		while (!(next = ACCESS_ONCE(node->next)))
			arch_mutex_cpu_relax();
	}
	ACCESS_ONCE(next->locked) = 1;
	smp_wmb();
}
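
For comparison with mspin_lock()/mspin_unlock() above, here is a self-contained MCS-style queue lock in portable C11 atomics. It follows the same tail-exchange and hand-off idea but uses atomic_exchange/compare_exchange in place of the kernel's xchg/cmpxchg and ACCESS_ONCE, so treat it as an illustrative sketch rather than the kernel code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct mcs_node {
	_Atomic(struct mcs_node *) next;
	atomic_bool locked;		/* true once the lock is handed to us */
};

typedef _Atomic(struct mcs_node *) mcs_lock_t;

static void mcs_lock(mcs_lock_t *lock, struct mcs_node *node)
{
	struct mcs_node *prev;

	atomic_store_explicit(&node->next, NULL, memory_order_relaxed);
	atomic_store_explicit(&node->locked, false, memory_order_relaxed);

	/* Join the tail of the queue. */
	prev = atomic_exchange_explicit(lock, node, memory_order_acq_rel);
	if (!prev)
		return;		/* queue was empty: lock acquired */

	/* Publish ourselves to our predecessor, then spin on our own flag. */
	atomic_store_explicit(&prev->next, node, memory_order_release);
	while (!atomic_load_explicit(&node->locked, memory_order_acquire))
		;		/* cpu_relax() would go here in the kernel */
}

static void mcs_unlock(mcs_lock_t *lock, struct mcs_node *node)
{
	struct mcs_node *next = atomic_load_explicit(&node->next,
						     memory_order_acquire);

	if (!next) {
		/* No known successor: try to swing the tail back to NULL. */
		struct mcs_node *expected = node;

		if (atomic_compare_exchange_strong(lock, &expected,
						   (struct mcs_node *)NULL))
			return;
		/* A successor is enqueueing; wait for its next pointer. */
		while (!(next = atomic_load_explicit(&node->next,
						     memory_order_acquire)))
			;
	}
	/* Hand the lock to the successor. */
	atomic_store_explicit(&next->locked, true, memory_order_release);
}

int main(void)
{
	mcs_lock_t tail = NULL;
	struct mcs_node me;

	mcs_lock(&tail, &me);
	/* critical section */
	mcs_unlock(&tail, &me);
	return 0;
}

Each thread spins only on its own node's flag, which is what keeps the mutex slowpath's spinners from all hammering the same cache line.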
|
||||
|
||||
/*
|
||||
* Mutex spinning code migrated from kernel/sched/core.c
|
||||
*/
|
||||
|
||||
static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
|
||||
{
|
||||
if (lock->owner != owner)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Ensure we emit the owner->on_cpu, dereference _after_ checking
|
||||
* lock->owner still matches owner, if that fails, owner might
|
||||
* point to free()d memory, if it still matches, the rcu_read_lock()
|
||||
* ensures the memory stays valid.
|
||||
*/
|
||||
barrier();
|
||||
|
||||
return owner->on_cpu;
|
||||
}
|
||||
|
||||
/*
|
||||
* Look out! "owner" is an entirely speculative pointer
|
||||
* access and not reliable.
|
||||
*/
|
||||
static noinline
|
||||
int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
|
||||
{
|
||||
rcu_read_lock();
|
||||
while (owner_running(lock, owner)) {
|
||||
if (need_resched())
|
||||
break;
|
||||
|
||||
arch_mutex_cpu_relax();
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* We break out the loop above on need_resched() and when the
|
||||
* owner changed, which is a sign for heavy contention. Return
|
||||
* success only when lock->owner is NULL.
|
||||
*/
|
||||
return lock->owner == NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initial check for entering the mutex spinning loop
|
||||
*/
|
||||
static inline int mutex_can_spin_on_owner(struct mutex *lock)
|
||||
{
|
||||
int retval = 1;
|
||||
|
||||
rcu_read_lock();
|
||||
if (lock->owner)
|
||||
retval = lock->owner->on_cpu;
|
||||
rcu_read_unlock();
|
||||
/*
|
||||
* if lock->owner is not set, the mutex owner may have just acquired
|
||||
* it and not set the owner yet or the mutex has been released.
|
||||
*/
|
||||
return retval;
|
||||
}
|
||||
#endif
|
||||
|
||||
static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
|
||||
|
||||
/**
|
||||
@@ -158,25 +285,39 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
|
||||
*
|
||||
* We can't do this for DEBUG_MUTEXES because that relies on wait_lock
|
||||
* to serialize everything.
|
||||
*
|
||||
* The mutex spinners are queued up using MCS lock so that only one
|
||||
* spinner can compete for the mutex. However, if mutex spinning isn't
|
||||
* going to happen, there is no point in going through the lock/unlock
|
||||
* overhead.
|
||||
*/
|
||||
if (!mutex_can_spin_on_owner(lock))
|
||||
goto slowpath;
|
||||
|
||||
for (;;) {
|
||||
struct task_struct *owner;
|
||||
struct mspin_node node;
|
||||
|
||||
/*
|
||||
* If there's an owner, wait for it to either
|
||||
* release the lock or go to sleep.
|
||||
*/
|
||||
mspin_lock(MLOCK(lock), &node);
|
||||
owner = ACCESS_ONCE(lock->owner);
|
||||
if (owner && !mutex_spin_on_owner(lock, owner))
|
||||
if (owner && !mutex_spin_on_owner(lock, owner)) {
|
||||
mspin_unlock(MLOCK(lock), &node);
|
||||
break;
|
||||
}
|
||||
|
||||
if (atomic_cmpxchg(&lock->count, 1, 0) == 1) {
|
||||
if ((atomic_read(&lock->count) == 1) &&
|
||||
(atomic_cmpxchg(&lock->count, 1, 0) == 1)) {
|
||||
lock_acquired(&lock->dep_map, ip);
|
||||
mutex_set_owner(lock);
|
||||
mspin_unlock(MLOCK(lock), &node);
|
||||
preempt_enable();
|
||||
return 0;
|
||||
}
|
||||
mspin_unlock(MLOCK(lock), &node);
|
||||
|
||||
/*
|
||||
* When there's no owner, we might have preempted between the
|
||||
@@ -195,6 +336,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
|
||||
*/
|
||||
arch_mutex_cpu_relax();
|
||||
}
|
||||
slowpath:
|
||||
#endif
|
||||
spin_lock_mutex(&lock->wait_lock, flags);
|
||||
|
||||
@@ -205,7 +347,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
|
||||
list_add_tail(&waiter.list, &lock->wait_list);
|
||||
waiter.task = task;
|
||||
|
||||
if (atomic_xchg(&lock->count, -1) == 1)
|
||||
if (MUTEX_SHOW_NO_WAITER(lock) && (atomic_xchg(&lock->count, -1) == 1))
|
||||
goto done;
|
||||
|
||||
lock_contended(&lock->dep_map, ip);
|
||||
@@ -220,7 +362,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
|
||||
* that when we release the lock, we properly wake up the
|
||||
* other waiters:
|
||||
*/
|
||||
if (atomic_xchg(&lock->count, -1) == 1)
|
||||
if (MUTEX_SHOW_NO_WAITER(lock) &&
|
||||
(atomic_xchg(&lock->count, -1) == 1))
|
||||
break;
|
||||
|
||||
/*
|
||||
|
@@ -181,6 +181,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
|
||||
int nr;
|
||||
int rc;
|
||||
struct task_struct *task, *me = current;
|
||||
int init_pids = thread_group_leader(me) ? 1 : 2;
|
||||
|
||||
/* Don't allow any more processes into the pid namespace */
|
||||
disable_pid_allocation(pid_ns);
|
||||
@@ -230,7 +231,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
|
||||
*/
|
||||
for (;;) {
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
if (pid_ns->nr_hashed == 1)
|
||||
if (pid_ns->nr_hashed == init_pids)
|
||||
break;
|
||||
schedule();
|
||||
}
|
||||
|
kernel/printk.c
@@ -49,13 +49,6 @@
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/printk.h>
|
||||
|
||||
/*
|
||||
* Architectures can override it:
|
||||
*/
|
||||
void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
|
||||
{
|
||||
}
|
||||
|
||||
/* printk's without a loglevel use this.. */
|
||||
#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
|
||||
|
||||
@@ -63,8 +56,6 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
|
||||
#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
|
||||
#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */
|
||||
|
||||
DECLARE_WAIT_QUEUE_HEAD(log_wait);
|
||||
|
||||
int console_printk[4] = {
|
||||
DEFAULT_CONSOLE_LOGLEVEL, /* console_loglevel */
|
||||
DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */
|
||||
@@ -224,6 +215,7 @@ struct log {
|
||||
static DEFINE_RAW_SPINLOCK(logbuf_lock);
|
||||
|
||||
#ifdef CONFIG_PRINTK
|
||||
DECLARE_WAIT_QUEUE_HEAD(log_wait);
|
||||
/* the next printk record to read by syslog(READ) or /proc/kmsg */
|
||||
static u64 syslog_seq;
|
||||
static u32 syslog_idx;
|
||||
@@ -609,7 +601,8 @@ static unsigned int devkmsg_poll(struct file *file, poll_table *wait)
|
||||
/* return error when data has vanished underneath us */
|
||||
if (user->seq < log_first_seq)
|
||||
ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI;
|
||||
ret = POLLIN|POLLRDNORM;
|
||||
else
|
||||
ret = POLLIN|POLLRDNORM;
|
||||
}
|
||||
raw_spin_unlock_irq(&logbuf_lock);
|
||||
|
||||
@@ -1266,7 +1259,7 @@ static void call_console_drivers(int level, const char *text, size_t len)
|
||||
{
|
||||
struct console *con;
|
||||
|
||||
trace_console(text, 0, len, len);
|
||||
trace_console(text, len);
|
||||
|
||||
if (level >= console_loglevel && !ignore_loglevel)
|
||||
return;
|
||||
@@ -1724,6 +1717,29 @@ static size_t cont_print_text(char *text, size_t size) { return 0; }
|
||||
|
||||
#endif /* CONFIG_PRINTK */
|
||||
|
||||
#ifdef CONFIG_EARLY_PRINTK
struct console *early_console;

void early_vprintk(const char *fmt, va_list ap)
{
	if (early_console) {
		char buf[512];
		int n = vscnprintf(buf, sizeof(buf), fmt, ap);

		early_console->write(early_console, buf, n);
	}
}

asmlinkage void early_printk(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	early_vprintk(fmt, ap);
	va_end(ap);
}
#endif
|
||||
|
||||
static int __add_preferred_console(char *name, int idx, char *options,
|
||||
char *brl_options)
|
||||
{
|
||||
@@ -1957,45 +1973,6 @@ int is_console_locked(void)
|
||||
return console_locked;
|
||||
}
|
||||
|
||||
/*
|
||||
* Delayed printk version, for scheduler-internal messages:
|
||||
*/
|
||||
#define PRINTK_BUF_SIZE 512
|
||||
|
||||
#define PRINTK_PENDING_WAKEUP 0x01
|
||||
#define PRINTK_PENDING_SCHED 0x02
|
||||
|
||||
static DEFINE_PER_CPU(int, printk_pending);
|
||||
static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
|
||||
|
||||
static void wake_up_klogd_work_func(struct irq_work *irq_work)
|
||||
{
|
||||
int pending = __this_cpu_xchg(printk_pending, 0);
|
||||
|
||||
if (pending & PRINTK_PENDING_SCHED) {
|
||||
char *buf = __get_cpu_var(printk_sched_buf);
|
||||
printk(KERN_WARNING "[sched_delayed] %s", buf);
|
||||
}
|
||||
|
||||
if (pending & PRINTK_PENDING_WAKEUP)
|
||||
wake_up_interruptible(&log_wait);
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
|
||||
.func = wake_up_klogd_work_func,
|
||||
.flags = IRQ_WORK_LAZY,
|
||||
};
|
||||
|
||||
void wake_up_klogd(void)
|
||||
{
|
||||
preempt_disable();
|
||||
if (waitqueue_active(&log_wait)) {
|
||||
this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
|
||||
irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void console_cont_flush(char *text, size_t size)
|
||||
{
|
||||
unsigned long flags;
|
||||
@@ -2458,6 +2435,44 @@ static int __init printk_late_init(void)
|
||||
late_initcall(printk_late_init);
|
||||
|
||||
#if defined CONFIG_PRINTK
|
||||
/*
|
||||
* Delayed printk version, for scheduler-internal messages:
|
||||
*/
|
||||
#define PRINTK_BUF_SIZE 512
|
||||
|
||||
#define PRINTK_PENDING_WAKEUP 0x01
|
||||
#define PRINTK_PENDING_SCHED 0x02
|
||||
|
||||
static DEFINE_PER_CPU(int, printk_pending);
|
||||
static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
|
||||
|
||||
static void wake_up_klogd_work_func(struct irq_work *irq_work)
|
||||
{
|
||||
int pending = __this_cpu_xchg(printk_pending, 0);
|
||||
|
||||
if (pending & PRINTK_PENDING_SCHED) {
|
||||
char *buf = __get_cpu_var(printk_sched_buf);
|
||||
printk(KERN_WARNING "[sched_delayed] %s", buf);
|
||||
}
|
||||
|
||||
if (pending & PRINTK_PENDING_WAKEUP)
|
||||
wake_up_interruptible(&log_wait);
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
|
||||
.func = wake_up_klogd_work_func,
|
||||
.flags = IRQ_WORK_LAZY,
|
||||
};
|
||||
|
||||
void wake_up_klogd(void)
|
||||
{
|
||||
preempt_disable();
|
||||
if (waitqueue_active(&log_wait)) {
|
||||
this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
|
||||
irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
|
||||
}
|
||||
preempt_enable();
|
||||
}
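
wake_up_klogd() defers the actual wakeup to a lazy irq_work so printk() stays safe in contexts where waking a task directly is not, and the handler drains the per-CPU printk_pending bits with a single xchg. A user-space sketch of that "post pending bits now, drain them later" pattern using C11 atomics (illustrative names, no irq_work or per-CPU machinery):

#include <stdatomic.h>
#include <stdio.h>

#define PENDING_WAKEUP	0x01
#define PENDING_SCHED	0x02

static atomic_int pending;

/* Producer side: cheap, callable from the restricted context. */
static void post_pending(int bits)
{
	atomic_fetch_or_explicit(&pending, bits, memory_order_release);
	/* the kernel would queue a lazy irq_work here */
}

/* Deferred handler: grab-and-clear all bits in one atomic exchange. */
static void drain_pending(void)
{
	int bits = atomic_exchange_explicit(&pending, 0, memory_order_acquire);

	if (bits & PENDING_SCHED)
		printf("[sched_delayed] flush buffered message\n");
	if (bits & PENDING_WAKEUP)
		printf("wake up log readers\n");
}

int main(void)
{
	post_pending(PENDING_WAKEUP | PENDING_SCHED);
	drain_pending();
	return 0;
}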
|
||||
|
||||
int printk_sched(const char *fmt, ...)
|
||||
{
|
||||
|
kernel/rcutree.c
@@ -64,7 +64,7 @@
|
||||
static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
|
||||
static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
|
||||
|
||||
#define RCU_STATE_INITIALIZER(sname, cr) { \
|
||||
#define RCU_STATE_INITIALIZER(sname, sabbr, cr) { \
|
||||
.level = { &sname##_state.node[0] }, \
|
||||
.call = cr, \
|
||||
.fqs_state = RCU_GP_IDLE, \
|
||||
@@ -76,13 +76,14 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
|
||||
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
|
||||
.onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
|
||||
.name = #sname, \
|
||||
.abbr = sabbr, \
|
||||
}
|
||||
|
||||
struct rcu_state rcu_sched_state =
|
||||
RCU_STATE_INITIALIZER(rcu_sched, call_rcu_sched);
|
||||
RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
|
||||
DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
|
||||
|
||||
struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, call_rcu_bh);
|
||||
struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
|
||||
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
|
||||
|
||||
static struct rcu_state *rcu_state;
|
||||
@@ -223,6 +224,8 @@ static ulong jiffies_till_next_fqs = RCU_JIFFIES_TILL_FORCE_QS;
|
||||
module_param(jiffies_till_first_fqs, ulong, 0644);
|
||||
module_param(jiffies_till_next_fqs, ulong, 0644);
|
||||
|
||||
static void rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
|
||||
struct rcu_data *rdp);
|
||||
static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *));
|
||||
static void force_quiescent_state(struct rcu_state *rsp);
|
||||
static int rcu_pending(int cpu);
|
||||
@@ -310,6 +313,8 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
|
||||
if (rcu_gp_in_progress(rsp))
|
||||
return 0; /* No, a grace period is already in progress. */
|
||||
if (rcu_nocb_needs_gp(rsp))
|
||||
return 1; /* Yes, a no-CBs CPU needs one. */
|
||||
if (!rdp->nxttail[RCU_NEXT_TAIL])
|
||||
return 0; /* No, this is a no-CBs (or offline) CPU. */
|
||||
if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
|
||||
@@ -1045,10 +1050,11 @@ static void init_callback_list(struct rcu_data *rdp)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (init_nocb_callback_list(rdp))
|
||||
return;
|
||||
rdp->nxtlist = NULL;
|
||||
for (i = 0; i < RCU_NEXT_SIZE; i++)
|
||||
rdp->nxttail[i] = &rdp->nxtlist;
|
||||
init_nocb_callback_list(rdp);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1080,6 +1086,120 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
|
||||
return rnp->completed + 2;
|
||||
}
|
||||
|
||||
/*
|
||||
* Trace-event helper function for rcu_start_future_gp() and
|
||||
* rcu_nocb_wait_gp().
|
||||
*/
|
||||
static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
|
||||
unsigned long c, char *s)
|
||||
{
|
||||
trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
|
||||
rnp->completed, c, rnp->level,
|
||||
rnp->grplo, rnp->grphi, s);
|
||||
}
|
||||
|
||||
/*
|
||||
* Start some future grace period, as needed to handle newly arrived
|
||||
* callbacks. The required future grace periods are recorded in each
|
||||
* rcu_node structure's ->need_future_gp field.
|
||||
*
|
||||
* The caller must hold the specified rcu_node structure's ->lock.
|
||||
*/
|
||||
static unsigned long __maybe_unused
|
||||
rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp)
|
||||
{
|
||||
unsigned long c;
|
||||
int i;
|
||||
struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
|
||||
|
||||
/*
|
||||
* Pick up grace-period number for new callbacks. If this
|
||||
* grace period is already marked as needed, return to the caller.
|
||||
*/
|
||||
c = rcu_cbs_completed(rdp->rsp, rnp);
|
||||
trace_rcu_future_gp(rnp, rdp, c, "Startleaf");
|
||||
if (rnp->need_future_gp[c & 0x1]) {
|
||||
trace_rcu_future_gp(rnp, rdp, c, "Prestartleaf");
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* If either this rcu_node structure or the root rcu_node structure
|
||||
* believe that a grace period is in progress, then we must wait
|
||||
* for the one following, which is in "c". Because our request
|
||||
* will be noticed at the end of the current grace period, we don't
|
||||
* need to explicitly start one.
|
||||
*/
|
||||
if (rnp->gpnum != rnp->completed ||
|
||||
ACCESS_ONCE(rnp->gpnum) != ACCESS_ONCE(rnp->completed)) {
|
||||
rnp->need_future_gp[c & 0x1]++;
|
||||
trace_rcu_future_gp(rnp, rdp, c, "Startedleaf");
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* There might be no grace period in progress. If we don't already
|
||||
* hold it, acquire the root rcu_node structure's lock in order to
|
||||
* start one (if needed).
|
||||
*/
|
||||
if (rnp != rnp_root)
|
||||
raw_spin_lock(&rnp_root->lock);
|
||||
|
||||
/*
|
||||
* Get a new grace-period number. If there really is no grace
|
||||
* period in progress, it will be smaller than the one we obtained
|
||||
* earlier. Adjust callbacks as needed. Note that even no-CBs
|
||||
* CPUs have a ->nxtcompleted[] array, so no no-CBs checks needed.
|
||||
*/
|
||||
c = rcu_cbs_completed(rdp->rsp, rnp_root);
|
||||
for (i = RCU_DONE_TAIL; i < RCU_NEXT_TAIL; i++)
|
||||
if (ULONG_CMP_LT(c, rdp->nxtcompleted[i]))
|
||||
rdp->nxtcompleted[i] = c;
|
||||
|
||||
/*
|
||||
* If the needed for the required grace period is already
|
||||
* recorded, trace and leave.
|
||||
*/
|
||||
if (rnp_root->need_future_gp[c & 0x1]) {
|
||||
trace_rcu_future_gp(rnp, rdp, c, "Prestartedroot");
|
||||
goto unlock_out;
|
||||
}
|
||||
|
||||
/* Record the need for the future grace period. */
|
||||
rnp_root->need_future_gp[c & 0x1]++;
|
||||
|
||||
/* If a grace period is not already in progress, start one. */
|
||||
if (rnp_root->gpnum != rnp_root->completed) {
|
||||
trace_rcu_future_gp(rnp, rdp, c, "Startedleafroot");
|
||||
} else {
|
||||
trace_rcu_future_gp(rnp, rdp, c, "Startedroot");
|
||||
rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
|
||||
}
|
||||
unlock_out:
|
||||
if (rnp != rnp_root)
|
||||
raw_spin_unlock(&rnp_root->lock);
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clean up any old requests for the just-ended grace period. Also return
|
||||
* whether any additional grace periods have been requested. Also invoke
|
||||
* rcu_nocb_gp_cleanup() in order to wake up any no-callbacks kthreads
|
||||
* waiting for this grace period to complete.
|
||||
*/
|
||||
static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
|
||||
{
|
||||
int c = rnp->completed;
|
||||
int needmore;
|
||||
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
|
||||
|
||||
rcu_nocb_gp_cleanup(rsp, rnp);
|
||||
rnp->need_future_gp[c & 0x1] = 0;
|
||||
needmore = rnp->need_future_gp[(c + 1) & 0x1];
|
||||
trace_rcu_future_gp(rnp, rdp, c, needmore ? "CleanupMore" : "Cleanup");
|
||||
return needmore;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there is room, assign a ->completed number to any callbacks on
|
||||
* this CPU that have not already been assigned. Also accelerate any
|
||||
@@ -1139,6 +1259,8 @@ static void rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
|
||||
rdp->nxttail[i] = rdp->nxttail[RCU_NEXT_TAIL];
|
||||
rdp->nxtcompleted[i] = c;
|
||||
}
|
||||
/* Record any needed additional grace periods. */
|
||||
rcu_start_future_gp(rnp, rdp);
|
||||
|
||||
/* Trace depending on how much we were able to accelerate. */
|
||||
if (!*rdp->nxttail[RCU_WAIT_TAIL])
|
||||
@@ -1318,9 +1440,9 @@ static int rcu_gp_init(struct rcu_state *rsp)
|
||||
rdp = this_cpu_ptr(rsp->rda);
|
||||
rcu_preempt_check_blocked_tasks(rnp);
|
||||
rnp->qsmask = rnp->qsmaskinit;
|
||||
rnp->gpnum = rsp->gpnum;
|
||||
ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
|
||||
WARN_ON_ONCE(rnp->completed != rsp->completed);
|
||||
rnp->completed = rsp->completed;
|
||||
ACCESS_ONCE(rnp->completed) = rsp->completed;
|
||||
if (rnp == rdp->mynode)
|
||||
rcu_start_gp_per_cpu(rsp, rnp, rdp);
|
||||
rcu_preempt_boost_start_gp(rnp);
|
||||
@@ -1329,7 +1451,8 @@ static int rcu_gp_init(struct rcu_state *rsp)
|
||||
rnp->grphi, rnp->qsmask);
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
#ifdef CONFIG_PROVE_RCU_DELAY
|
||||
if ((random32() % (rcu_num_nodes * 8)) == 0)
|
||||
if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 &&
|
||||
system_state == SYSTEM_RUNNING)
|
||||
schedule_timeout_uninterruptible(2);
|
||||
#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
|
||||
cond_resched();
|
||||
@@ -1371,6 +1494,7 @@ int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
|
||||
static void rcu_gp_cleanup(struct rcu_state *rsp)
|
||||
{
|
||||
unsigned long gp_duration;
|
||||
int nocb = 0;
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_node *rnp = rcu_get_root(rsp);
|
||||
|
||||
@@ -1400,17 +1524,23 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
|
||||
*/
|
||||
rcu_for_each_node_breadth_first(rsp, rnp) {
|
||||
raw_spin_lock_irq(&rnp->lock);
|
||||
rnp->completed = rsp->gpnum;
|
||||
ACCESS_ONCE(rnp->completed) = rsp->gpnum;
|
||||
rdp = this_cpu_ptr(rsp->rda);
|
||||
if (rnp == rdp->mynode)
|
||||
__rcu_process_gp_end(rsp, rnp, rdp);
|
||||
nocb += rcu_future_gp_cleanup(rsp, rnp);
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
cond_resched();
|
||||
}
|
||||
rnp = rcu_get_root(rsp);
|
||||
raw_spin_lock_irq(&rnp->lock);
|
||||
rcu_nocb_gp_set(rnp, nocb);
|
||||
|
||||
rsp->completed = rsp->gpnum; /* Declare grace period done. */
|
||||
trace_rcu_grace_period(rsp->name, rsp->completed, "end");
|
||||
rsp->fqs_state = RCU_GP_IDLE;
|
||||
rdp = this_cpu_ptr(rsp->rda);
|
||||
rcu_advance_cbs(rsp, rnp, rdp); /* Reduce false positives below. */
|
||||
if (cpu_needs_another_gp(rsp, rdp))
|
||||
rsp->gp_flags = 1;
|
||||
raw_spin_unlock_irq(&rnp->lock);
|
||||
@@ -1486,57 +1616,62 @@ static int __noreturn rcu_gp_kthread(void *arg)
|
||||
/*
|
||||
* Start a new RCU grace period if warranted, re-initializing the hierarchy
|
||||
* in preparation for detecting the next grace period. The caller must hold
|
||||
* the root node's ->lock, which is released before return. Hard irqs must
|
||||
* be disabled.
|
||||
* the root node's ->lock and hard irqs must be disabled.
|
||||
*
|
||||
* Note that it is legal for a dying CPU (which is marked as offline) to
|
||||
* invoke this function. This can happen when the dying CPU reports its
|
||||
* quiescent state.
|
||||
*/
|
||||
static void
|
||||
rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
|
||||
__releases(rcu_get_root(rsp)->lock)
|
||||
rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
|
||||
struct rcu_data *rdp)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
|
||||
struct rcu_node *rnp = rcu_get_root(rsp);
|
||||
|
||||
if (!rsp->gp_kthread ||
|
||||
!cpu_needs_another_gp(rsp, rdp)) {
|
||||
if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
|
||||
/*
|
||||
* Either we have not yet spawned the grace-period
|
||||
* task, this CPU does not need another grace period,
|
||||
* or a grace period is already in progress.
|
||||
* Either way, don't start a new grace period.
|
||||
*/
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Because there is no grace period in progress right now,
|
||||
* any callbacks we have up to this point will be satisfied
|
||||
* by the next grace period. So this is a good place to
|
||||
* assign a grace period number to recently posted callbacks.
|
||||
*/
|
||||
rcu_accelerate_cbs(rsp, rnp, rdp);
|
||||
|
||||
rsp->gp_flags = RCU_GP_FLAG_INIT;
|
||||
raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
|
||||
|
||||
/* Ensure that CPU is aware of completion of last grace period. */
|
||||
rcu_process_gp_end(rsp, rdp);
|
||||
local_irq_restore(flags);
|
||||
|
||||
/* Wake up rcu_gp_kthread() to start the grace period. */
|
||||
wake_up(&rsp->gp_wq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
|
||||
* callbacks. Note that rcu_start_gp_advanced() cannot do this because it
|
||||
* is invoked indirectly from rcu_advance_cbs(), which would result in
|
||||
* endless recursion -- or would do so if it wasn't for the self-deadlock
|
||||
* that is encountered beforehand.
|
||||
*/
|
||||
static void
|
||||
rcu_start_gp(struct rcu_state *rsp)
|
||||
{
|
||||
struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
|
||||
struct rcu_node *rnp = rcu_get_root(rsp);
|
||||
|
||||
/*
|
||||
* If there is no grace period in progress right now, any
|
||||
* callbacks we have up to this point will be satisfied by the
|
||||
* next grace period. Also, advancing the callbacks reduces the
|
||||
* probability of false positives from cpu_needs_another_gp()
|
||||
* resulting in pointless grace periods. So, advance callbacks
|
||||
* then start the grace period!
|
||||
*/
|
||||
rcu_advance_cbs(rsp, rnp, rdp);
|
||||
rcu_start_gp_advanced(rsp, rnp, rdp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Report a full set of quiescent states to the specified rcu_state
|
||||
* data structure. This involves cleaning up after the prior grace
|
||||
* period and letting rcu_start_gp() start up the next grace period
|
||||
* if one is needed. Note that the caller must hold rnp->lock, as
|
||||
* required by rcu_start_gp(), which will release it.
|
||||
* if one is needed. Note that the caller must hold rnp->lock, which
|
||||
* is released before return.
|
||||
*/
|
||||
static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
|
||||
__releases(rcu_get_root(rsp)->lock)
|
||||
@@ -2134,7 +2269,8 @@ __rcu_process_callbacks(struct rcu_state *rsp)
|
||||
local_irq_save(flags);
|
||||
if (cpu_needs_another_gp(rsp, rdp)) {
|
||||
raw_spin_lock(&rcu_get_root(rsp)->lock); /* irqs disabled. */
|
||||
rcu_start_gp(rsp, flags); /* releases above lock */
|
||||
rcu_start_gp(rsp);
|
||||
raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
|
||||
} else {
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
@@ -2179,7 +2315,8 @@ static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
|
||||
static void invoke_rcu_core(void)
|
||||
{
|
||||
raise_softirq(RCU_SOFTIRQ);
|
||||
if (cpu_online(smp_processor_id()))
|
||||
raise_softirq(RCU_SOFTIRQ);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2214,11 +2351,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
|
||||
|
||||
/* Start a new grace period if one not already started. */
|
||||
if (!rcu_gp_in_progress(rsp)) {
|
||||
unsigned long nestflag;
|
||||
struct rcu_node *rnp_root = rcu_get_root(rsp);
|
||||
|
||||
raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
|
||||
rcu_start_gp(rsp, nestflag); /* rlses rnp_root->lock */
|
||||
raw_spin_lock(&rnp_root->lock);
|
||||
rcu_start_gp(rsp);
|
||||
raw_spin_unlock(&rnp_root->lock);
|
||||
} else {
|
||||
/* Give the grace period a kick. */
|
||||
rdp->blimit = LONG_MAX;
|
||||
@@ -2638,19 +2775,27 @@ static int rcu_pending(int cpu)
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if any future RCU-related work will need to be done
|
||||
* by the current CPU, even if none need be done immediately, returning
|
||||
* 1 if so.
|
||||
* Return true if the specified CPU has any callback. If all_lazy is
|
||||
* non-NULL, store an indication of whether all callbacks are lazy.
|
||||
* (If there are no callbacks, all of them are deemed to be lazy.)
|
||||
*/
|
||||
static int rcu_cpu_has_callbacks(int cpu)
|
||||
static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
|
||||
{
|
||||
bool al = true;
|
||||
bool hc = false;
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_state *rsp;
|
||||
|
||||
/* RCU callbacks either ready or pending? */
|
||||
for_each_rcu_flavor(rsp)
|
||||
if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
|
||||
return 1;
|
||||
return 0;
|
||||
for_each_rcu_flavor(rsp) {
|
||||
rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
if (rdp->qlen != rdp->qlen_lazy)
|
||||
al = false;
|
||||
if (rdp->nxtlist)
|
||||
hc = true;
|
||||
}
|
||||
if (all_lazy)
|
||||
*all_lazy = al;
|
||||
return hc;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2869,7 +3014,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
|
||||
rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
|
||||
atomic_set(&rdp->dynticks->dynticks,
|
||||
(atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
|
||||
rcu_prepare_for_idle_init(cpu);
|
||||
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
|
||||
|
||||
/* Add CPU to rcu_node bitmasks. */
|
||||
@@ -2919,7 +3063,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
|
||||
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
|
||||
struct rcu_node *rnp = rdp->mynode;
|
||||
struct rcu_state *rsp;
|
||||
int ret = NOTIFY_OK;
|
||||
|
||||
trace_rcu_utilization("Start CPU hotplug");
|
||||
switch (action) {
|
||||
@@ -2933,21 +3076,12 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
|
||||
rcu_boost_kthread_setaffinity(rnp, -1);
|
||||
break;
|
||||
case CPU_DOWN_PREPARE:
|
||||
if (nocb_cpu_expendable(cpu))
|
||||
rcu_boost_kthread_setaffinity(rnp, cpu);
|
||||
else
|
||||
ret = NOTIFY_BAD;
|
||||
rcu_boost_kthread_setaffinity(rnp, cpu);
|
||||
break;
|
||||
case CPU_DYING:
|
||||
case CPU_DYING_FROZEN:
|
||||
/*
|
||||
* The whole machine is "stopped" except this CPU, so we can
|
||||
* touch any data without introducing corruption. We send the
|
||||
* dying CPU's callbacks to an arbitrarily chosen online CPU.
|
||||
*/
|
||||
for_each_rcu_flavor(rsp)
|
||||
rcu_cleanup_dying_cpu(rsp);
|
||||
rcu_cleanup_after_idle(cpu);
|
||||
break;
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
@@ -2960,7 +3094,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
|
||||
break;
|
||||
}
|
||||
trace_rcu_utilization("End CPU hotplug");
|
||||
return ret;
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -3095,6 +3229,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
|
||||
}
|
||||
rnp->level = i;
|
||||
INIT_LIST_HEAD(&rnp->blkd_tasks);
|
||||
rcu_init_one_nocb(rnp);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3180,8 +3315,7 @@ void __init rcu_init(void)
|
||||
rcu_init_one(&rcu_sched_state, &rcu_sched_data);
|
||||
rcu_init_one(&rcu_bh_state, &rcu_bh_data);
|
||||
__rcu_init_preempt();
|
||||
rcu_init_nocb();
|
||||
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
|
||||
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
|
||||
|
||||
/*
|
||||
* We don't need protection against CPU-hotplug here because
|
||||
|
@@ -88,18 +88,13 @@ struct rcu_dynticks {
|
||||
int dynticks_nmi_nesting; /* Track NMI nesting level. */
|
||||
atomic_t dynticks; /* Even value for idle, else odd. */
|
||||
#ifdef CONFIG_RCU_FAST_NO_HZ
|
||||
int dyntick_drain; /* Prepare-for-idle state variable. */
|
||||
unsigned long dyntick_holdoff;
|
||||
/* No retries for the jiffy of failure. */
|
||||
struct timer_list idle_gp_timer;
|
||||
/* Wake up CPU sleeping with callbacks. */
|
||||
unsigned long idle_gp_timer_expires;
|
||||
/* When to wake up CPU (for repost). */
|
||||
bool idle_first_pass; /* First pass of attempt to go idle? */
|
||||
bool all_lazy; /* Are all CPU's CBs lazy? */
|
||||
unsigned long nonlazy_posted;
|
||||
/* # times non-lazy CBs posted to CPU. */
|
||||
unsigned long nonlazy_posted_snap;
|
||||
/* idle-period nonlazy_posted snapshot. */
|
||||
unsigned long last_accelerate;
|
||||
/* Last jiffy CBs were accelerated. */
|
||||
int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
|
||||
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
|
||||
};
|
||||
@@ -134,9 +129,6 @@ struct rcu_node {
|
||||
/* elements that need to drain to allow the */
|
||||
/* current expedited grace period to */
|
||||
/* complete (only for TREE_PREEMPT_RCU). */
|
||||
atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */
|
||||
/* Since this has meaning only for leaf */
|
||||
/* rcu_node structures, 32 bits suffices. */
|
||||
unsigned long qsmaskinit;
|
||||
/* Per-GP initial value for qsmask & expmask. */
|
||||
unsigned long grpmask; /* Mask to apply to parent qsmask. */
|
||||
@@ -196,6 +188,12 @@ struct rcu_node {
|
||||
/* Refused to boost: not sure why, though. */
|
||||
/* This can happen due to race conditions. */
|
||||
#endif /* #ifdef CONFIG_RCU_BOOST */
|
||||
#ifdef CONFIG_RCU_NOCB_CPU
|
||||
wait_queue_head_t nocb_gp_wq[2];
|
||||
/* Place for rcu_nocb_kthread() to wait GP. */
|
||||
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
|
||||
int need_future_gp[2];
|
||||
/* Counts of upcoming no-CB GP requests. */
|
||||
raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
|
||||
} ____cacheline_internodealigned_in_smp;
|
||||
|
||||
@@ -328,6 +326,11 @@ struct rcu_data {
|
||||
struct task_struct *nocb_kthread;
|
||||
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
|
||||
|
||||
/* 8) RCU CPU stall data. */
|
||||
#ifdef CONFIG_RCU_CPU_STALL_INFO
|
||||
unsigned int softirq_snap; /* Snapshot of softirq activity. */
|
||||
#endif /* #ifdef CONFIG_RCU_CPU_STALL_INFO */
|
||||
|
||||
int cpu;
|
||||
struct rcu_state *rsp;
|
||||
};
|
||||
@@ -375,12 +378,6 @@ struct rcu_state {
|
||||
struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */
|
||||
void (*call)(struct rcu_head *head, /* call_rcu() flavor. */
|
||||
void (*func)(struct rcu_head *head));
|
||||
#ifdef CONFIG_RCU_NOCB_CPU
|
||||
void (*call_remote)(struct rcu_head *head,
|
||||
void (*func)(struct rcu_head *head));
|
||||
/* call_rcu() flavor, but for */
|
||||
/* placing on remote CPU. */
|
||||
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
|
||||
|
||||
/* The following fields are guarded by the root rcu_node's lock. */
|
||||
|
||||
@@ -443,6 +440,7 @@ struct rcu_state {
|
||||
unsigned long gp_max; /* Maximum GP duration in */
|
||||
/* jiffies. */
|
||||
char *name; /* Name of structure. */
|
||||
char abbr; /* Abbreviated name. */
|
||||
struct list_head flavors; /* List of RCU flavors. */
|
||||
};
|
||||
|
||||
@@ -520,7 +518,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
|
||||
struct rcu_node *rnp);
|
||||
#endif /* #ifdef CONFIG_RCU_BOOST */
|
||||
static void __cpuinit rcu_prepare_kthreads(int cpu);
|
||||
static void rcu_prepare_for_idle_init(int cpu);
|
||||
static void rcu_cleanup_after_idle(int cpu);
|
||||
static void rcu_prepare_for_idle(int cpu);
|
||||
static void rcu_idle_count_callbacks_posted(void);
|
||||
@@ -529,16 +526,18 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
|
||||
static void print_cpu_stall_info_end(void);
|
||||
static void zero_cpu_stall_ticks(struct rcu_data *rdp);
|
||||
static void increment_cpu_stall_ticks(void);
|
||||
static int rcu_nocb_needs_gp(struct rcu_state *rsp);
|
||||
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
|
||||
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
|
||||
static void rcu_init_one_nocb(struct rcu_node *rnp);
|
||||
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
|
||||
bool lazy);
|
||||
static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
|
||||
struct rcu_data *rdp);
|
||||
static bool nocb_cpu_expendable(int cpu);
|
||||
static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
|
||||
static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
|
||||
static void init_nocb_callback_list(struct rcu_data *rdp);
|
||||
static void __init rcu_init_nocb(void);
|
||||
static void rcu_kick_nohz_cpu(int cpu);
|
||||
static bool init_nocb_callback_list(struct rcu_data *rdp);
|
||||
|
||||
#endif /* #ifndef RCU_TREE_NONCORE */
|
||||
|
||||
|
@@ -86,11 +86,21 @@ static void __init rcu_bootup_announce_oddness(void)
|
||||
if (nr_cpu_ids != NR_CPUS)
|
||||
printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
|
||||
#ifdef CONFIG_RCU_NOCB_CPU
|
||||
#ifndef CONFIG_RCU_NOCB_CPU_NONE
|
||||
if (!have_rcu_nocb_mask) {
|
||||
alloc_bootmem_cpumask_var(&rcu_nocb_mask);
|
||||
have_rcu_nocb_mask = true;
|
||||
}
|
||||
#ifdef CONFIG_RCU_NOCB_CPU_ZERO
|
||||
pr_info("\tExperimental no-CBs CPU 0\n");
|
||||
cpumask_set_cpu(0, rcu_nocb_mask);
|
||||
#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
|
||||
#ifdef CONFIG_RCU_NOCB_CPU_ALL
|
||||
pr_info("\tExperimental no-CBs for all CPUs\n");
|
||||
cpumask_setall(rcu_nocb_mask);
|
||||
#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
|
||||
#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
|
||||
if (have_rcu_nocb_mask) {
|
||||
if (cpumask_test_cpu(0, rcu_nocb_mask)) {
|
||||
cpumask_clear_cpu(0, rcu_nocb_mask);
|
||||
pr_info("\tCPU 0: illegal no-CBs CPU (cleared).\n");
|
||||
}
|
||||
cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
|
||||
pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf);
|
||||
if (rcu_nocb_poll)
|
||||
@@ -102,7 +112,7 @@ static void __init rcu_bootup_announce_oddness(void)
|
||||
#ifdef CONFIG_TREE_PREEMPT_RCU
|
||||
|
||||
struct rcu_state rcu_preempt_state =
|
||||
RCU_STATE_INITIALIZER(rcu_preempt, call_rcu);
|
||||
RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
|
||||
DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
|
||||
static struct rcu_state *rcu_state = &rcu_preempt_state;
|
||||
|
||||
@@ -1534,14 +1544,7 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
|
||||
int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
|
||||
{
|
||||
*delta_jiffies = ULONG_MAX;
|
||||
return rcu_cpu_has_callbacks(cpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
|
||||
*/
|
||||
static void rcu_prepare_for_idle_init(int cpu)
|
||||
{
|
||||
return rcu_cpu_has_callbacks(cpu, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1578,16 +1581,6 @@ static void rcu_idle_count_callbacks_posted(void)
|
||||
*
|
||||
* The following three proprocessor symbols control this state machine:
|
||||
*
|
||||
* RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt
|
||||
* to satisfy RCU. Beyond this point, it is better to incur a periodic
|
||||
* scheduling-clock interrupt than to loop through the state machine
|
||||
* at full power.
|
||||
* RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are
|
||||
* optional if RCU does not need anything immediately from this
|
||||
* CPU, even if this CPU still has RCU callbacks queued. The first
|
||||
* times through the state machine are mandatory: we need to give
|
||||
* the state machine a chance to communicate a quiescent state
|
||||
* to the RCU core.
|
||||
* RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
|
||||
* to sleep in dyntick-idle mode with RCU callbacks pending. This
|
||||
* is sized to be roughly one RCU grace period. Those energy-efficiency
|
||||
@@ -1603,186 +1596,108 @@ static void rcu_idle_count_callbacks_posted(void)
|
||||
* adjustment, they can be converted into kernel config parameters, though
|
||||
* making the state machine smarter might be a better option.
|
||||
*/
|
||||
#define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */
|
||||
#define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */
|
||||
#define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */
|
||||
#define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */
|
||||
|
||||
static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
|
||||
module_param(rcu_idle_gp_delay, int, 0644);
|
||||
static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
|
||||
module_param(rcu_idle_lazy_gp_delay, int, 0644);
|
||||
|
||||
extern int tick_nohz_enabled;
|
||||
|
||||
/*
|
||||
* Does the specified flavor of RCU have non-lazy callbacks pending on
|
||||
* the specified CPU? Both RCU flavor and CPU are specified by the
|
||||
* rcu_data structure.
|
||||
* Try to advance callbacks for all flavors of RCU on the current CPU.
|
||||
* Afterwards, if there are any callbacks ready for immediate invocation,
|
||||
* return true.
|
||||
*/
|
||||
static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp)
|
||||
static bool rcu_try_advance_all_cbs(void)
|
||||
{
|
||||
return rdp->qlen != rdp->qlen_lazy;
|
||||
}
|
||||
bool cbs_ready = false;
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_node *rnp;
|
||||
struct rcu_state *rsp;
|
||||
|
||||
#ifdef CONFIG_TREE_PREEMPT_RCU
|
||||
for_each_rcu_flavor(rsp) {
|
||||
rdp = this_cpu_ptr(rsp->rda);
|
||||
rnp = rdp->mynode;
|
||||
|
||||
/*
|
||||
* Are there non-lazy RCU-preempt callbacks? (There cannot be if there
|
||||
* is no RCU-preempt in the kernel.)
|
||||
*/
|
||||
static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
|
||||
/*
|
||||
* Don't bother checking unless a grace period has
|
||||
* completed since we last checked and there are
|
||||
* callbacks not yet ready to invoke.
|
||||
*/
|
||||
if (rdp->completed != rnp->completed &&
|
||||
rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
|
||||
rcu_process_gp_end(rsp, rdp);
|
||||
|
||||
return __rcu_cpu_has_nonlazy_callbacks(rdp);
|
||||
}
|
||||
|
||||
#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
|
||||
|
||||
static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */
|
||||
|
||||
/*
|
||||
* Does any flavor of RCU have non-lazy callbacks on the specified CPU?
|
||||
*/
|
||||
static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
|
||||
{
|
||||
return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) ||
|
||||
__rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) ||
|
||||
rcu_preempt_cpu_has_nonlazy_callbacks(cpu);
|
||||
if (cpu_has_callbacks_ready_to_invoke(rdp))
|
||||
cbs_ready = true;
|
||||
}
|
||||
return cbs_ready;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allow the CPU to enter dyntick-idle mode if either: (1) There are no
|
||||
* callbacks on this CPU, (2) this CPU has not yet attempted to enter
|
||||
* dyntick-idle mode, or (3) this CPU is in the process of attempting to
|
||||
* enter dyntick-idle mode. Otherwise, if we have recently tried and failed
|
||||
* to enter dyntick-idle mode, we refuse to try to enter it. After all,
|
||||
* it is better to incur scheduling-clock interrupts than to spin
|
||||
* continuously for the same time duration!
|
||||
* Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
|
||||
* to invoke. If the CPU has callbacks, try to advance them. Tell the
|
||||
* caller to set the timeout based on whether or not there are non-lazy
|
||||
* callbacks.
|
||||
*
|
||||
* The delta_jiffies argument is used to store the time when RCU is
|
||||
* going to need the CPU again if it still has callbacks. The reason
|
||||
* for this is that rcu_prepare_for_idle() might need to post a timer,
|
||||
* but if so, it will do so after tick_nohz_stop_sched_tick() has set
|
||||
* the wakeup time for this CPU. This means that RCU's timer can be
|
||||
* delayed until the wakeup time, which defeats the purpose of posting
|
||||
* a timer.
|
||||
* The caller must have disabled interrupts.
|
||||
*/
|
||||
int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
|
||||
int rcu_needs_cpu(int cpu, unsigned long *dj)
|
||||
{
|
||||
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
|
||||
|
||||
/* Flag a new idle sojourn to the idle-entry state machine. */
|
||||
rdtp->idle_first_pass = 1;
|
||||
/* Snapshot to detect later posting of non-lazy callback. */
|
||||
rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
|
||||
|
||||
/* If no callbacks, RCU doesn't need the CPU. */
|
||||
if (!rcu_cpu_has_callbacks(cpu)) {
|
||||
*delta_jiffies = ULONG_MAX;
|
||||
if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) {
|
||||
*dj = ULONG_MAX;
|
||||
return 0;
|
||||
}
|
||||
if (rdtp->dyntick_holdoff == jiffies) {
|
||||
/* RCU recently tried and failed, so don't try again. */
|
||||
*delta_jiffies = 1;
|
||||
|
||||
/* Attempt to advance callbacks. */
|
||||
if (rcu_try_advance_all_cbs()) {
|
||||
/* Some ready to invoke, so initiate later invocation. */
|
||||
invoke_rcu_core();
|
||||
return 1;
|
||||
}
|
||||
/* Set up for the possibility that RCU will post a timer. */
|
||||
if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
|
||||
*delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies,
|
||||
RCU_IDLE_GP_DELAY) - jiffies;
|
||||
rdtp->last_accelerate = jiffies;
|
||||
|
||||
/* Request timer delay depending on laziness, and round. */
|
||||
if (rdtp->all_lazy) {
|
||||
*dj = round_up(rcu_idle_gp_delay + jiffies,
|
||||
rcu_idle_gp_delay) - jiffies;
|
||||
} else {
|
||||
*delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY;
|
||||
*delta_jiffies = round_jiffies(*delta_jiffies) - jiffies;
|
||||
*dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
|
||||
}
|
||||
return 0;
|
||||
}
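
The rewritten rcu_needs_cpu() reports, via *dj, how long the CPU may stay dyntick-idle before RCU needs it again, instead of driving the old flush/holdoff state machine. A minimal sketch of how a tick-stopping caller might consume that value (cpu, next_wakeup and the surrounding logic are hypothetical, not the actual tick_nohz code):

	unsigned long dj;

	if (rcu_needs_cpu(cpu, &dj)) {
		/* Callbacks are ready to invoke; keep the scheduling tick. */
		return false;
	}
	/* RCU does not need this CPU for at least dj jiffies. */
	next_wakeup = min(next_wakeup, jiffies + dj);
	return true;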
|
||||
|
||||
/*
|
||||
* Handler for smp_call_function_single(). The only point of this
|
||||
* handler is to wake the CPU up, so the handler does only tracing.
|
||||
*/
|
||||
void rcu_idle_demigrate(void *unused)
|
||||
{
|
||||
trace_rcu_prep_idle("Demigrate");
|
||||
}
|
||||
|
||||
/*
|
||||
* Timer handler used to force CPU to start pushing its remaining RCU
|
||||
* callbacks in the case where it entered dyntick-idle mode with callbacks
|
||||
* pending. The handler doesn't really need to do anything because the
|
||||
* real work is done upon re-entry to idle, or by the next scheduling-clock
|
||||
* interrupt should idle not be re-entered.
|
||||
*
|
||||
* One special case: the timer gets migrated without awakening the CPU
|
||||
* on which the timer was scheduled. In this case, we must wake up
|
||||
* that CPU. We do so with smp_call_function_single().
|
||||
*/
|
||||
static void rcu_idle_gp_timer_func(unsigned long cpu_in)
|
||||
{
|
||||
int cpu = (int)cpu_in;
|
||||
|
||||
trace_rcu_prep_idle("Timer");
|
||||
if (cpu != smp_processor_id())
|
||||
smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0);
|
||||
else
|
||||
WARN_ON_ONCE(1); /* Getting here can hang the system... */
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the timer used to pull CPUs out of dyntick-idle mode.
|
||||
*/
|
||||
static void rcu_prepare_for_idle_init(int cpu)
|
||||
{
|
||||
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
|
||||
|
||||
rdtp->dyntick_holdoff = jiffies - 1;
|
||||
setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu);
|
||||
rdtp->idle_gp_timer_expires = jiffies - 1;
|
||||
rdtp->idle_first_pass = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clean up for exit from idle. Because we are exiting from idle, there
|
||||
* is no longer any point to ->idle_gp_timer, so cancel it. This will
|
||||
* do nothing if this timer is not active, so just cancel it unconditionally.
|
||||
*/
|
||||
static void rcu_cleanup_after_idle(int cpu)
|
||||
{
|
||||
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
|
||||
|
||||
del_timer(&rdtp->idle_gp_timer);
|
||||
trace_rcu_prep_idle("Cleanup after idle");
|
||||
rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check to see if any RCU-related work can be done by the current CPU,
|
||||
* and if so, schedule a softirq to get it done. This function is part
|
||||
* of the RCU implementation; it is -not- an exported member of the RCU API.
|
||||
*
|
||||
* The idea is for the current CPU to clear out all work required by the
|
||||
* RCU core for the current grace period, so that this CPU can be permitted
|
||||
* to enter dyntick-idle mode. In some cases, it will need to be awakened
|
||||
* at the end of the grace period by whatever CPU ends the grace period.
|
||||
* This allows CPUs to go dyntick-idle more quickly, and to reduce the
|
||||
* number of wakeups by a modest integer factor.
|
||||
*
|
||||
* Because it is not legal to invoke rcu_process_callbacks() with irqs
|
||||
* disabled, we do one pass of force_quiescent_state(), then do a
|
||||
* invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
|
||||
* later. The ->dyntick_drain field controls the sequencing.
|
||||
* Prepare a CPU for idle from an RCU perspective. The first major task
|
||||
* is to sense whether nohz mode has been enabled or disabled via sysfs.
|
||||
* The second major task is to check to see if a non-lazy callback has
|
||||
* arrived at a CPU that previously had only lazy callbacks. The third
|
||||
* major task is to accelerate (that is, assign grace-period numbers to)
|
||||
* any recently arrived callbacks.
|
||||
*
|
||||
* The caller must have disabled interrupts.
|
||||
*/
|
||||
static void rcu_prepare_for_idle(int cpu)
|
||||
{
|
||||
struct timer_list *tp;
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
|
||||
struct rcu_node *rnp;
|
||||
struct rcu_state *rsp;
|
||||
int tne;
|
||||
|
||||
/* Handle nohz enablement switches conservatively. */
|
||||
tne = ACCESS_ONCE(tick_nohz_enabled);
|
||||
if (tne != rdtp->tick_nohz_enabled_snap) {
|
||||
if (rcu_cpu_has_callbacks(cpu))
|
||||
if (rcu_cpu_has_callbacks(cpu, NULL))
|
||||
invoke_rcu_core(); /* force nohz to see update. */
|
||||
rdtp->tick_nohz_enabled_snap = tne;
|
||||
return;
|
||||
@@ -1790,125 +1705,56 @@ static void rcu_prepare_for_idle(int cpu)
|
||||
if (!tne)
|
||||
return;
|
||||
|
||||
/* Adaptive-tick mode, where usermode execution is idle to RCU. */
|
||||
if (!is_idle_task(current)) {
|
||||
rdtp->dyntick_holdoff = jiffies - 1;
|
||||
if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
|
||||
trace_rcu_prep_idle("User dyntick with callbacks");
|
||||
rdtp->idle_gp_timer_expires =
|
||||
round_up(jiffies + RCU_IDLE_GP_DELAY,
|
||||
RCU_IDLE_GP_DELAY);
|
||||
} else if (rcu_cpu_has_callbacks(cpu)) {
|
||||
rdtp->idle_gp_timer_expires =
|
||||
round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
|
||||
trace_rcu_prep_idle("User dyntick with lazy callbacks");
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
tp = &rdtp->idle_gp_timer;
|
||||
mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
|
||||
/* If this is a no-CBs CPU, no callbacks, just return. */
|
||||
if (is_nocb_cpu(cpu))
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this is an idle re-entry, for example, due to use of
|
||||
* RCU_NONIDLE() or the new idle-loop tracing API within the idle
|
||||
* loop, then don't take any state-machine actions, unless the
|
||||
* momentary exit from idle queued additional non-lazy callbacks.
|
||||
* Instead, repost the ->idle_gp_timer if this CPU has callbacks
|
||||
* pending.
|
||||
* If a non-lazy callback arrived at a CPU having only lazy
|
||||
* callbacks, invoke RCU core for the side-effect of recalculating
|
||||
* idle duration on re-entry to idle.
|
||||
*/
|
||||
if (!rdtp->idle_first_pass &&
|
||||
(rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) {
|
||||
if (rcu_cpu_has_callbacks(cpu)) {
|
||||
tp = &rdtp->idle_gp_timer;
|
||||
mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
|
||||
}
|
||||
return;
|
||||
}
|
||||
rdtp->idle_first_pass = 0;
|
||||
rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1;
|
||||
|
||||
/*
|
||||
* If there are no callbacks on this CPU, enter dyntick-idle mode.
|
||||
* Also reset state to avoid prejudicing later attempts.
|
||||
*/
|
||||
if (!rcu_cpu_has_callbacks(cpu)) {
|
||||
rdtp->dyntick_holdoff = jiffies - 1;
|
||||
rdtp->dyntick_drain = 0;
|
||||
trace_rcu_prep_idle("No callbacks");
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If in holdoff mode, just return. We will presumably have
|
||||
* refrained from disabling the scheduling-clock tick.
|
||||
*/
|
||||
if (rdtp->dyntick_holdoff == jiffies) {
|
||||
trace_rcu_prep_idle("In holdoff");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check and update the ->dyntick_drain sequencing. */
|
||||
if (rdtp->dyntick_drain <= 0) {
|
||||
/* First time through, initialize the counter. */
|
||||
rdtp->dyntick_drain = RCU_IDLE_FLUSHES;
|
||||
} else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES &&
|
||||
!rcu_pending(cpu) &&
|
||||
!local_softirq_pending()) {
|
||||
/* Can we go dyntick-idle despite still having callbacks? */
|
||||
rdtp->dyntick_drain = 0;
|
||||
rdtp->dyntick_holdoff = jiffies;
|
||||
if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
|
||||
trace_rcu_prep_idle("Dyntick with callbacks");
|
||||
rdtp->idle_gp_timer_expires =
|
||||
round_up(jiffies + RCU_IDLE_GP_DELAY,
|
||||
RCU_IDLE_GP_DELAY);
|
||||
} else {
|
||||
rdtp->idle_gp_timer_expires =
|
||||
round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
|
||||
trace_rcu_prep_idle("Dyntick with lazy callbacks");
|
||||
}
|
||||
tp = &rdtp->idle_gp_timer;
|
||||
mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
|
||||
rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
|
||||
return; /* Nothing more to do immediately. */
|
||||
} else if (--(rdtp->dyntick_drain) <= 0) {
|
||||
/* We have hit the limit, so time to give up. */
|
||||
rdtp->dyntick_holdoff = jiffies;
|
||||
trace_rcu_prep_idle("Begin holdoff");
|
||||
invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do one step of pushing the remaining RCU callbacks through
|
||||
* the RCU core state machine.
|
||||
*/
|
||||
#ifdef CONFIG_TREE_PREEMPT_RCU
|
||||
if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
|
||||
rcu_preempt_qs(cpu);
|
||||
force_quiescent_state(&rcu_preempt_state);
|
||||
}
|
||||
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
|
||||
if (per_cpu(rcu_sched_data, cpu).nxtlist) {
|
||||
rcu_sched_qs(cpu);
|
||||
force_quiescent_state(&rcu_sched_state);
|
||||
}
|
||||
if (per_cpu(rcu_bh_data, cpu).nxtlist) {
|
||||
rcu_bh_qs(cpu);
|
||||
force_quiescent_state(&rcu_bh_state);
|
||||
}
|
||||
|
||||
/*
|
||||
* If RCU callbacks are still pending, RCU still needs this CPU.
|
||||
* So try forcing the callbacks through the grace period.
|
||||
*/
|
||||
if (rcu_cpu_has_callbacks(cpu)) {
|
||||
trace_rcu_prep_idle("More callbacks");
|
||||
if (rdtp->all_lazy &&
|
||||
rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) {
|
||||
invoke_rcu_core();
|
||||
} else {
|
||||
trace_rcu_prep_idle("Callbacks drained");
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have not yet accelerated this jiffy, accelerate all
|
||||
* callbacks on this CPU.
|
||||
*/
|
||||
if (rdtp->last_accelerate == jiffies)
|
||||
return;
|
||||
rdtp->last_accelerate = jiffies;
|
||||
for_each_rcu_flavor(rsp) {
|
||||
rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
if (!*rdp->nxttail[RCU_DONE_TAIL])
|
||||
continue;
|
||||
rnp = rdp->mynode;
|
||||
raw_spin_lock(&rnp->lock); /* irqs already disabled. */
|
||||
rcu_accelerate_cbs(rsp, rnp, rdp);
|
||||
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Clean up for exit from idle. Attempt to advance callbacks based on
|
||||
* any grace periods that elapsed while the CPU was idle, and if any
|
||||
* callbacks are now ready to invoke, initiate invocation.
|
||||
*/
|
||||
static void rcu_cleanup_after_idle(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_state *rsp;
|
||||
|
||||
if (is_nocb_cpu(cpu))
|
||||
return;
|
||||
rcu_try_advance_all_cbs();
|
||||
for_each_rcu_flavor(rsp) {
|
||||
rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
if (cpu_has_callbacks_ready_to_invoke(rdp))
|
||||
invoke_rcu_core();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2016,16 +1862,13 @@ early_initcall(rcu_register_oom_notifier);
|
||||
static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
|
||||
{
|
||||
struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
|
||||
struct timer_list *tltp = &rdtp->idle_gp_timer;
|
||||
char c;
|
||||
unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap;
|
||||
|
||||
c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.';
|
||||
if (timer_pending(tltp))
|
||||
sprintf(cp, "drain=%d %c timer=%lu",
|
||||
rdtp->dyntick_drain, c, tltp->expires - jiffies);
|
||||
else
|
||||
sprintf(cp, "drain=%d %c timer not pending",
|
||||
rdtp->dyntick_drain, c);
|
||||
sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c",
|
||||
rdtp->last_accelerate & 0xffff, jiffies & 0xffff,
|
||||
ulong2long(nlpd),
|
||||
rdtp->all_lazy ? 'L' : '.',
|
||||
rdtp->tick_nohz_enabled_snap ? '.' : 'D');
|
||||
}
|
||||
|
||||
#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
|
||||
@@ -2071,10 +1914,11 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
|
||||
ticks_value = rsp->gpnum - rdp->gpnum;
|
||||
}
|
||||
print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
|
||||
printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n",
|
||||
printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n",
|
||||
cpu, ticks_value, ticks_title,
|
||||
atomic_read(&rdtp->dynticks) & 0xfff,
|
||||
rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
|
||||
rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
|
||||
fast_no_hz);
|
||||
}
|
||||
|
||||
@@ -2088,6 +1932,7 @@ static void print_cpu_stall_info_end(void)
|
||||
static void zero_cpu_stall_ticks(struct rcu_data *rdp)
|
||||
{
|
||||
rdp->ticks_this_gp = 0;
|
||||
rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());
|
||||
}
|
||||
|
||||
/* Increment ->ticks_this_gp for all flavors of RCU. */
|
||||
@@ -2166,6 +2011,47 @@ static int __init parse_rcu_nocb_poll(char *arg)
|
||||
}
|
||||
early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
|
||||
|
||||
/*
|
||||
* Do any no-CBs CPUs need another grace period?
|
||||
*
|
||||
* Interrupts must be disabled. If the caller does not hold the root
|
||||
* rcu_node structure's ->lock, the results are advisory only.
|
||||
*/
|
||||
static int rcu_nocb_needs_gp(struct rcu_state *rsp)
|
||||
{
|
||||
struct rcu_node *rnp = rcu_get_root(rsp);
|
||||
|
||||
return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1];
|
||||
}
|
||||
|
||||
/*
|
||||
* Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
|
||||
* grace period.
|
||||
*/
|
||||
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
|
||||
{
|
||||
wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the root rcu_node structure's ->need_future_gp field
|
||||
* based on the sum of those of all rcu_node structures. This does
|
||||
* double-count the root rcu_node structure's requests, but this
|
||||
* is necessary to handle the possibility of a rcu_nocb_kthread()
|
||||
* having awakened during the time that the rcu_node structures
|
||||
* were being updated for the end of the previous grace period.
|
||||
*/
|
||||
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
|
||||
{
|
||||
rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
|
||||
}
|
||||
|
||||
static void rcu_init_one_nocb(struct rcu_node *rnp)
|
||||
{
|
||||
init_waitqueue_head(&rnp->nocb_gp_wq[0]);
|
||||
init_waitqueue_head(&rnp->nocb_gp_wq[1]);
|
||||
}
|
||||
|
||||
/* Is the specified CPU a no-CBs CPU? */
|
||||
bool rcu_is_nocb_cpu(int cpu)
|
||||
{
|
||||
@@ -2228,6 +2114,13 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
|
||||
if (!rcu_is_nocb_cpu(rdp->cpu))
|
||||
return 0;
|
||||
__call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy);
|
||||
if (__is_kfree_rcu_offset((unsigned long)rhp->func))
|
||||
trace_rcu_kfree_callback(rdp->rsp->name, rhp,
|
||||
(unsigned long)rhp->func,
|
||||
rdp->qlen_lazy, rdp->qlen);
|
||||
else
|
||||
trace_rcu_callback(rdp->rsp->name, rhp,
|
||||
rdp->qlen_lazy, rdp->qlen);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -2266,95 +2159,36 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
|
||||
}
|
||||
|
||||
/*
|
||||
* There must be at least one non-no-CBs CPU in operation at any given
|
||||
* time, because no-CBs CPUs are not capable of initiating grace periods
|
||||
* independently. This function therefore complains if the specified
|
||||
* CPU is the last non-no-CBs CPU, allowing the CPU-hotplug system to
|
||||
* avoid offlining the last such CPU. (Recursion is a wonderful thing,
|
||||
* but you have to have a base case!)
|
||||
* If necessary, kick off a new grace period, and either way wait
|
||||
* for a subsequent grace period to complete.
|
||||
*/
|
||||
static bool nocb_cpu_expendable(int cpu)
|
||||
static void rcu_nocb_wait_gp(struct rcu_data *rdp)
|
||||
{
|
||||
cpumask_var_t non_nocb_cpus;
|
||||
int ret;
|
||||
unsigned long c;
|
||||
bool d;
|
||||
unsigned long flags;
|
||||
struct rcu_node *rnp = rdp->mynode;
|
||||
|
||||
raw_spin_lock_irqsave(&rnp->lock, flags);
|
||||
c = rcu_start_future_gp(rnp, rdp);
|
||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||
|
||||
/*
|
||||
* If there are no no-CB CPUs or if this CPU is not a no-CB CPU,
|
||||
* then offlining this CPU is harmless. Let it happen.
|
||||
* Wait for the grace period. Do so interruptibly to avoid messing
|
||||
* up the load average.
|
||||
*/
|
||||
if (!have_rcu_nocb_mask || rcu_is_nocb_cpu(cpu))
|
||||
return 1;
|
||||
|
||||
/* If no memory, play it safe and keep the CPU around. */
|
||||
if (!alloc_cpumask_var(&non_nocb_cpus, GFP_NOIO))
|
||||
return 0;
|
||||
cpumask_andnot(non_nocb_cpus, cpu_online_mask, rcu_nocb_mask);
|
||||
cpumask_clear_cpu(cpu, non_nocb_cpus);
|
||||
ret = !cpumask_empty(non_nocb_cpus);
|
||||
free_cpumask_var(non_nocb_cpus);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper structure for remote registry of RCU callbacks.
|
||||
* This is needed for when a no-CBs CPU needs to start a grace period.
|
||||
* If it just invokes call_rcu(), the resulting callback will be queued,
|
||||
* which can result in deadlock.
|
||||
*/
|
||||
struct rcu_head_remote {
|
||||
struct rcu_head *rhp;
|
||||
call_rcu_func_t *crf;
|
||||
void (*func)(struct rcu_head *rhp);
|
||||
};
|
||||
|
||||
/*
|
||||
* Register a callback as specified by the rcu_head_remote struct.
|
||||
* This function is intended to be invoked via smp_call_function_single().
|
||||
*/
|
||||
static void call_rcu_local(void *arg)
|
||||
{
|
||||
struct rcu_head_remote *rhrp =
|
||||
container_of(arg, struct rcu_head_remote, rhp);
|
||||
|
||||
rhrp->crf(rhrp->rhp, rhrp->func);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up an rcu_head_remote structure and then invoke call_rcu_local()
|
||||
* on CPU 0 (which is guaranteed to be a non-no-CBs CPU) via
|
||||
* smp_call_function_single().
|
||||
*/
|
||||
static void invoke_crf_remote(struct rcu_head *rhp,
|
||||
void (*func)(struct rcu_head *rhp),
|
||||
call_rcu_func_t crf)
|
||||
{
|
||||
struct rcu_head_remote rhr;
|
||||
|
||||
rhr.rhp = rhp;
|
||||
rhr.crf = crf;
|
||||
rhr.func = func;
|
||||
smp_call_function_single(0, call_rcu_local, &rhr, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper functions to be passed to wait_rcu_gp(), each of which
|
||||
* invokes invoke_crf_remote() to register a callback appropriately.
|
||||
*/
|
||||
static void __maybe_unused
|
||||
call_rcu_preempt_remote(struct rcu_head *rhp,
|
||||
void (*func)(struct rcu_head *rhp))
|
||||
{
|
||||
invoke_crf_remote(rhp, func, call_rcu);
|
||||
}
|
||||
static void call_rcu_bh_remote(struct rcu_head *rhp,
|
||||
void (*func)(struct rcu_head *rhp))
|
||||
{
|
||||
invoke_crf_remote(rhp, func, call_rcu_bh);
|
||||
}
|
||||
static void call_rcu_sched_remote(struct rcu_head *rhp,
|
||||
void (*func)(struct rcu_head *rhp))
|
||||
{
|
||||
invoke_crf_remote(rhp, func, call_rcu_sched);
|
||||
trace_rcu_future_gp(rnp, rdp, c, "StartWait");
|
||||
for (;;) {
|
||||
wait_event_interruptible(
|
||||
rnp->nocb_gp_wq[c & 0x1],
|
||||
(d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
|
||||
if (likely(d))
|
||||
break;
|
||||
flush_signals(current);
|
||||
trace_rcu_future_gp(rnp, rdp, c, "ResumeWait");
|
||||
}
|
||||
trace_rcu_future_gp(rnp, rdp, c, "EndWait");
|
||||
smp_mb(); /* Ensure that CB invocation happens after GP end. */
|
||||
}
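
rcu_nocb_wait_gp() sleeps interruptibly so that the rcuo kthreads do not inflate the load average while waiting for a grace period, and flushes signals on spurious wakeups before retrying. A minimal sketch of the same pattern in a generic kthread (my_wq and my_cond are hypothetical):

	for (;;) {
		wait_event_interruptible(my_wq, ACCESS_ONCE(my_cond));
		if (ACCESS_ONCE(my_cond))
			break;
		flush_signals(current);
	}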
|
||||
|
||||
/*
|
||||
@@ -2391,7 +2225,7 @@ static int rcu_nocb_kthread(void *arg)
|
||||
cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
|
||||
ACCESS_ONCE(rdp->nocb_p_count) += c;
|
||||
ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl;
|
||||
wait_rcu_gp(rdp->rsp->call_remote);
|
||||
rcu_nocb_wait_gp(rdp);
|
||||
|
||||
/* Each pass through the following loop invokes a callback. */
|
||||
trace_rcu_batch_start(rdp->rsp->name, cl, c, -1);
|
||||
@@ -2437,33 +2271,42 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
|
||||
return;
|
||||
for_each_cpu(cpu, rcu_nocb_mask) {
|
||||
rdp = per_cpu_ptr(rsp->rda, cpu);
|
||||
t = kthread_run(rcu_nocb_kthread, rdp, "rcuo%d", cpu);
|
||||
t = kthread_run(rcu_nocb_kthread, rdp,
|
||||
"rcuo%c/%d", rsp->abbr, cpu);
|
||||
BUG_ON(IS_ERR(t));
|
||||
ACCESS_ONCE(rdp->nocb_kthread) = t;
|
||||
}
|
||||
}
|
||||
|
||||
/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
|
||||
static void init_nocb_callback_list(struct rcu_data *rdp)
|
||||
static bool init_nocb_callback_list(struct rcu_data *rdp)
|
||||
{
|
||||
if (rcu_nocb_mask == NULL ||
|
||||
!cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
|
||||
return;
|
||||
return false;
|
||||
rdp->nxttail[RCU_NEXT_TAIL] = NULL;
|
||||
}
|
||||
|
||||
/* Initialize the ->call_remote fields in the rcu_state structures. */
|
||||
static void __init rcu_init_nocb(void)
|
||||
{
|
||||
#ifdef CONFIG_PREEMPT_RCU
|
||||
rcu_preempt_state.call_remote = call_rcu_preempt_remote;
|
||||
#endif /* #ifdef CONFIG_PREEMPT_RCU */
|
||||
rcu_bh_state.call_remote = call_rcu_bh_remote;
|
||||
rcu_sched_state.call_remote = call_rcu_sched_remote;
|
||||
return true;
|
||||
}
|
||||
|
||||
#else /* #ifdef CONFIG_RCU_NOCB_CPU */
|
||||
|
||||
static int rcu_nocb_needs_gp(struct rcu_state *rsp)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
|
||||
{
|
||||
}
|
||||
|
||||
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
|
||||
{
|
||||
}
|
||||
|
||||
static void rcu_init_one_nocb(struct rcu_node *rnp)
|
||||
{
|
||||
}
|
||||
|
||||
static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
|
||||
bool lazy)
|
||||
{
|
||||
@@ -2476,11 +2319,6 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool nocb_cpu_expendable(int cpu)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
|
||||
{
|
||||
}
|
||||
@@ -2489,12 +2327,9 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
|
||||
{
|
||||
}
|
||||
|
||||
static void init_nocb_callback_list(struct rcu_data *rdp)
|
||||
{
|
||||
}
|
||||
|
||||
static void __init rcu_init_nocb(void)
|
||||
static bool init_nocb_callback_list(struct rcu_data *rdp)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
|
||||
kernel/rcutree_trace.c
@@ -46,8 +46,6 @@
|
||||
#define RCU_TREE_NONCORE
|
||||
#include "rcutree.h"
|
||||
|
||||
#define ulong2long(a) (*(long *)(&(a)))
|
||||
|
||||
static int r_open(struct inode *inode, struct file *file,
|
||||
const struct seq_operations *op)
|
||||
{
|
||||
kernel/resource.c
@@ -21,6 +21,7 @@
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/pfn.h>
|
||||
#include <linux/mm.h>
|
||||
#include <asm/io.h>
|
||||
|
||||
|
||||
@@ -50,6 +51,14 @@ struct resource_constraint {
|
||||
|
||||
static DEFINE_RWLOCK(resource_lock);
|
||||
|
||||
/*
|
||||
* For memory hotplug, there is no way to free resource entries allocated
|
||||
* by boot mem after the system is up. So for reusing the resource entry
|
||||
* we need to remember the resource.
|
||||
*/
|
||||
static struct resource *bootmem_resource_free;
|
||||
static DEFINE_SPINLOCK(bootmem_resource_lock);
|
||||
|
||||
static void *r_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
{
|
||||
struct resource *p = v;
|
||||
@@ -151,6 +160,40 @@ __initcall(ioresources_init);
|
||||
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
|
||||
static void free_resource(struct resource *res)
|
||||
{
|
||||
if (!res)
|
||||
return;
|
||||
|
||||
if (!PageSlab(virt_to_head_page(res))) {
|
||||
spin_lock(&bootmem_resource_lock);
|
||||
res->sibling = bootmem_resource_free;
|
||||
bootmem_resource_free = res;
|
||||
spin_unlock(&bootmem_resource_lock);
|
||||
} else {
|
||||
kfree(res);
|
||||
}
|
||||
}
|
||||
|
||||
static struct resource *alloc_resource(gfp_t flags)
|
||||
{
|
||||
struct resource *res = NULL;
|
||||
|
||||
spin_lock(&bootmem_resource_lock);
|
||||
if (bootmem_resource_free) {
|
||||
res = bootmem_resource_free;
|
||||
bootmem_resource_free = res->sibling;
|
||||
}
|
||||
spin_unlock(&bootmem_resource_lock);
|
||||
|
||||
if (res)
|
||||
memset(res, 0, sizeof(struct resource));
|
||||
else
|
||||
res = kzalloc(sizeof(struct resource), flags);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Return the conflict entry if you can't request it */
|
||||
static struct resource * __request_resource(struct resource *root, struct resource *new)
|
||||
{
|
||||
@@ -706,24 +749,13 @@ void insert_resource_expand_to_fit(struct resource *root, struct resource *new)
|
||||
write_unlock(&resource_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* adjust_resource - modify a resource's start and size
|
||||
* @res: resource to modify
|
||||
* @start: new start value
|
||||
* @size: new size
|
||||
*
|
||||
* Given an existing resource, change its start and size to match the
|
||||
* arguments. Returns 0 on success, -EBUSY if it can't fit.
|
||||
* Existing children of the resource are assumed to be immutable.
|
||||
*/
|
||||
int adjust_resource(struct resource *res, resource_size_t start, resource_size_t size)
|
||||
static int __adjust_resource(struct resource *res, resource_size_t start,
|
||||
resource_size_t size)
|
||||
{
|
||||
struct resource *tmp, *parent = res->parent;
|
||||
resource_size_t end = start + size - 1;
|
||||
int result = -EBUSY;
|
||||
|
||||
write_lock(&resource_lock);
|
||||
|
||||
if (!parent)
|
||||
goto skip;
|
||||
|
||||
@@ -751,6 +783,26 @@ skip:
|
||||
result = 0;
|
||||
|
||||
out:
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* adjust_resource - modify a resource's start and size
|
||||
* @res: resource to modify
|
||||
* @start: new start value
|
||||
* @size: new size
|
||||
*
|
||||
* Given an existing resource, change its start and size to match the
|
||||
* arguments. Returns 0 on success, -EBUSY if it can't fit.
|
||||
* Existing children of the resource are assumed to be immutable.
|
||||
*/
|
||||
int adjust_resource(struct resource *res, resource_size_t start,
|
||||
resource_size_t size)
|
||||
{
|
||||
int result;
|
||||
|
||||
write_lock(&resource_lock);
|
||||
result = __adjust_resource(res, start, size);
|
||||
write_unlock(&resource_lock);
|
||||
return result;
|
||||
}
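
With the locking factored out, adjust_resource() is a thin wrapper and __adjust_resource() can be reused by the hot-remove path below, which already holds resource_lock. A hedged usage sketch of the wrapper (res is assumed to come from an earlier __request_region(); the shrink amount is arbitrary):

	/* Shrink a previously requested region by one page (hypothetical). */
	if (adjust_resource(res, res->start, resource_size(res) - PAGE_SIZE))
		pr_warn("cannot shrink %pR\n", res);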
|
||||
@@ -762,7 +814,7 @@ static void __init __reserve_region_with_split(struct resource *root,
|
||||
{
|
||||
struct resource *parent = root;
|
||||
struct resource *conflict;
|
||||
struct resource *res = kzalloc(sizeof(*res), GFP_ATOMIC);
|
||||
struct resource *res = alloc_resource(GFP_ATOMIC);
|
||||
struct resource *next_res = NULL;
|
||||
|
||||
if (!res)
|
||||
@@ -787,7 +839,7 @@ static void __init __reserve_region_with_split(struct resource *root,
|
||||
/* conflict covered whole area */
|
||||
if (conflict->start <= res->start &&
|
||||
conflict->end >= res->end) {
|
||||
kfree(res);
|
||||
free_resource(res);
|
||||
WARN_ON(next_res);
|
||||
break;
|
||||
}
|
||||
@@ -797,10 +849,9 @@ static void __init __reserve_region_with_split(struct resource *root,
|
||||
end = res->end;
|
||||
res->end = conflict->start - 1;
|
||||
if (conflict->end < end) {
|
||||
next_res = kzalloc(sizeof(*next_res),
|
||||
GFP_ATOMIC);
|
||||
next_res = alloc_resource(GFP_ATOMIC);
|
||||
if (!next_res) {
|
||||
kfree(res);
|
||||
free_resource(res);
|
||||
break;
|
||||
}
|
||||
next_res->name = name;
|
||||
@@ -890,7 +941,7 @@ struct resource * __request_region(struct resource *parent,
|
||||
const char *name, int flags)
|
||||
{
|
||||
DECLARE_WAITQUEUE(wait, current);
|
||||
struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
|
||||
struct resource *res = alloc_resource(GFP_KERNEL);
|
||||
|
||||
if (!res)
|
||||
return NULL;
|
||||
@@ -924,7 +975,7 @@ struct resource * __request_region(struct resource *parent,
|
||||
continue;
|
||||
}
|
||||
/* Uhhuh, that didn't work out.. */
|
||||
kfree(res);
|
||||
free_resource(res);
|
||||
res = NULL;
|
||||
break;
|
||||
}
|
||||
@@ -958,7 +1009,7 @@ int __check_region(struct resource *parent, resource_size_t start,
|
||||
return -EBUSY;
|
||||
|
||||
release_resource(res);
|
||||
kfree(res);
|
||||
free_resource(res);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(__check_region);
|
||||
@@ -998,7 +1049,7 @@ void __release_region(struct resource *parent, resource_size_t start,
|
||||
write_unlock(&resource_lock);
|
||||
if (res->flags & IORESOURCE_MUXED)
|
||||
wake_up(&muxed_resource_wait);
|
||||
kfree(res);
|
||||
free_resource(res);
|
||||
return;
|
||||
}
|
||||
p = &res->sibling;
|
||||
@@ -1012,6 +1063,109 @@ void __release_region(struct resource *parent, resource_size_t start,
|
||||
}
|
||||
EXPORT_SYMBOL(__release_region);
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
/**
|
||||
* release_mem_region_adjustable - release a previously reserved memory region
|
||||
* @parent: parent resource descriptor
|
||||
* @start: resource start address
|
||||
* @size: resource region size
|
||||
*
|
||||
* This interface is intended for memory hot-delete. The requested region
|
||||
* is released from a currently busy memory resource. The requested region
|
||||
* must either match exactly or fit into a single busy resource entry. In
|
||||
* the latter case, the remaining resource is adjusted accordingly.
|
||||
* Existing children of the busy memory resource must be immutable in the
|
||||
* request.
|
||||
*
|
||||
* Note:
|
||||
* - Additional release conditions, such as overlapping region, can be
|
||||
* supported after they are confirmed as valid cases.
|
||||
* - When a busy memory resource gets split into two entries, the code
|
||||
* assumes that all children remain in the lower address entry for
|
||||
* simplicity. Enhance this logic when necessary.
|
||||
*/
|
||||
int release_mem_region_adjustable(struct resource *parent,
|
||||
resource_size_t start, resource_size_t size)
|
||||
{
|
||||
struct resource **p;
|
||||
struct resource *res;
|
||||
struct resource *new_res;
|
||||
resource_size_t end;
|
||||
int ret = -EINVAL;
|
||||
|
||||
end = start + size - 1;
|
||||
if ((start < parent->start) || (end > parent->end))
|
||||
return ret;
|
||||
|
||||
/* The alloc_resource() result gets checked later */
|
||||
new_res = alloc_resource(GFP_KERNEL);
|
||||
|
||||
p = &parent->child;
|
||||
write_lock(&resource_lock);
|
||||
|
||||
while ((res = *p)) {
|
||||
if (res->start >= end)
|
||||
break;
|
||||
|
||||
/* look for the next resource if the requested range does not fit into this one */
|
||||
if (res->start > start || res->end < end) {
|
||||
p = &res->sibling;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!(res->flags & IORESOURCE_MEM))
|
||||
break;
|
||||
|
||||
if (!(res->flags & IORESOURCE_BUSY)) {
|
||||
p = &res->child;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* found the target resource; let's adjust accordingly */
|
||||
if (res->start == start && res->end == end) {
|
||||
/* free the whole entry */
|
||||
*p = res->sibling;
|
||||
free_resource(res);
|
||||
ret = 0;
|
||||
} else if (res->start == start && res->end != end) {
|
||||
/* adjust the start */
|
||||
ret = __adjust_resource(res, end + 1,
|
||||
res->end - end);
|
||||
} else if (res->start != start && res->end == end) {
|
||||
/* adjust the end */
|
||||
ret = __adjust_resource(res, res->start,
|
||||
start - res->start);
|
||||
} else {
|
||||
/* split into two entries */
|
||||
if (!new_res) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
new_res->name = res->name;
|
||||
new_res->start = end + 1;
|
||||
new_res->end = res->end;
|
||||
new_res->flags = res->flags;
|
||||
new_res->parent = res->parent;
|
||||
new_res->sibling = res->sibling;
|
||||
new_res->child = NULL;
|
||||
|
||||
ret = __adjust_resource(res, res->start,
|
||||
start - res->start);
|
||||
if (ret)
|
||||
break;
|
||||
res->sibling = new_res;
|
||||
new_res = NULL;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
write_unlock(&resource_lock);
|
||||
free_resource(new_res);
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_MEMORY_HOTREMOVE */
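
A hedged sketch of how a memory hot-remove caller might use the new interface; the address range is made up, and iomem_resource is the usual root of the memory resource tree:

	resource_size_t start = 0x40000000;	/* hypothetical offlined range */
	resource_size_t size = 128 << 20;	/* 128 MiB */

	if (release_mem_region_adjustable(&iomem_resource, start, size))
		pr_warn("failed to release memory region\n");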
|
||||
|
||||
/*
|
||||
* Managed region resource
|
||||
*/
|
||||
kernel/rtmutex-tester.c
@@ -14,6 +14,7 @@
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/stat.h>
|
||||
|
||||
#include "rtmutex.h"
|
||||
|
||||
@@ -366,8 +367,8 @@ static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *at
|
||||
return curr - buf;
|
||||
}
|
||||
|
||||
static DEVICE_ATTR(status, 0600, sysfs_test_status, NULL);
|
||||
static DEVICE_ATTR(command, 0600, NULL, sysfs_test_command);
|
||||
static DEVICE_ATTR(status, S_IRUSR, sysfs_test_status, NULL);
|
||||
static DEVICE_ATTR(command, S_IWUSR, NULL, sysfs_test_command);
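
The permission change swaps the raw 0600 mode for symbolic macros that also tighten access: the read-only status attribute and the write-only command attribute now advertise only what they support.

	/* From <linux/stat.h>: S_IRUSR == 0400 (owner read), S_IWUSR == 0200 (owner write). */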
|
||||
|
||||
static struct bus_type rttest_subsys = {
|
||||
.name = "rttest",
|
||||
kernel/sched/Makefile
@@ -16,3 +16,4 @@ obj-$(CONFIG_SMP) += cpupri.o
|
||||
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
|
||||
obj-$(CONFIG_SCHEDSTATS) += stats.o
|
||||
obj-$(CONFIG_SCHED_DEBUG) += debug.o
|
||||
obj-$(CONFIG_CGROUP_CPUACCT) += cpuacct.o
|
||||
kernel/sched/clock.c
@@ -176,10 +176,36 @@ static u64 sched_clock_remote(struct sched_clock_data *scd)
|
||||
u64 this_clock, remote_clock;
|
||||
u64 *ptr, old_val, val;
|
||||
|
||||
#if BITS_PER_LONG != 64
|
||||
again:
|
||||
/*
|
||||
* Careful here: The local and the remote clock values need to
|
||||
* be read out atomically as we need to compare the values and
|
||||
* then update either the local or the remote side. So the
|
||||
* cmpxchg64 below only protects one readout.
|
||||
*
|
||||
* We must reread via sched_clock_local() in the retry case on
|
||||
* 32bit as an NMI could use sched_clock_local() via the
|
||||
* tracer and hit between the readout of
|
||||
* the low 32bit and the high 32bit portion.
|
||||
*/
|
||||
this_clock = sched_clock_local(my_scd);
|
||||
/*
|
||||
* We must enforce atomic readout on 32bit, otherwise the
|
||||
* update on the remote cpu can hit in between the readout of
|
||||
* the low 32bit and the high 32bit portion.
|
||||
*/
|
||||
remote_clock = cmpxchg64(&scd->clock, 0, 0);
|
||||
#else
|
||||
/*
|
||||
* On 64bit the read of [my]scd->clock is atomic versus the
|
||||
* update, so we can avoid the above 32bit dance.
|
||||
*/
|
||||
sched_clock_local(my_scd);
|
||||
again:
|
||||
this_clock = my_scd->clock;
|
||||
remote_clock = scd->clock;
|
||||
#endif
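
On the 32-bit side, cmpxchg64(&scd->clock, 0, 0) is used purely as an atomic 64-bit load: a store happens only when the value is already zero (and then it stores the same zero back), and in every case the returned old value is a consistent snapshot of both halves. A minimal illustration of the idiom (shared_ns is hypothetical):

	static u64 shared_ns;	/* 64-bit value updated elsewhere */

	static u64 read_shared_ns_atomic(void)
	{
		/* Never changes shared_ns, but reads it atomically even on 32-bit. */
		return cmpxchg64(&shared_ns, 0, 0);
	}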
|
||||
|
||||
/*
|
||||
* Use the opportunity that we have both locks
|
||||
kernel/sched/core.c
@@ -512,11 +512,6 @@ static inline void init_hrtick(void)
|
||||
* the target CPU.
|
||||
*/
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
#ifndef tsk_is_polling
|
||||
#define tsk_is_polling(t) 0
|
||||
#endif
|
||||
|
||||
void resched_task(struct task_struct *p)
|
||||
{
|
||||
int cpu;
|
||||
@@ -1536,8 +1531,10 @@ static void try_to_wake_up_local(struct task_struct *p)
|
||||
{
|
||||
struct rq *rq = task_rq(p);
|
||||
|
||||
BUG_ON(rq != this_rq());
|
||||
BUG_ON(p == current);
|
||||
if (WARN_ON_ONCE(rq != this_rq()) ||
|
||||
WARN_ON_ONCE(p == current))
|
||||
return;
|
||||
|
||||
lockdep_assert_held(&rq->lock);
|
||||
|
||||
if (!raw_spin_trylock(&p->pi_lock)) {
|
||||
@@ -3037,51 +3034,6 @@ void __sched schedule_preempt_disabled(void)
|
||||
preempt_disable();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
|
||||
|
||||
static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
|
||||
{
|
||||
if (lock->owner != owner)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Ensure we emit the owner->on_cpu, dereference _after_ checking
|
||||
* lock->owner still matches owner, if that fails, owner might
|
||||
* point to free()d memory, if it still matches, the rcu_read_lock()
|
||||
* ensures the memory stays valid.
|
||||
*/
|
||||
barrier();
|
||||
|
||||
return owner->on_cpu;
|
||||
}
|
||||
|
||||
/*
|
||||
* Look out! "owner" is an entirely speculative pointer
|
||||
* access and not reliable.
|
||||
*/
|
||||
int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
|
||||
{
|
||||
if (!sched_feat(OWNER_SPIN))
|
||||
return 0;
|
||||
|
||||
rcu_read_lock();
|
||||
while (owner_running(lock, owner)) {
|
||||
if (need_resched())
|
||||
break;
|
||||
|
||||
arch_mutex_cpu_relax();
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* We break out the loop above on need_resched() and when the
|
||||
* owner changed, which is a sign for heavy contention. Return
|
||||
* success only when lock->owner is NULL.
|
||||
*/
|
||||
return lock->owner == NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PREEMPT
|
||||
/*
|
||||
* this is the entry point to schedule() from in-kernel preemption
|
||||
@@ -4170,6 +4122,10 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
|
||||
get_task_struct(p);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (p->flags & PF_NO_SETAFFINITY) {
|
||||
retval = -EINVAL;
|
||||
goto out_put_task;
|
||||
}
|
||||
if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
|
||||
retval = -ENOMEM;
|
||||
goto out_put_task;
|
||||
@@ -4817,11 +4773,6 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
do_set_cpus_allowed(p, new_mask);
|
||||
|
||||
/* Can the task run on the task's current CPU? If so, we're done */
|
||||
@@ -5043,7 +4994,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep)
|
||||
}
|
||||
|
||||
static int min_load_idx = 0;
|
||||
static int max_load_idx = CPU_LOAD_IDX_MAX;
|
||||
static int max_load_idx = CPU_LOAD_IDX_MAX-1;
|
||||
|
||||
static void
|
||||
set_table_entry(struct ctl_table *entry,
|
||||
@@ -6292,7 +6243,7 @@ static void sched_init_numa(void)
|
||||
* 'level' contains the number of unique distances, excluding the
|
||||
* identity distance node_distance(i,i).
|
||||
*
|
||||
* The sched_domains_nume_distance[] array includes the actual distance
|
||||
* The sched_domains_numa_distance[] array includes the actual distance
|
||||
* numbers.
|
||||
*/
|
||||
|
||||
@@ -6913,7 +6864,7 @@ struct task_group root_task_group;
|
||||
LIST_HEAD(task_groups);
|
||||
#endif
|
||||
|
||||
DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
|
||||
DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
|
||||
|
||||
void __init sched_init(void)
|
||||
{
|
||||
@@ -6950,7 +6901,7 @@ void __init sched_init(void)
|
||||
#endif /* CONFIG_RT_GROUP_SCHED */
|
||||
#ifdef CONFIG_CPUMASK_OFFSTACK
|
||||
for_each_possible_cpu(i) {
|
||||
per_cpu(load_balance_tmpmask, i) = (void *)ptr;
|
||||
per_cpu(load_balance_mask, i) = (void *)ptr;
|
||||
ptr += cpumask_size();
|
||||
}
|
||||
#endif /* CONFIG_CPUMASK_OFFSTACK */
|
||||
@@ -6976,12 +6927,6 @@ void __init sched_init(void)
|
||||
|
||||
#endif /* CONFIG_CGROUP_SCHED */
|
||||
|
||||
#ifdef CONFIG_CGROUP_CPUACCT
|
||||
root_cpuacct.cpustat = &kernel_cpustat;
|
||||
root_cpuacct.cpuusage = alloc_percpu(u64);
|
||||
/* Too early, not expected to fail */
|
||||
BUG_ON(!root_cpuacct.cpuusage);
|
||||
#endif
|
||||
for_each_possible_cpu(i) {
|
||||
struct rq *rq;
|
||||
|
||||
@@ -8083,226 +8028,6 @@ struct cgroup_subsys cpu_cgroup_subsys = {
|
||||
|
||||
#endif /* CONFIG_CGROUP_SCHED */
|
||||
|
||||
#ifdef CONFIG_CGROUP_CPUACCT
|
||||
|
||||
/*
|
||||
* CPU accounting code for task groups.
|
||||
*
|
||||
* Based on the work by Paul Menage (menage@google.com) and Balbir Singh
|
||||
* (balbir@in.ibm.com).
|
||||
*/
|
||||
|
||||
struct cpuacct root_cpuacct;
|
||||
|
||||
/* create a new cpu accounting group */
|
||||
static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
|
||||
{
|
||||
struct cpuacct *ca;
|
||||
|
||||
if (!cgrp->parent)
|
||||
return &root_cpuacct.css;
|
||||
|
||||
ca = kzalloc(sizeof(*ca), GFP_KERNEL);
|
||||
if (!ca)
|
||||
goto out;
|
||||
|
||||
ca->cpuusage = alloc_percpu(u64);
|
||||
if (!ca->cpuusage)
|
||||
goto out_free_ca;
|
||||
|
||||
ca->cpustat = alloc_percpu(struct kernel_cpustat);
|
||||
if (!ca->cpustat)
|
||||
goto out_free_cpuusage;
|
||||
|
||||
return &ca->css;
|
||||
|
||||
out_free_cpuusage:
|
||||
free_percpu(ca->cpuusage);
|
||||
out_free_ca:
|
||||
kfree(ca);
|
||||
out:
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
/* destroy an existing cpu accounting group */
|
||||
static void cpuacct_css_free(struct cgroup *cgrp)
|
||||
{
|
||||
struct cpuacct *ca = cgroup_ca(cgrp);
|
||||
|
||||
free_percpu(ca->cpustat);
|
||||
free_percpu(ca->cpuusage);
|
||||
kfree(ca);
|
||||
}
|
||||
|
||||
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
|
||||
{
|
||||
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
|
||||
u64 data;
|
||||
|
||||
#ifndef CONFIG_64BIT
|
||||
/*
|
||||
* Take rq->lock to make 64-bit read safe on 32-bit platforms.
|
||||
*/
|
||||
raw_spin_lock_irq(&cpu_rq(cpu)->lock);
|
||||
data = *cpuusage;
|
||||
raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
|
||||
#else
|
||||
data = *cpuusage;
|
||||
#endif
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
|
||||
{
|
||||
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
|
||||
|
||||
#ifndef CONFIG_64BIT
|
||||
/*
|
||||
* Take rq->lock to make 64-bit write safe on 32-bit platforms.
|
||||
*/
|
||||
raw_spin_lock_irq(&cpu_rq(cpu)->lock);
|
||||
*cpuusage = val;
|
||||
raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
|
||||
#else
|
||||
*cpuusage = val;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* return total cpu usage (in nanoseconds) of a group */
|
||||
static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
|
||||
{
|
||||
struct cpuacct *ca = cgroup_ca(cgrp);
|
||||
u64 totalcpuusage = 0;
|
||||
int i;
|
||||
|
||||
for_each_present_cpu(i)
|
||||
totalcpuusage += cpuacct_cpuusage_read(ca, i);
|
||||
|
||||
return totalcpuusage;
|
||||
}
|
||||
|
||||
static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
|
||||
u64 reset)
|
||||
{
|
||||
struct cpuacct *ca = cgroup_ca(cgrp);
|
||||
int err = 0;
|
||||
int i;
|
||||
|
||||
if (reset) {
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for_each_present_cpu(i)
|
||||
cpuacct_cpuusage_write(ca, i, 0);
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
|
||||
struct seq_file *m)
|
||||
{
|
||||
struct cpuacct *ca = cgroup_ca(cgroup);
|
||||
u64 percpu;
|
||||
int i;
|
||||
|
||||
for_each_present_cpu(i) {
|
||||
percpu = cpuacct_cpuusage_read(ca, i);
|
||||
seq_printf(m, "%llu ", (unsigned long long) percpu);
|
||||
}
|
||||
seq_printf(m, "\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const char *cpuacct_stat_desc[] = {
|
||||
[CPUACCT_STAT_USER] = "user",
|
||||
[CPUACCT_STAT_SYSTEM] = "system",
|
||||
};
|
||||
|
||||
static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
|
||||
struct cgroup_map_cb *cb)
|
||||
{
|
||||
struct cpuacct *ca = cgroup_ca(cgrp);
|
||||
int cpu;
|
||||
s64 val = 0;
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
|
||||
val += kcpustat->cpustat[CPUTIME_USER];
|
||||
val += kcpustat->cpustat[CPUTIME_NICE];
|
||||
}
|
||||
val = cputime64_to_clock_t(val);
|
||||
cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val);
|
||||
|
||||
val = 0;
|
||||
for_each_online_cpu(cpu) {
|
||||
struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
|
||||
val += kcpustat->cpustat[CPUTIME_SYSTEM];
|
||||
val += kcpustat->cpustat[CPUTIME_IRQ];
|
||||
val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
|
||||
}
|
||||
|
||||
val = cputime64_to_clock_t(val);
|
||||
cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct cftype files[] = {
|
||||
{
|
||||
.name = "usage",
|
||||
.read_u64 = cpuusage_read,
|
||||
.write_u64 = cpuusage_write,
|
||||
},
|
||||
{
|
||||
.name = "usage_percpu",
|
||||
.read_seq_string = cpuacct_percpu_seq_read,
|
||||
},
|
||||
{
|
||||
.name = "stat",
|
||||
.read_map = cpuacct_stats_show,
|
||||
},
|
||||
{ } /* terminate */
|
||||
};
|
||||
|
||||
/*
|
||||
* charge this task's execution time to its accounting group.
|
||||
*
|
||||
* called with rq->lock held.
|
||||
*/
|
||||
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
|
||||
{
|
||||
struct cpuacct *ca;
|
||||
int cpu;
|
||||
|
||||
if (unlikely(!cpuacct_subsys.active))
|
||||
return;
|
||||
|
||||
cpu = task_cpu(tsk);
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
ca = task_ca(tsk);
|
||||
|
||||
for (; ca; ca = parent_ca(ca)) {
|
||||
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
|
||||
*cpuusage += cputime;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
struct cgroup_subsys cpuacct_subsys = {
|
||||
.name = "cpuacct",
|
||||
.css_alloc = cpuacct_css_alloc,
|
||||
.css_free = cpuacct_css_free,
|
||||
.subsys_id = cpuacct_subsys_id,
|
||||
.base_cftypes = files,
|
||||
};
|
||||
#endif /* CONFIG_CGROUP_CPUACCT */
|
||||
|
||||
void dump_cpu_task(int cpu)
|
||||
{
|
||||
pr_info("Task dump for CPU %d:\n", cpu);
|
||||
|
296
kernel/sched/cpuacct.c
Normal file
@@ -0,0 +1,296 @@
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/err.h>
|
||||
|
||||
#include "sched.h"
|
||||
|
||||
/*
|
||||
* CPU accounting code for task groups.
|
||||
*
|
||||
* Based on the work by Paul Menage (menage@google.com) and Balbir Singh
|
||||
* (balbir@in.ibm.com).
|
||||
*/
|
||||
|
||||
/* Time spent by the tasks of the cpu accounting group executing in ... */
|
||||
enum cpuacct_stat_index {
|
||||
CPUACCT_STAT_USER, /* ... user mode */
|
||||
CPUACCT_STAT_SYSTEM, /* ... kernel mode */
|
||||
|
||||
CPUACCT_STAT_NSTATS,
|
||||
};
|
||||
|
||||
/* track cpu usage of a group of tasks and its child groups */
|
||||
struct cpuacct {
|
||||
struct cgroup_subsys_state css;
|
||||
/* cpuusage holds pointer to a u64-type object on every cpu */
|
||||
u64 __percpu *cpuusage;
|
||||
struct kernel_cpustat __percpu *cpustat;
|
||||
};
|
||||
|
||||
/* return cpu accounting group corresponding to this container */
|
||||
static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
|
||||
{
|
||||
return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id),
|
||||
struct cpuacct, css);
|
||||
}
|
||||
|
||||
/* return cpu accounting group to which this task belongs */
|
||||
static inline struct cpuacct *task_ca(struct task_struct *tsk)
|
||||
{
|
||||
return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
|
||||
struct cpuacct, css);
|
||||
}
|
||||
|
||||
static inline struct cpuacct *__parent_ca(struct cpuacct *ca)
|
||||
{
|
||||
return cgroup_ca(ca->css.cgroup->parent);
|
||||
}
|
||||
|
||||
static inline struct cpuacct *parent_ca(struct cpuacct *ca)
|
||||
{
|
||||
if (!ca->css.cgroup->parent)
|
||||
return NULL;
|
||||
return cgroup_ca(ca->css.cgroup->parent);
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
|
||||
static struct cpuacct root_cpuacct = {
|
||||
.cpustat = &kernel_cpustat,
|
||||
.cpuusage = &root_cpuacct_cpuusage,
|
||||
};
|
||||
|
||||
/* create a new cpu accounting group */
|
||||
static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
|
||||
{
|
||||
struct cpuacct *ca;
|
||||
|
||||
if (!cgrp->parent)
|
||||
return &root_cpuacct.css;
|
||||
|
||||
ca = kzalloc(sizeof(*ca), GFP_KERNEL);
|
||||
if (!ca)
|
||||
goto out;
|
||||
|
||||
ca->cpuusage = alloc_percpu(u64);
|
||||
if (!ca->cpuusage)
|
||||
goto out_free_ca;
|
||||
|
||||
ca->cpustat = alloc_percpu(struct kernel_cpustat);
|
||||
if (!ca->cpustat)
|
||||
goto out_free_cpuusage;
|
||||
|
||||
return &ca->css;
|
||||
|
||||
out_free_cpuusage:
|
||||
free_percpu(ca->cpuusage);
|
||||
out_free_ca:
|
||||
kfree(ca);
|
||||
out:
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
/* destroy an existing cpu accounting group */
|
||||
static void cpuacct_css_free(struct cgroup *cgrp)
|
||||
{
|
||||
struct cpuacct *ca = cgroup_ca(cgrp);
|
||||
|
||||
free_percpu(ca->cpustat);
|
||||
free_percpu(ca->cpuusage);
|
||||
kfree(ca);
|
||||
}
|
||||
|
||||
static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu)
|
||||
{
|
||||
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
|
||||
u64 data;
|
||||
|
||||
#ifndef CONFIG_64BIT
|
||||
/*
|
||||
* Take rq->lock to make 64-bit read safe on 32-bit platforms.
|
||||
*/
|
||||
raw_spin_lock_irq(&cpu_rq(cpu)->lock);
|
||||
data = *cpuusage;
|
||||
raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
|
||||
#else
|
||||
data = *cpuusage;
|
||||
#endif
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
|
||||
{
|
||||
u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
|
||||
|
||||
#ifndef CONFIG_64BIT
|
||||
/*
|
||||
* Take rq->lock to make 64-bit write safe on 32-bit platforms.
|
||||
*/
|
||||
raw_spin_lock_irq(&cpu_rq(cpu)->lock);
|
||||
*cpuusage = val;
|
||||
	raw_spin_unlock_irq(&cpu_rq(cpu)->lock);
#else
	*cpuusage = val;
#endif
}

/* return total cpu usage (in nanoseconds) of a group */
static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft)
{
	struct cpuacct *ca = cgroup_ca(cgrp);
	u64 totalcpuusage = 0;
	int i;

	for_each_present_cpu(i)
		totalcpuusage += cpuacct_cpuusage_read(ca, i);

	return totalcpuusage;
}

static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype,
			  u64 reset)
{
	struct cpuacct *ca = cgroup_ca(cgrp);
	int err = 0;
	int i;

	if (reset) {
		err = -EINVAL;
		goto out;
	}

	for_each_present_cpu(i)
		cpuacct_cpuusage_write(ca, i, 0);

out:
	return err;
}

static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
				   struct seq_file *m)
{
	struct cpuacct *ca = cgroup_ca(cgroup);
	u64 percpu;
	int i;

	for_each_present_cpu(i) {
		percpu = cpuacct_cpuusage_read(ca, i);
		seq_printf(m, "%llu ", (unsigned long long) percpu);
	}
	seq_printf(m, "\n");
	return 0;
}

static const char * const cpuacct_stat_desc[] = {
	[CPUACCT_STAT_USER] = "user",
	[CPUACCT_STAT_SYSTEM] = "system",
};

static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
			      struct cgroup_map_cb *cb)
{
	struct cpuacct *ca = cgroup_ca(cgrp);
	int cpu;
	s64 val = 0;

	for_each_online_cpu(cpu) {
		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
		val += kcpustat->cpustat[CPUTIME_USER];
		val += kcpustat->cpustat[CPUTIME_NICE];
	}
	val = cputime64_to_clock_t(val);
	cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val);

	val = 0;
	for_each_online_cpu(cpu) {
		struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu);
		val += kcpustat->cpustat[CPUTIME_SYSTEM];
		val += kcpustat->cpustat[CPUTIME_IRQ];
		val += kcpustat->cpustat[CPUTIME_SOFTIRQ];
	}

	val = cputime64_to_clock_t(val);
	cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val);

	return 0;
}

static struct cftype files[] = {
	{
		.name = "usage",
		.read_u64 = cpuusage_read,
		.write_u64 = cpuusage_write,
	},
	{
		.name = "usage_percpu",
		.read_seq_string = cpuacct_percpu_seq_read,
	},
	{
		.name = "stat",
		.read_map = cpuacct_stats_show,
	},
	{ }	/* terminate */
};

/*
 * charge this task's execution time to its accounting group.
 *
 * called with rq->lock held.
 */
void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
	struct cpuacct *ca;
	int cpu;

	cpu = task_cpu(tsk);

	rcu_read_lock();

	ca = task_ca(tsk);

	while (true) {
		u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
		*cpuusage += cputime;

		ca = parent_ca(ca);
		if (!ca)
			break;
	}

	rcu_read_unlock();
}

/*
 * Add user/system time to cpuacct.
 *
 * Note: it's the caller that updates the account of the root cgroup.
 */
void cpuacct_account_field(struct task_struct *p, int index, u64 val)
{
	struct kernel_cpustat *kcpustat;
	struct cpuacct *ca;

	rcu_read_lock();
	ca = task_ca(p);
	while (ca != &root_cpuacct) {
		kcpustat = this_cpu_ptr(ca->cpustat);
		kcpustat->cpustat[index] += val;
		ca = __parent_ca(ca);
	}
	rcu_read_unlock();
}

struct cgroup_subsys cpuacct_subsys = {
	.name		= "cpuacct",
	.css_alloc	= cpuacct_css_alloc,
	.css_free	= cpuacct_css_free,
	.subsys_id	= cpuacct_subsys_id,
	.base_cftypes	= files,
	.early_init	= 1,
};
17
kernel/sched/cpuacct.h
Normal file
@@ -0,0 +1,17 @@
#ifdef CONFIG_CGROUP_CPUACCT

extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
extern void cpuacct_account_field(struct task_struct *p, int index, u64 val);

#else

static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime)
{
}

static inline void
cpuacct_account_field(struct task_struct *p, int index, u64 val)
{
}

#endif
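The new header exposes only these two hooks to the rest of the scheduler. As a rough illustration of the calling convention (hypothetical code, not part of this commit; the real call site is the cputime hunk below):

#include <linux/sched.h>
#include <linux/kernel_stat.h>	/* for the CPUTIME_* cpustat indexes */
#include "cpuacct.h"		/* assumed relative include, as done from kernel/sched/ */

/* Hypothetical helper: charge @delta_ns of runtime and user time to @p. */
static void demo_account(struct task_struct *p, u64 delta_ns)
{
	cpuacct_charge(p, delta_ns);				/* execution time, in ns */
	cpuacct_account_field(p, CPUTIME_USER, delta_ns);	/* kernel_cpustat field */
}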
@@ -115,10 +115,6 @@ static int irqtime_account_si_update(void)
static inline void task_group_account_field(struct task_struct *p, int index,
					    u64 tmp)
{
#ifdef CONFIG_CGROUP_CPUACCT
	struct kernel_cpustat *kcpustat;
	struct cpuacct *ca;
#endif
	/*
	 * Since all updates are sure to touch the root cgroup, we
	 * get ourselves ahead and touch it first. If the root cgroup
@@ -127,19 +123,7 @@ static inline void task_group_account_field(struct task_struct *p, int index,
	 */
	__get_cpu_var(kernel_cpustat).cpustat[index] += tmp;

#ifdef CONFIG_CGROUP_CPUACCT
	if (unlikely(!cpuacct_subsys.active))
		return;

	rcu_read_lock();
	ca = task_ca(p);
	while (ca && (ca != &root_cpuacct)) {
		kcpustat = this_cpu_ptr(ca->cpustat);
		kcpustat->cpustat[index] += tmp;
		ca = parent_ca(ca);
	}
	rcu_read_unlock();
#endif
	cpuacct_account_field(p, index, tmp);
}

/*
@@ -310,7 +294,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)

	t = tsk;
	do {
		task_cputime(tsk, &utime, &stime);
		task_cputime(t, &utime, &stime);
		times->utime += utime;
		times->stime += stime;
		times->sum_exec_runtime += task_sched_runtime(t);
@@ -1563,6 +1563,27 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
		se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
	} /* migrations, e.g. sleep=0 leave decay_count == 0 */
}

/*
 * Update the rq's load with the elapsed running time before entering
 * idle. if the last scheduled task is not a CFS task, idle_enter will
 * be the only way to update the runnable statistic.
 */
void idle_enter_fair(struct rq *this_rq)
{
	update_rq_runnable_avg(this_rq, 1);
}

/*
 * Update the rq's load with the elapsed idle time before a task is
 * scheduled. if the newly scheduled task is not a CFS task, idle_exit will
 * be the only way to update the runnable statistic.
 */
void idle_exit_fair(struct rq *this_rq)
{
	update_rq_runnable_avg(this_rq, 0);
}

#else
static inline void update_entity_load_avg(struct sched_entity *se,
					  int update_cfs_rq) {}
@@ -3875,12 +3896,16 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
|
||||
int tsk_cache_hot = 0;
|
||||
/*
|
||||
* We do not migrate tasks that are:
|
||||
* 1) running (obviously), or
|
||||
* 1) throttled_lb_pair, or
|
||||
* 2) cannot be migrated to this CPU due to cpus_allowed, or
|
||||
* 3) are cache-hot on their current CPU.
|
||||
* 3) running (obviously), or
|
||||
* 4) are cache-hot on their current CPU.
|
||||
*/
|
||||
if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
|
||||
return 0;
|
||||
|
||||
if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
|
||||
int new_dst_cpu;
|
||||
int cpu;
|
||||
|
||||
schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
|
||||
|
||||
@@ -3895,12 +3920,15 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
|
||||
if (!env->dst_grpmask || (env->flags & LBF_SOME_PINNED))
|
||||
return 0;
|
||||
|
||||
new_dst_cpu = cpumask_first_and(env->dst_grpmask,
|
||||
tsk_cpus_allowed(p));
|
||||
if (new_dst_cpu < nr_cpu_ids) {
|
||||
env->flags |= LBF_SOME_PINNED;
|
||||
env->new_dst_cpu = new_dst_cpu;
|
||||
/* Prevent to re-select dst_cpu via env's cpus */
|
||||
for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
|
||||
if (cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) {
|
||||
env->flags |= LBF_SOME_PINNED;
|
||||
env->new_dst_cpu = cpu;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3921,20 +3949,17 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
|
||||
tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd);
|
||||
if (!tsk_cache_hot ||
|
||||
env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
|
||||
#ifdef CONFIG_SCHEDSTATS
|
||||
|
||||
if (tsk_cache_hot) {
|
||||
schedstat_inc(env->sd, lb_hot_gained[env->idle]);
|
||||
schedstat_inc(p, se.statistics.nr_forced_migrations);
|
||||
}
|
||||
#endif
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (tsk_cache_hot) {
|
||||
schedstat_inc(p, se.statistics.nr_failed_migrations_hot);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
schedstat_inc(p, se.statistics.nr_failed_migrations_hot);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -3949,9 +3974,6 @@ static int move_one_task(struct lb_env *env)
|
||||
struct task_struct *p, *n;
|
||||
|
||||
list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) {
|
||||
if (throttled_lb_pair(task_group(p), env->src_rq->cpu, env->dst_cpu))
|
||||
continue;
|
||||
|
||||
if (!can_migrate_task(p, env))
|
||||
continue;
|
||||
|
||||
@@ -4003,7 +4025,7 @@ static int move_tasks(struct lb_env *env)
|
||||
break;
|
||||
}
|
||||
|
||||
if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
|
||||
if (!can_migrate_task(p, env))
|
||||
goto next;
|
||||
|
||||
load = task_h_load(p);
|
||||
@@ -4014,9 +4036,6 @@ static int move_tasks(struct lb_env *env)
|
||||
if ((load / 2) > env->imbalance)
|
||||
goto next;
|
||||
|
||||
if (!can_migrate_task(p, env))
|
||||
goto next;
|
||||
|
||||
move_task(p, env);
|
||||
pulled++;
|
||||
env->imbalance -= load;
|
||||
@@ -4961,7 +4980,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
|
||||
#define MAX_PINNED_INTERVAL 512
|
||||
|
||||
/* Working cpumask for load_balance and load_balance_newidle. */
|
||||
DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
|
||||
DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
|
||||
|
||||
static int need_active_balance(struct lb_env *env)
|
||||
{
|
||||
@@ -4992,11 +5011,10 @@ static int load_balance(int this_cpu, struct rq *this_rq,
|
||||
int *balance)
|
||||
{
|
||||
int ld_moved, cur_ld_moved, active_balance = 0;
|
||||
int lb_iterations, max_lb_iterations;
|
||||
struct sched_group *group;
|
||||
struct rq *busiest;
|
||||
unsigned long flags;
|
||||
struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
|
||||
struct cpumask *cpus = __get_cpu_var(load_balance_mask);
|
||||
|
||||
struct lb_env env = {
|
||||
.sd = sd,
|
||||
@@ -5008,8 +5026,14 @@ static int load_balance(int this_cpu, struct rq *this_rq,
|
||||
.cpus = cpus,
|
||||
};
|
||||
|
||||
/*
|
||||
* For NEWLY_IDLE load_balancing, we don't need to consider
|
||||
* other cpus in our group
|
||||
*/
|
||||
if (idle == CPU_NEWLY_IDLE)
|
||||
env.dst_grpmask = NULL;
|
||||
|
||||
cpumask_copy(cpus, cpu_active_mask);
|
||||
max_lb_iterations = cpumask_weight(env.dst_grpmask);
|
||||
|
||||
schedstat_inc(sd, lb_count[idle]);
|
||||
|
||||
@@ -5035,7 +5059,6 @@ redo:
|
||||
schedstat_add(sd, lb_imbalance[idle], env.imbalance);
|
||||
|
||||
ld_moved = 0;
|
||||
lb_iterations = 1;
|
||||
if (busiest->nr_running > 1) {
|
||||
/*
|
||||
* Attempt to move tasks. If find_busiest_group has found
|
||||
@@ -5062,17 +5085,17 @@ more_balance:
|
||||
double_rq_unlock(env.dst_rq, busiest);
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (env.flags & LBF_NEED_BREAK) {
|
||||
env.flags &= ~LBF_NEED_BREAK;
|
||||
goto more_balance;
|
||||
}
|
||||
|
||||
/*
|
||||
* some other cpu did the load balance for us.
|
||||
*/
|
||||
if (cur_ld_moved && env.dst_cpu != smp_processor_id())
|
||||
resched_cpu(env.dst_cpu);
|
||||
|
||||
if (env.flags & LBF_NEED_BREAK) {
|
||||
env.flags &= ~LBF_NEED_BREAK;
|
||||
goto more_balance;
|
||||
}
|
||||
|
||||
/*
|
||||
* Revisit (affine) tasks on src_cpu that couldn't be moved to
|
||||
* us and move them to an alternate dst_cpu in our sched_group
|
||||
@@ -5092,14 +5115,17 @@ more_balance:
|
||||
* moreover subsequent load balance cycles should correct the
|
||||
* excess load moved.
|
||||
*/
|
||||
if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 &&
|
||||
lb_iterations++ < max_lb_iterations) {
|
||||
if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0) {
|
||||
|
||||
env.dst_rq = cpu_rq(env.new_dst_cpu);
|
||||
env.dst_cpu = env.new_dst_cpu;
|
||||
env.flags &= ~LBF_SOME_PINNED;
|
||||
env.loop = 0;
|
||||
env.loop_break = sched_nr_migrate_break;
|
||||
|
||||
/* Prevent to re-select dst_cpu via env's cpus */
|
||||
cpumask_clear_cpu(env.dst_cpu, env.cpus);
|
||||
|
||||
/*
|
||||
* Go back to "more_balance" rather than "redo" since we
|
||||
* need to continue with same src_cpu.
|
||||
@@ -5220,8 +5246,6 @@ void idle_balance(int this_cpu, struct rq *this_rq)
|
||||
if (this_rq->avg_idle < sysctl_sched_migration_cost)
|
||||
return;
|
||||
|
||||
update_rq_runnable_avg(this_rq, 1);
|
||||
|
||||
/*
|
||||
* Drop the rq->lock, but keep IRQ/preempt disabled.
|
||||
*/
|
||||
@@ -5396,13 +5420,16 @@ static inline void set_cpu_sd_state_busy(void)
|
||||
struct sched_domain *sd;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
if (!test_bit(NOHZ_IDLE, nohz_flags(cpu)))
|
||||
return;
|
||||
clear_bit(NOHZ_IDLE, nohz_flags(cpu));
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_domain(cpu, sd)
|
||||
sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd);
|
||||
|
||||
if (!sd || !sd->nohz_idle)
|
||||
goto unlock;
|
||||
sd->nohz_idle = 0;
|
||||
|
||||
for (; sd; sd = sd->parent)
|
||||
atomic_inc(&sd->groups->sgp->nr_busy_cpus);
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
@@ -5411,13 +5438,16 @@ void set_cpu_sd_state_idle(void)
|
||||
struct sched_domain *sd;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
if (test_bit(NOHZ_IDLE, nohz_flags(cpu)))
|
||||
return;
|
||||
set_bit(NOHZ_IDLE, nohz_flags(cpu));
|
||||
|
||||
rcu_read_lock();
|
||||
for_each_domain(cpu, sd)
|
||||
sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd);
|
||||
|
||||
if (!sd || sd->nohz_idle)
|
||||
goto unlock;
|
||||
sd->nohz_idle = 1;
|
||||
|
||||
for (; sd; sd = sd->parent)
|
||||
atomic_dec(&sd->groups->sgp->nr_busy_cpus);
|
||||
unlock:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
@@ -5469,7 +5499,7 @@ void update_max_interval(void)
|
||||
* It checks each scheduling domain to see if it is due to be balanced,
|
||||
* and initiates a balancing operation if so.
|
||||
*
|
||||
* Balancing parameters are set up in arch_init_sched_domains.
|
||||
* Balancing parameters are set up in init_sched_domains.
|
||||
*/
|
||||
static void rebalance_domains(int cpu, enum cpu_idle_type idle)
|
||||
{
|
||||
@@ -5507,10 +5537,11 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
|
||||
if (time_after_eq(jiffies, sd->last_balance + interval)) {
|
||||
if (load_balance(cpu, rq, sd, idle, &balance)) {
|
||||
/*
|
||||
* We've pulled tasks over so either we're no
|
||||
* longer idle.
|
||||
* The LBF_SOME_PINNED logic could have changed
|
||||
* env->dst_cpu, so we can't know our idle
|
||||
* state even if we migrated tasks. Update it.
|
||||
*/
|
||||
idle = CPU_NOT_IDLE;
|
||||
idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
|
||||
}
|
||||
sd->last_balance = jiffies;
|
||||
}
|
||||
|
@@ -45,13 +45,6 @@ SCHED_FEAT(HRTICK, false)
SCHED_FEAT(DOUBLE_TICK, false)
SCHED_FEAT(LB_BIAS, true)

/*
 * Spin-wait on mutex acquisition when the mutex owner is running on
 * another cpu -- assumes that when the owner is running, it will soon
 * release the lock. Decreases scheduling overhead.
 */
SCHED_FEAT(OWNER_SPIN, true)

/*
 * Decrement CPU power based on time not spent running tasks
 */
@@ -13,6 +13,16 @@ select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
{
	return task_cpu(p); /* IDLE tasks as never migrated */
}

static void pre_schedule_idle(struct rq *rq, struct task_struct *prev)
{
	idle_exit_fair(rq);
}

static void post_schedule_idle(struct rq *rq)
{
	idle_enter_fair(rq);
}
#endif /* CONFIG_SMP */
/*
 * Idle tasks are unconditionally rescheduled:
@@ -25,6 +35,10 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
static struct task_struct *pick_next_task_idle(struct rq *rq)
{
	schedstat_inc(rq, sched_goidle);
#ifdef CONFIG_SMP
	/* Trigger the post schedule to do an idle_enter for CFS */
	rq->post_schedule = 1;
#endif
	return rq->idle;
}

@@ -86,6 +100,8 @@ const struct sched_class idle_sched_class = {

#ifdef CONFIG_SMP
	.select_task_rq		= select_task_rq_idle,
	.pre_schedule		= pre_schedule_idle,
	.post_schedule		= post_schedule_idle,
#endif

	.set_curr_task		= set_curr_task_idle,
@@ -8,6 +8,7 @@
|
||||
#include <linux/tick.h>
|
||||
|
||||
#include "cpupri.h"
|
||||
#include "cpuacct.h"
|
||||
|
||||
extern __read_mostly int scheduler_running;
|
||||
|
||||
@@ -951,14 +952,6 @@ static const u32 prio_to_wmult[40] = {
|
||||
/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
|
||||
};
|
||||
|
||||
/* Time spent by the tasks of the cpu accounting group executing in ... */
|
||||
enum cpuacct_stat_index {
|
||||
CPUACCT_STAT_USER, /* ... user mode */
|
||||
CPUACCT_STAT_SYSTEM, /* ... kernel mode */
|
||||
|
||||
CPUACCT_STAT_NSTATS,
|
||||
};
|
||||
|
||||
#define ENQUEUE_WAKEUP 1
|
||||
#define ENQUEUE_HEAD 2
|
||||
#ifdef CONFIG_SMP
|
||||
@@ -1032,6 +1025,18 @@ extern void update_group_power(struct sched_domain *sd, int cpu);
|
||||
extern void trigger_load_balance(struct rq *rq, int cpu);
|
||||
extern void idle_balance(int this_cpu, struct rq *this_rq);
|
||||
|
||||
/*
|
||||
* Only depends on SMP, FAIR_GROUP_SCHED may be removed when runnable_avg
|
||||
* becomes useful in lb
|
||||
*/
|
||||
#if defined(CONFIG_FAIR_GROUP_SCHED)
|
||||
extern void idle_enter_fair(struct rq *this_rq);
|
||||
extern void idle_exit_fair(struct rq *this_rq);
|
||||
#else
|
||||
static inline void idle_enter_fair(struct rq *this_rq) {}
|
||||
static inline void idle_exit_fair(struct rq *this_rq) {}
|
||||
#endif
|
||||
|
||||
#else /* CONFIG_SMP */
|
||||
|
||||
static inline void idle_balance(int cpu, struct rq *rq)
|
||||
@@ -1055,45 +1060,6 @@ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime
|
||||
|
||||
extern void update_idle_cpu_load(struct rq *this_rq);
|
||||
|
||||
#ifdef CONFIG_CGROUP_CPUACCT
|
||||
#include <linux/cgroup.h>
|
||||
/* track cpu usage of a group of tasks and its child groups */
|
||||
struct cpuacct {
|
||||
struct cgroup_subsys_state css;
|
||||
/* cpuusage holds pointer to a u64-type object on every cpu */
|
||||
u64 __percpu *cpuusage;
|
||||
struct kernel_cpustat __percpu *cpustat;
|
||||
};
|
||||
|
||||
extern struct cgroup_subsys cpuacct_subsys;
|
||||
extern struct cpuacct root_cpuacct;
|
||||
|
||||
/* return cpu accounting group corresponding to this container */
|
||||
static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp)
|
||||
{
|
||||
return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id),
|
||||
struct cpuacct, css);
|
||||
}
|
||||
|
||||
/* return cpu accounting group to which this task belongs */
|
||||
static inline struct cpuacct *task_ca(struct task_struct *tsk)
|
||||
{
|
||||
return container_of(task_subsys_state(tsk, cpuacct_subsys_id),
|
||||
struct cpuacct, css);
|
||||
}
|
||||
|
||||
static inline struct cpuacct *parent_ca(struct cpuacct *ca)
|
||||
{
|
||||
if (!ca || !ca->css.cgroup->parent)
|
||||
return NULL;
|
||||
return cgroup_ca(ca->css.cgroup->parent);
|
||||
}
|
||||
|
||||
extern void cpuacct_charge(struct task_struct *tsk, u64 cputime);
|
||||
#else
|
||||
static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
static inline u64 steal_ticks(u64 steal)
|
||||
{
|
||||
@@ -1348,7 +1314,6 @@ extern void account_cfs_bandwidth_used(int enabled, int was_enabled);
|
||||
enum rq_nohz_flag_bits {
|
||||
NOHZ_TICK_STOPPED,
|
||||
NOHZ_BALANCE_KICK,
|
||||
NOHZ_IDLE,
|
||||
};
|
||||
|
||||
#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
|
||||
|
@@ -485,6 +485,9 @@ flush_signal_handlers(struct task_struct *t, int force_default)
		if (force_default || ka->sa.sa_handler != SIG_IGN)
			ka->sa.sa_handler = SIG_DFL;
		ka->sa.sa_flags = 0;
#ifdef __ARCH_HAS_SA_RESTORER
		ka->sa.sa_restorer = NULL;
#endif
		sigemptyset(&ka->sa.sa_mask);
		ka++;
	}
@@ -2682,7 +2685,7 @@ static int do_sigpending(void *set, unsigned long sigsetsize)
/**
 *  sys_rt_sigpending - examine a pending signal that has been raised
 *			while blocked
 *  @set: stores pending signals
 *  @uset: stores pending signals
 *  @sigsetsize: size of sigset_t type or larger
 */
SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize)
@@ -2945,7 +2948,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)

static int do_tkill(pid_t tgid, pid_t pid, int sig)
{
	struct siginfo info;
	struct siginfo info = {};

	info.si_signo = sig;
	info.si_errno = 0;
@@ -131,7 +131,7 @@ static int smpboot_thread_fn(void *data)
			continue;
		}

		//BUG_ON(td->cpu != smp_processor_id());
		BUG_ON(td->cpu != smp_processor_id());

		/* Check for state change setup */
		switch (td->status) {
@@ -185,8 +185,18 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
	}
	get_task_struct(tsk);
	*per_cpu_ptr(ht->store, cpu) = tsk;
	if (ht->create)
		ht->create(cpu);
	if (ht->create) {
		/*
		 * Make sure that the task has actually scheduled out
		 * into park position, before calling the create
		 * callback. At least the migration thread callback
		 * requires that the task is off the runqueue.
		 */
		if (!wait_task_inactive(tsk, TASK_PARKED))
			WARN_ON(1);
		else
			ht->create(cpu);
	}
	return 0;
}

@@ -209,6 +219,8 @@ static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cp
{
	struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);

	if (ht->pre_unpark)
		ht->pre_unpark(cpu);
	kthread_unpark(tsk);
}
@@ -323,18 +323,10 @@ void irq_enter(void)

static inline void invoke_softirq(void)
{
	if (!force_irqthreads) {
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
	if (!force_irqthreads)
		__do_softirq();
#else
		do_softirq();
#endif
	} else {
		__local_bh_disable((unsigned long)__builtin_return_address(0),
				   SOFTIRQ_OFFSET);
	else
		wakeup_softirqd();
		__local_bh_enable(SOFTIRQ_OFFSET);
	}
}

static inline void tick_irq_exit(void)
@@ -355,15 +347,20 @@ static inline void tick_irq_exit(void)
 */
void irq_exit(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
	local_irq_disable();
#else
	WARN_ON_ONCE(!irqs_disabled());
#endif

	account_irq_exit_time(current);
	trace_hardirq_exit();
	sub_preempt_count(IRQ_EXIT_OFFSET);
	sub_preempt_count(HARDIRQ_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

	tick_irq_exit();
	rcu_irq_exit();
	sched_preempt_enable_no_resched();
}

/*
@@ -336,7 +336,7 @@ static struct smp_hotplug_thread cpu_stop_threads = {
	.create			= cpu_stop_create,
	.setup			= cpu_stop_unpark,
	.park			= cpu_stop_park,
	.unpark			= cpu_stop_unpark,
	.pre_unpark		= cpu_stop_unpark,
	.selfparking		= true,
};
62
kernel/sys.c
@@ -324,7 +324,6 @@ void kernel_restart_prepare(char *cmd)
	system_state = SYSTEM_RESTART;
	usermodehelper_disable();
	device_shutdown();
	syscore_shutdown();
}

/**
@@ -370,6 +369,7 @@ void kernel_restart(char *cmd)
{
	kernel_restart_prepare(cmd);
	disable_nonboot_cpus();
	syscore_shutdown();
	if (!cmd)
		printk(KERN_EMERG "Restarting system.\n");
	else
@@ -395,6 +395,7 @@ static void kernel_shutdown_prepare(enum system_states state)
void kernel_halt(void)
{
	kernel_shutdown_prepare(SYSTEM_HALT);
	disable_nonboot_cpus();
	syscore_shutdown();
	printk(KERN_EMERG "System halted.\n");
	kmsg_dump(KMSG_DUMP_HALT);
@@ -2185,9 +2186,8 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,

char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";

static int __orderly_poweroff(void)
static int __orderly_poweroff(bool force)
{
	int argc;
	char **argv;
	static char *envp[] = {
		"HOME=/",
@@ -2196,35 +2196,19 @@
	};
	int ret;

	argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
	if (argv == NULL) {
	argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL);
	if (argv) {
		ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
		argv_free(argv);
	} else {
		printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
		       __func__, poweroff_cmd);
		return -ENOMEM;
				    __func__, poweroff_cmd);
		ret = -ENOMEM;
	}

	ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC,
				      NULL, NULL, NULL);
	argv_free(argv);

	return ret;
}

/**
 * orderly_poweroff - Trigger an orderly system poweroff
 * @force: force poweroff if command execution fails
 *
 * This may be called from any context to trigger a system shutdown.
 * If the orderly shutdown fails, it will force an immediate shutdown.
 */
int orderly_poweroff(bool force)
{
	int ret = __orderly_poweroff();

	if (ret && force) {
		printk(KERN_WARNING "Failed to start orderly shutdown: "
		       "forcing the issue\n");

					"forcing the issue\n");
		/*
		 * I guess this should try to kick off some daemon to sync and
		 * poweroff asap. Or not even bother syncing if we're doing an
@@ -2236,4 +2220,28 @@ int orderly_poweroff(bool force)

	return ret;
}

static bool poweroff_force;

static void poweroff_work_func(struct work_struct *work)
{
	__orderly_poweroff(poweroff_force);
}

static DECLARE_WORK(poweroff_work, poweroff_work_func);

/**
 * orderly_poweroff - Trigger an orderly system poweroff
 * @force: force poweroff if command execution fails
 *
 * This may be called from any context to trigger a system shutdown.
 * If the orderly shutdown fails, it will force an immediate shutdown.
 */
int orderly_poweroff(bool force)
{
	if (force) /* do not override the pending "true" */
		poweroff_force = true;
	schedule_work(&poweroff_work);
	return 0;
}
EXPORT_SYMBOL_GPL(orderly_poweroff);
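With the rework above, orderly_poweroff() only records the force flag and queues poweroff_work, so it now returns immediately from any context. A minimal caller sketch (hypothetical module, only to show the calling convention of the declaration in linux/reboot.h):

#include <linux/module.h>
#include <linux/reboot.h>

static int __init demo_poweroff_init(void)
{
	/* Queues the orderly shutdown work and returns 0 right away. */
	return orderly_poweroff(true);
}
module_init(demo_poweroff_init);

MODULE_LICENSE("GPL");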
@@ -106,7 +106,6 @@ extern unsigned int core_pipe_limit;
#endif
extern int pid_max;
extern int pid_max_min, pid_max_max;
extern int sysctl_drop_caches;
extern int percpu_pagelist_fraction;
extern int compat_log;
extern int latencytop_enabled;
@@ -1430,6 +1429,20 @@ static struct ctl_table vm_table[] = {
		.extra2		= &one,
	},
#endif
	{
		.procname	= "user_reserve_kbytes",
		.data		= &sysctl_user_reserve_kbytes,
		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{
		.procname	= "admin_reserve_kbytes",
		.data		= &sysctl_admin_reserve_kbytes,
		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{ }
};
@@ -365,7 +365,7 @@ int init_test_probes(void)
	target2 = kprobe_target2;

	do {
		rand1 = random32();
		rand1 = prandom_u32();
	} while (rand1 <= div_factor);

	printk(KERN_INFO "Kprobe smoke test started\n");
@@ -67,7 +67,8 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc)
 */
int tick_check_broadcast_device(struct clock_event_device *dev)
{
	if ((tick_broadcast_device.evtdev &&
	if ((dev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (tick_broadcast_device.evtdev &&
	     tick_broadcast_device.evtdev->rating >= dev->rating) ||
	    (dev->features & CLOCK_EVT_FEAT_C3STOP))
		return 0;
@@ -176,6 +176,8 @@ config IRQSOFF_TRACER
|
||||
select GENERIC_TRACER
|
||||
select TRACER_MAX_TRACE
|
||||
select RING_BUFFER_ALLOW_SWAP
|
||||
select TRACER_SNAPSHOT
|
||||
select TRACER_SNAPSHOT_PER_CPU_SWAP
|
||||
help
|
||||
This option measures the time spent in irqs-off critical
|
||||
sections, with microsecond accuracy.
|
||||
@@ -198,6 +200,8 @@ config PREEMPT_TRACER
|
||||
select GENERIC_TRACER
|
||||
select TRACER_MAX_TRACE
|
||||
select RING_BUFFER_ALLOW_SWAP
|
||||
select TRACER_SNAPSHOT
|
||||
select TRACER_SNAPSHOT_PER_CPU_SWAP
|
||||
help
|
||||
This option measures the time spent in preemption-off critical
|
||||
sections, with microsecond accuracy.
|
||||
@@ -217,6 +221,7 @@ config SCHED_TRACER
|
||||
select GENERIC_TRACER
|
||||
select CONTEXT_SWITCH_TRACER
|
||||
select TRACER_MAX_TRACE
|
||||
select TRACER_SNAPSHOT
|
||||
help
|
||||
This tracer tracks the latency of the highest priority task
|
||||
to be scheduled in, starting from the point it has woken up.
|
||||
@@ -248,6 +253,27 @@ config TRACER_SNAPSHOT
|
||||
echo 1 > /sys/kernel/debug/tracing/snapshot
|
||||
cat snapshot
|
||||
|
||||
config TRACER_SNAPSHOT_PER_CPU_SWAP
|
||||
bool "Allow snapshot to swap per CPU"
|
||||
depends on TRACER_SNAPSHOT
|
||||
select RING_BUFFER_ALLOW_SWAP
|
||||
help
|
||||
Allow doing a snapshot of a single CPU buffer instead of a
|
||||
full swap (all buffers). If this is set, then the following is
|
||||
allowed:
|
||||
|
||||
echo 1 > /sys/kernel/debug/tracing/per_cpu/cpu2/snapshot
|
||||
|
||||
After which, only the tracing buffer for CPU 2 was swapped with
|
||||
the main tracing buffer, and the other CPU buffers remain the same.
|
||||
|
||||
When this is enabled, this adds a little more overhead to the
|
||||
trace recording, as it needs to add some checks to synchronize
|
||||
recording with swaps. But this does not affect the performance
|
||||
of the overall system. This is enabled by default when the preempt
|
||||
or irq latency tracers are enabled, as those need to swap as well
|
||||
and already adds the overhead (plus a lot more).
|
||||
|
||||
config TRACE_BRANCH_PROFILING
|
||||
bool
|
||||
select GENERIC_TRACER
|
||||
@@ -414,24 +440,28 @@ config PROBE_EVENTS
|
||||
def_bool n
|
||||
|
||||
config DYNAMIC_FTRACE
|
||||
bool "enable/disable ftrace tracepoints dynamically"
|
||||
bool "enable/disable function tracing dynamically"
|
||||
depends on FUNCTION_TRACER
|
||||
depends on HAVE_DYNAMIC_FTRACE
|
||||
default y
|
||||
help
|
||||
This option will modify all the calls to ftrace dynamically
|
||||
(will patch them out of the binary image and replace them
|
||||
with a No-Op instruction) as they are called. A table is
|
||||
created to dynamically enable them again.
|
||||
This option will modify all the calls to function tracing
|
||||
dynamically (will patch them out of the binary image and
|
||||
replace them with a No-Op instruction) on boot up. During
|
||||
compile time, a table is made of all the locations that ftrace
|
||||
can function trace, and this table is linked into the kernel
|
||||
image. When this is enabled, functions can be individually
|
||||
enabled, and the functions not enabled will not affect
|
||||
performance of the system.
|
||||
|
||||
See the files in /sys/kernel/debug/tracing:
|
||||
available_filter_functions
|
||||
set_ftrace_filter
|
||||
set_ftrace_notrace
|
||||
|
||||
This way a CONFIG_FUNCTION_TRACER kernel is slightly larger, but
|
||||
otherwise has native performance as long as no tracing is active.
|
||||
|
||||
The changes to the code are done by a kernel thread that
|
||||
wakes up once a second and checks to see if any ftrace calls
|
||||
were made. If so, it runs stop_machine (stops all CPUS)
|
||||
and modifies the code to jump over the call to ftrace.
|
||||
|
||||
config DYNAMIC_FTRACE_WITH_REGS
|
||||
def_bool y
|
||||
depends on DYNAMIC_FTRACE
|
||||
@@ -520,6 +550,29 @@ config RING_BUFFER_BENCHMARK
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config RING_BUFFER_STARTUP_TEST
|
||||
bool "Ring buffer startup self test"
|
||||
depends on RING_BUFFER
|
||||
help
|
||||
Run a simple self test on the ring buffer on boot up. Late in the
|
||||
kernel boot sequence, the test will start that kicks off
|
||||
a thread per cpu. Each thread will write various size events
|
||||
into the ring buffer. Another thread is created to send IPIs
|
||||
to each of the threads, where the IPI handler will also write
|
||||
to the ring buffer, to test/stress the nesting ability.
|
||||
If any anomalies are discovered, a warning will be displayed
|
||||
and all ring buffers will be disabled.
|
||||
|
||||
The test runs for 10 seconds. This will slow your boot time
|
||||
by at least 10 more seconds.
|
||||
|
||||
At the end of the test, statics and more checks are done.
|
||||
It will output the stats of each per cpu buffer. What
|
||||
was written, the sizes, what was read, what was lost, and
|
||||
other similar details.
|
||||
|
||||
If unsure, say N
|
||||
|
||||
endif # FTRACE
|
||||
|
||||
endif # TRACING_SUPPORT
|
||||
|
@@ -72,7 +72,7 @@ static void trace_note(struct blk_trace *bt, pid_t pid, int action,
|
||||
bool blk_tracer = blk_tracer_enabled;
|
||||
|
||||
if (blk_tracer) {
|
||||
buffer = blk_tr->buffer;
|
||||
buffer = blk_tr->trace_buffer.buffer;
|
||||
pc = preempt_count();
|
||||
event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
|
||||
sizeof(*t) + len,
|
||||
@@ -218,7 +218,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
|
||||
if (blk_tracer) {
|
||||
tracing_record_cmdline(current);
|
||||
|
||||
buffer = blk_tr->buffer;
|
||||
buffer = blk_tr->trace_buffer.buffer;
|
||||
pc = preempt_count();
|
||||
event = trace_buffer_lock_reserve(buffer, TRACE_BLK,
|
||||
sizeof(*t) + pdu_len,
|
||||
@@ -739,12 +739,6 @@ static void blk_add_trace_rq_complete(void *ignore,
|
||||
struct request_queue *q,
|
||||
struct request *rq)
|
||||
{
|
||||
struct blk_trace *bt = q->blk_trace;
|
||||
|
||||
/* if control ever passes through here, it's a request based driver */
|
||||
if (unlikely(bt && !bt->rq_based))
|
||||
bt->rq_based = true;
|
||||
|
||||
blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
|
||||
}
|
||||
|
||||
@@ -780,24 +774,10 @@ static void blk_add_trace_bio_bounce(void *ignore,
|
||||
blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
|
||||
}
|
||||
|
||||
static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int error)
|
||||
static void blk_add_trace_bio_complete(void *ignore,
|
||||
struct request_queue *q, struct bio *bio,
|
||||
int error)
|
||||
{
|
||||
struct request_queue *q;
|
||||
struct blk_trace *bt;
|
||||
|
||||
if (!bio->bi_bdev)
|
||||
return;
|
||||
|
||||
q = bdev_get_queue(bio->bi_bdev);
|
||||
bt = q->blk_trace;
|
||||
|
||||
/*
|
||||
* Request based drivers will generate both rq and bio completions.
|
||||
* Ignore bio ones.
|
||||
*/
|
||||
if (likely(!bt) || bt->rq_based)
|
||||
return;
|
||||
|
||||
blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
|
||||
}
|
||||
|
||||
|
@@ -66,7 +66,7 @@
|
||||
|
||||
static struct ftrace_ops ftrace_list_end __read_mostly = {
|
||||
.func = ftrace_stub,
|
||||
.flags = FTRACE_OPS_FL_RECURSION_SAFE,
|
||||
.flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
|
||||
};
|
||||
|
||||
/* ftrace_enabled is a method to turn ftrace on or off */
|
||||
@@ -486,7 +486,6 @@ struct ftrace_profile_stat {
|
||||
#define PROFILES_PER_PAGE \
|
||||
(PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
|
||||
|
||||
static int ftrace_profile_bits __read_mostly;
|
||||
static int ftrace_profile_enabled __read_mostly;
|
||||
|
||||
/* ftrace_profile_lock - synchronize the enable and disable of the profiler */
|
||||
@@ -494,7 +493,8 @@ static DEFINE_MUTEX(ftrace_profile_lock);
|
||||
|
||||
static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);
|
||||
|
||||
#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
|
||||
#define FTRACE_PROFILE_HASH_BITS 10
|
||||
#define FTRACE_PROFILE_HASH_SIZE (1 << FTRACE_PROFILE_HASH_BITS)
|
||||
|
||||
static void *
|
||||
function_stat_next(void *v, int idx)
|
||||
@@ -676,7 +676,7 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
|
||||
|
||||
pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);
|
||||
|
||||
for (i = 0; i < pages; i++) {
|
||||
for (i = 1; i < pages; i++) {
|
||||
pg->next = (void *)get_zeroed_page(GFP_KERNEL);
|
||||
if (!pg->next)
|
||||
goto out_free;
|
||||
@@ -694,7 +694,6 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
|
||||
free_page(tmp);
|
||||
}
|
||||
|
||||
free_page((unsigned long)stat->pages);
|
||||
stat->pages = NULL;
|
||||
stat->start = NULL;
|
||||
|
||||
@@ -725,13 +724,6 @@ static int ftrace_profile_init_cpu(int cpu)
|
||||
if (!stat->hash)
|
||||
return -ENOMEM;
|
||||
|
||||
if (!ftrace_profile_bits) {
|
||||
size--;
|
||||
|
||||
for (; size; size >>= 1)
|
||||
ftrace_profile_bits++;
|
||||
}
|
||||
|
||||
/* Preallocate the function profiling pages */
|
||||
if (ftrace_profile_pages_init(stat) < 0) {
|
||||
kfree(stat->hash);
|
||||
@@ -764,7 +756,7 @@ ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
|
||||
struct hlist_head *hhd;
|
||||
unsigned long key;
|
||||
|
||||
key = hash_long(ip, ftrace_profile_bits);
|
||||
key = hash_long(ip, FTRACE_PROFILE_HASH_BITS);
|
||||
hhd = &stat->hash[key];
|
||||
|
||||
if (hlist_empty(hhd))
|
||||
@@ -783,7 +775,7 @@ static void ftrace_add_profile(struct ftrace_profile_stat *stat,
|
||||
{
|
||||
unsigned long key;
|
||||
|
||||
key = hash_long(rec->ip, ftrace_profile_bits);
|
||||
key = hash_long(rec->ip, FTRACE_PROFILE_HASH_BITS);
|
||||
hlist_add_head_rcu(&rec->node, &stat->hash[key]);
|
||||
}
|
||||
|
||||
@@ -1053,6 +1045,19 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
|
||||
|
||||
static struct pid * const ftrace_swapper_pid = &init_struct_pid;
|
||||
|
||||
loff_t
|
||||
ftrace_filter_lseek(struct file *file, loff_t offset, int whence)
|
||||
{
|
||||
loff_t ret;
|
||||
|
||||
if (file->f_mode & FMODE_READ)
|
||||
ret = seq_lseek(file, offset, whence);
|
||||
else
|
||||
file->f_pos = ret = 1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
|
||||
#ifndef CONFIG_FTRACE_MCOUNT_RECORD
|
||||
@@ -1067,7 +1072,7 @@ struct ftrace_func_probe {
|
||||
unsigned long flags;
|
||||
unsigned long ip;
|
||||
void *data;
|
||||
struct rcu_head rcu;
|
||||
struct list_head free_list;
|
||||
};
|
||||
|
||||
struct ftrace_func_entry {
|
||||
@@ -1317,7 +1322,6 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
|
||||
struct hlist_head *hhd;
|
||||
struct ftrace_hash *old_hash;
|
||||
struct ftrace_hash *new_hash;
|
||||
unsigned long key;
|
||||
int size = src->count;
|
||||
int bits = 0;
|
||||
int ret;
|
||||
@@ -1360,10 +1364,6 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
|
||||
for (i = 0; i < size; i++) {
|
||||
hhd = &src->buckets[i];
|
||||
hlist_for_each_entry_safe(entry, tn, hhd, hlist) {
|
||||
if (bits > 0)
|
||||
key = hash_long(entry->ip, bits);
|
||||
else
|
||||
key = 0;
|
||||
remove_hash_entry(src, entry);
|
||||
__add_hash_entry(new_hash, entry);
|
||||
}
|
||||
@@ -2613,7 +2613,7 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
|
||||
* routine, you can use ftrace_filter_write() for the write
|
||||
* routine if @flag has FTRACE_ITER_FILTER set, or
|
||||
* ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
|
||||
* ftrace_regex_lseek() should be used as the lseek routine, and
|
||||
* ftrace_filter_lseek() should be used as the lseek routine, and
|
||||
* release must call ftrace_regex_release().
|
||||
*/
|
||||
int
|
||||
@@ -2697,19 +2697,6 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
|
||||
inode, file);
|
||||
}
|
||||
|
||||
loff_t
|
||||
ftrace_regex_lseek(struct file *file, loff_t offset, int whence)
|
||||
{
|
||||
loff_t ret;
|
||||
|
||||
if (file->f_mode & FMODE_READ)
|
||||
ret = seq_lseek(file, offset, whence);
|
||||
else
|
||||
file->f_pos = ret = 1;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ftrace_match(char *str, char *regex, int len, int type)
|
||||
{
|
||||
int matched = 0;
|
||||
@@ -2974,28 +2961,27 @@ static void __disable_ftrace_function_probe(void)
|
||||
}
|
||||
|
||||
|
||||
static void ftrace_free_entry_rcu(struct rcu_head *rhp)
|
||||
static void ftrace_free_entry(struct ftrace_func_probe *entry)
|
||||
{
|
||||
struct ftrace_func_probe *entry =
|
||||
container_of(rhp, struct ftrace_func_probe, rcu);
|
||||
|
||||
if (entry->ops->free)
|
||||
entry->ops->free(&entry->data);
|
||||
entry->ops->free(entry->ops, entry->ip, &entry->data);
|
||||
kfree(entry);
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
|
||||
void *data)
|
||||
{
|
||||
struct ftrace_func_probe *entry;
|
||||
struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash;
|
||||
struct ftrace_hash *hash;
|
||||
struct ftrace_page *pg;
|
||||
struct dyn_ftrace *rec;
|
||||
int type, len, not;
|
||||
unsigned long key;
|
||||
int count = 0;
|
||||
char *search;
|
||||
int ret;
|
||||
|
||||
type = filter_parse_regex(glob, strlen(glob), &search, ¬);
|
||||
len = strlen(search);
|
||||
@@ -3006,8 +2992,16 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
|
||||
|
||||
mutex_lock(&ftrace_lock);
|
||||
|
||||
if (unlikely(ftrace_disabled))
|
||||
hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
|
||||
if (!hash) {
|
||||
count = -ENOMEM;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (unlikely(ftrace_disabled)) {
|
||||
count = -ENODEV;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
do_for_each_ftrace_rec(pg, rec) {
|
||||
|
||||
@@ -3031,14 +3025,21 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
|
||||
* for each function we find. We call the callback
|
||||
* to give the caller an opportunity to do so.
|
||||
*/
|
||||
if (ops->callback) {
|
||||
if (ops->callback(rec->ip, &entry->data) < 0) {
|
||||
if (ops->init) {
|
||||
if (ops->init(ops, rec->ip, &entry->data) < 0) {
|
||||
/* caller does not like this func */
|
||||
kfree(entry);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
ret = enter_record(hash, rec, 0);
|
||||
if (ret < 0) {
|
||||
kfree(entry);
|
||||
count = ret;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
entry->ops = ops;
|
||||
entry->ip = rec->ip;
|
||||
|
||||
@@ -3046,10 +3047,16 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
|
||||
hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]);
|
||||
|
||||
} while_for_each_ftrace_rec();
|
||||
|
||||
ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
|
||||
if (ret < 0)
|
||||
count = ret;
|
||||
|
||||
__enable_ftrace_function_probe();
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&ftrace_lock);
|
||||
free_ftrace_hash(hash);
|
||||
|
||||
return count;
|
||||
}
|
||||
@@ -3063,7 +3070,12 @@ static void
|
||||
__unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
|
||||
void *data, int flags)
|
||||
{
|
||||
struct ftrace_func_entry *rec_entry;
|
||||
struct ftrace_func_probe *entry;
|
||||
struct ftrace_func_probe *p;
|
||||
struct ftrace_hash **orig_hash = &trace_probe_ops.filter_hash;
|
||||
struct list_head free_list;
|
||||
struct ftrace_hash *hash;
|
||||
struct hlist_node *tmp;
|
||||
char str[KSYM_SYMBOL_LEN];
|
||||
int type = MATCH_FULL;
|
||||
@@ -3084,6 +3096,14 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
|
||||
}
|
||||
|
||||
mutex_lock(&ftrace_lock);
|
||||
|
||||
hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
|
||||
if (!hash)
|
||||
/* Hmm, should report this somehow */
|
||||
goto out_unlock;
|
||||
|
||||
INIT_LIST_HEAD(&free_list);
|
||||
|
||||
for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
|
||||
struct hlist_head *hhd = &ftrace_func_hash[i];
|
||||
|
||||
@@ -3104,12 +3124,30 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
|
||||
continue;
|
||||
}
|
||||
|
||||
hlist_del(&entry->node);
|
||||
call_rcu(&entry->rcu, ftrace_free_entry_rcu);
|
||||
rec_entry = ftrace_lookup_ip(hash, entry->ip);
|
||||
/* It is possible more than one entry had this ip */
|
||||
if (rec_entry)
|
||||
free_hash_entry(hash, rec_entry);
|
||||
|
||||
hlist_del_rcu(&entry->node);
|
||||
list_add(&entry->free_list, &free_list);
|
||||
}
|
||||
}
|
||||
__disable_ftrace_function_probe();
|
||||
/*
|
||||
* Remove after the disable is called. Otherwise, if the last
|
||||
* probe is removed, a null hash means *all enabled*.
|
||||
*/
|
||||
ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
|
||||
synchronize_sched();
|
||||
list_for_each_entry_safe(entry, p, &free_list, free_list) {
|
||||
list_del(&entry->free_list);
|
||||
ftrace_free_entry(entry);
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&ftrace_lock);
|
||||
free_ftrace_hash(hash);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -3441,14 +3479,14 @@ static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
|
||||
|
||||
static int __init set_ftrace_notrace(char *str)
|
||||
{
|
||||
strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
|
||||
strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
|
||||
return 1;
|
||||
}
|
||||
__setup("ftrace_notrace=", set_ftrace_notrace);
|
||||
|
||||
static int __init set_ftrace_filter(char *str)
|
||||
{
|
||||
strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
|
||||
strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
|
||||
return 1;
|
||||
}
|
||||
__setup("ftrace_filter=", set_ftrace_filter);
|
||||
@@ -3571,7 +3609,7 @@ static const struct file_operations ftrace_filter_fops = {
|
||||
.open = ftrace_filter_open,
|
||||
.read = seq_read,
|
||||
.write = ftrace_filter_write,
|
||||
.llseek = ftrace_regex_lseek,
|
||||
.llseek = ftrace_filter_lseek,
|
||||
.release = ftrace_regex_release,
|
||||
};
|
||||
|
||||
@@ -3579,7 +3617,7 @@ static const struct file_operations ftrace_notrace_fops = {
|
||||
.open = ftrace_notrace_open,
|
||||
.read = seq_read,
|
||||
.write = ftrace_notrace_write,
|
||||
.llseek = ftrace_regex_lseek,
|
||||
.llseek = ftrace_filter_lseek,
|
||||
.release = ftrace_regex_release,
|
||||
};
|
||||
|
||||
@@ -3737,7 +3775,8 @@ out:
|
||||
if (fail)
|
||||
return -EINVAL;
|
||||
|
||||
ftrace_graph_filter_enabled = 1;
|
||||
ftrace_graph_filter_enabled = !!(*idx);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3784,8 +3823,8 @@ static const struct file_operations ftrace_graph_fops = {
|
||||
.open = ftrace_graph_open,
|
||||
.read = seq_read,
|
||||
.write = ftrace_graph_write,
|
||||
.llseek = ftrace_filter_lseek,
|
||||
.release = ftrace_graph_release,
|
||||
.llseek = seq_lseek,
|
||||
};
|
||||
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|
||||
|
||||
@@ -4131,7 +4170,8 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
|
||||
preempt_disable_notrace();
|
||||
trace_recursion_set(TRACE_CONTROL_BIT);
|
||||
do_for_each_ftrace_op(op, ftrace_control_list) {
|
||||
if (!ftrace_function_local_disabled(op) &&
|
||||
if (!(op->flags & FTRACE_OPS_FL_STUB) &&
|
||||
!ftrace_function_local_disabled(op) &&
|
||||
ftrace_ops_test(op, ip))
|
||||
op->func(ip, parent_ip, op, regs);
|
||||
} while_for_each_ftrace_op(op);
|
||||
@@ -4439,7 +4479,7 @@ static const struct file_operations ftrace_pid_fops = {
|
||||
.open = ftrace_pid_open,
|
||||
.write = ftrace_pid_write,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.llseek = ftrace_filter_lseek,
|
||||
.release = ftrace_pid_release,
|
||||
};
|
||||
|
||||
@@ -4555,12 +4595,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
|
||||
ftrace_startup_sysctl();
|
||||
|
||||
/* we are starting ftrace again */
|
||||
if (ftrace_ops_list != &ftrace_list_end) {
|
||||
if (ftrace_ops_list->next == &ftrace_list_end)
|
||||
ftrace_trace_function = ftrace_ops_list->func;
|
||||
else
|
||||
ftrace_trace_function = ftrace_ops_list_func;
|
||||
}
|
||||
if (ftrace_ops_list != &ftrace_list_end)
|
||||
update_ftrace_function();
|
||||
|
||||
} else {
|
||||
/* stopping ftrace calls (just send to ftrace_stub) */
|
||||
|
@@ -8,13 +8,16 @@
|
||||
#include <linux/trace_clock.h>
|
||||
#include <linux/trace_seq.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/irq_work.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/kthread.h> /* for self test */
|
||||
#include <linux/kmemcheck.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/hash.h>
|
||||
@@ -444,6 +447,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct rb_irq_work {
|
||||
struct irq_work work;
|
||||
wait_queue_head_t waiters;
|
||||
bool waiters_pending;
|
||||
};
|
||||
|
||||
/*
|
||||
* head_page == tail_page && head == tail then buffer is empty.
|
||||
*/
|
||||
@@ -478,6 +487,8 @@ struct ring_buffer_per_cpu {
|
||||
struct list_head new_pages; /* new pages to add */
|
||||
struct work_struct update_pages_work;
|
||||
struct completion update_done;
|
||||
|
||||
struct rb_irq_work irq_work;
|
||||
};
|
||||
|
||||
struct ring_buffer {
|
||||
@@ -497,6 +508,8 @@ struct ring_buffer {
|
||||
struct notifier_block cpu_notify;
|
||||
#endif
|
||||
u64 (*clock)(void);
|
||||
|
||||
struct rb_irq_work irq_work;
|
||||
};
|
||||
|
||||
struct ring_buffer_iter {
|
||||
@@ -508,6 +521,118 @@ struct ring_buffer_iter {
|
||||
u64 read_stamp;
|
||||
};
|
||||
|
||||
/*
|
||||
* rb_wake_up_waiters - wake up tasks waiting for ring buffer input
|
||||
*
|
||||
* Schedules a delayed work to wake up any task that is blocked on the
|
||||
* ring buffer waiters queue.
|
||||
*/
|
||||
static void rb_wake_up_waiters(struct irq_work *work)
|
||||
{
|
||||
struct rb_irq_work *rbwork = container_of(work, struct rb_irq_work, work);
|
||||
|
||||
wake_up_all(&rbwork->waiters);
|
||||
}
|
||||
|
||||
/**
|
||||
* ring_buffer_wait - wait for input to the ring buffer
|
||||
* @buffer: buffer to wait on
|
||||
* @cpu: the cpu buffer to wait on
|
||||
*
|
||||
* If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
|
||||
* as data is added to any of the @buffer's cpu buffers. Otherwise
|
||||
* it will wait for data to be added to a specific cpu buffer.
|
||||
*/
|
||||
void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
|
||||
{
|
||||
struct ring_buffer_per_cpu *cpu_buffer;
|
||||
DEFINE_WAIT(wait);
|
||||
struct rb_irq_work *work;
|
||||
|
||||
/*
|
||||
* Depending on what the caller is waiting for, either any
|
||||
* data in any cpu buffer, or a specific buffer, put the
|
||||
* caller on the appropriate wait queue.
|
||||
*/
|
||||
if (cpu == RING_BUFFER_ALL_CPUS)
|
||||
work = &buffer->irq_work;
|
||||
else {
|
||||
cpu_buffer = buffer->buffers[cpu];
|
||||
work = &cpu_buffer->irq_work;
|
||||
}
|
||||
|
||||
|
||||
prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE);
|
||||
|
||||
/*
|
||||
* The events can happen in critical sections where
|
||||
* checking a work queue can cause deadlocks.
|
||||
* After adding a task to the queue, this flag is set
|
||||
* only to notify events to try to wake up the queue
|
||||
* using irq_work.
|
||||
*
|
||||
* We don't clear it even if the buffer is no longer
|
||||
* empty. The flag only causes the next event to run
|
||||
* irq_work to do the work queue wake up. The worse
|
||||
* that can happen if we race with !trace_empty() is that
|
||||
* an event will cause an irq_work to try to wake up
|
||||
* an empty queue.
|
||||
*
|
||||
* There's no reason to protect this flag either, as
|
||||
* the work queue and irq_work logic will do the necessary
|
||||
* synchronization for the wake ups. The only thing
|
||||
* that is necessary is that the wake up happens after
|
||||
* a task has been queued. It's OK for spurious wake ups.
|
||||
*/
|
||||
work->waiters_pending = true;
|
||||
|
||||
if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) ||
|
||||
(cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu)))
|
||||
schedule();
|
||||
|
||||
finish_wait(&work->waiters, &wait);
|
||||
}
|
||||
|
||||
/**
|
||||
* ring_buffer_poll_wait - poll on buffer input
|
||||
* @buffer: buffer to wait on
|
||||
* @cpu: the cpu buffer to wait on
|
||||
* @filp: the file descriptor
|
||||
* @poll_table: The poll descriptor
|
||||
*
|
||||
* If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
|
||||
* as data is added to any of the @buffer's cpu buffers. Otherwise
|
||||
* it will wait for data to be added to a specific cpu buffer.
|
||||
*
|
||||
* Returns POLLIN | POLLRDNORM if data exists in the buffers,
|
||||
* zero otherwise.
|
||||
*/
|
||||
int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu,
|
||||
struct file *filp, poll_table *poll_table)
|
||||
{
|
||||
struct ring_buffer_per_cpu *cpu_buffer;
|
||||
struct rb_irq_work *work;
|
||||
|
||||
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
|
||||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
|
||||
return POLLIN | POLLRDNORM;
|
||||
|
||||
if (cpu == RING_BUFFER_ALL_CPUS)
|
||||
work = &buffer->irq_work;
|
||||
else {
|
||||
cpu_buffer = buffer->buffers[cpu];
|
||||
work = &cpu_buffer->irq_work;
|
||||
}
|
||||
|
||||
work->waiters_pending = true;
|
||||
poll_wait(filp, &work->waiters, poll_table);
|
||||
|
||||
if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) ||
|
||||
(cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu)))
|
||||
return POLLIN | POLLRDNORM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* buffer may be either ring_buffer or ring_buffer_per_cpu */
|
||||
#define RB_WARN_ON(b, cond) \
|
||||
({ \
|
||||
@@ -1063,6 +1188,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int nr_pages, int cpu)
|
||||
cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
|
||||
INIT_WORK(&cpu_buffer->update_pages_work, update_pages_handler);
|
||||
init_completion(&cpu_buffer->update_done);
|
||||
init_irq_work(&cpu_buffer->irq_work.work, rb_wake_up_waiters);
|
||||
init_waitqueue_head(&cpu_buffer->irq_work.waiters);
|
||||
|
||||
bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
|
||||
GFP_KERNEL, cpu_to_node(cpu));
|
||||
@@ -1158,6 +1285,9 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
|
||||
buffer->clock = trace_clock_local;
|
||||
buffer->reader_lock_key = key;
|
||||
|
||||
init_irq_work(&buffer->irq_work.work, rb_wake_up_waiters);
|
||||
init_waitqueue_head(&buffer->irq_work.waiters);
|
||||
|
||||
/* need at least two pages */
|
||||
if (nr_pages < 2)
|
||||
nr_pages = 2;
|
||||
@@ -1553,11 +1683,22 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
|
||||
if (!cpu_buffer->nr_pages_to_update)
|
||||
continue;
|
||||
|
||||
if (cpu_online(cpu))
|
||||
/* The update must run on the CPU that is being updated. */
|
||||
preempt_disable();
|
||||
if (cpu == smp_processor_id() || !cpu_online(cpu)) {
|
||||
rb_update_pages(cpu_buffer);
|
||||
cpu_buffer->nr_pages_to_update = 0;
|
||||
} else {
|
||||
/*
|
||||
* Can not disable preemption for schedule_work_on()
|
||||
* on PREEMPT_RT.
|
||||
*/
|
||||
preempt_enable();
|
||||
schedule_work_on(cpu,
|
||||
&cpu_buffer->update_pages_work);
|
||||
else
|
||||
rb_update_pages(cpu_buffer);
|
||||
preempt_disable();
|
||||
}
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/* wait for all the updates to complete */
|
||||
@@ -1595,12 +1736,22 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
|
||||
|
||||
get_online_cpus();
|
||||
|
||||
if (cpu_online(cpu_id)) {
|
||||
preempt_disable();
|
||||
/* The update must run on the CPU that is being updated. */
|
||||
if (cpu_id == smp_processor_id() || !cpu_online(cpu_id))
|
||||
rb_update_pages(cpu_buffer);
|
||||
else {
|
||||
/*
|
||||
* Can not disable preemption for schedule_work_on()
|
||||
* on PREEMPT_RT.
|
||||
*/
|
||||
preempt_enable();
|
||||
schedule_work_on(cpu_id,
|
||||
&cpu_buffer->update_pages_work);
|
||||
wait_for_completion(&cpu_buffer->update_done);
|
||||
} else
|
||||
rb_update_pages(cpu_buffer);
|
||||
preempt_disable();
|
||||
}
|
||||
preempt_enable();
|
||||
|
||||
cpu_buffer->nr_pages_to_update = 0;
|
||||
put_online_cpus();
|
||||
@@ -2612,6 +2763,22 @@ static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
|
||||
rb_end_commit(cpu_buffer);
|
||||
}
|
||||
|
||||
static __always_inline void
|
||||
rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
|
||||
{
|
||||
if (buffer->irq_work.waiters_pending) {
|
||||
buffer->irq_work.waiters_pending = false;
|
||||
/* irq_work_queue() supplies it's own memory barriers */
|
||||
irq_work_queue(&buffer->irq_work.work);
|
||||
}
|
||||
|
||||
if (cpu_buffer->irq_work.waiters_pending) {
|
||||
cpu_buffer->irq_work.waiters_pending = false;
|
||||
/* irq_work_queue() supplies it's own memory barriers */
|
||||
irq_work_queue(&cpu_buffer->irq_work.work);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* ring_buffer_unlock_commit - commit a reserved
|
||||
* @buffer: The buffer to commit to
|
||||
@@ -2631,6 +2798,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
|
||||
|
||||
rb_commit(cpu_buffer, event);
|
||||
|
||||
rb_wakeups(buffer, cpu_buffer);
|
||||
|
||||
trace_recursive_unlock();
|
||||
|
||||
preempt_enable_notrace();
|
||||
@@ -2803,6 +2972,8 @@ int ring_buffer_write(struct ring_buffer *buffer,
|
||||
|
||||
rb_commit(cpu_buffer, event);
|
||||
|
||||
rb_wakeups(buffer, cpu_buffer);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
preempt_enable_notrace();
|
||||
@@ -4467,3 +4638,320 @@ static int rb_cpu_notify(struct notifier_block *self,
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_RING_BUFFER_STARTUP_TEST
/*
* This is a basic integrity check of the ring buffer.
* Late in the boot cycle this test will run when configured in.
* It will kick off a thread per CPU that will go into a loop
* writing to the per cpu ring buffer various sizes of data.
* Some of the data will be large items, some small.
*
* Another thread is created that goes into a spin, sending out
* IPIs to the other CPUs to also write into the ring buffer.
* this is to test the nesting ability of the buffer.
*
* Basic stats are recorded and reported. If something in the
* ring buffer should happen that's not expected, a big warning
* is displayed and all ring buffers are disabled.
*/
static struct task_struct *rb_threads[NR_CPUS] __initdata;

struct rb_test_data {
struct ring_buffer *buffer;
unsigned long events;
unsigned long bytes_written;
unsigned long bytes_alloc;
unsigned long bytes_dropped;
unsigned long events_nested;
unsigned long bytes_written_nested;
unsigned long bytes_alloc_nested;
unsigned long bytes_dropped_nested;
int min_size_nested;
int max_size_nested;
int max_size;
int min_size;
int cpu;
int cnt;
};

static struct rb_test_data rb_data[NR_CPUS] __initdata;

/* 1 meg per cpu */
#define RB_TEST_BUFFER_SIZE 1048576

static char rb_string[] __initdata =
"abcdefghijklmnopqrstuvwxyz1234567890!@#$%^&*()?+\\"
"?+|:';\",.<>/?abcdefghijklmnopqrstuvwxyz1234567890"
"!@#$%^&*()?+\\?+|:';\",.<>/?abcdefghijklmnopqrstuv";

static bool rb_test_started __initdata;

struct rb_item {
int size;
char str[];
};

|
||||
static __init int rb_write_something(struct rb_test_data *data, bool nested)
|
||||
{
|
||||
struct ring_buffer_event *event;
|
||||
struct rb_item *item;
|
||||
bool started;
|
||||
int event_len;
|
||||
int size;
|
||||
int len;
|
||||
int cnt;
|
||||
|
||||
/* Have nested writes different that what is written */
|
||||
cnt = data->cnt + (nested ? 27 : 0);
|
||||
|
||||
/* Multiply cnt by ~e, to make some unique increment */
|
||||
size = (data->cnt * 68 / 25) % (sizeof(rb_string) - 1);
|
||||
|
||||
len = size + sizeof(struct rb_item);
|
||||
|
||||
started = rb_test_started;
|
||||
/* read rb_test_started before checking buffer enabled */
|
||||
smp_rmb();
|
||||
|
||||
event = ring_buffer_lock_reserve(data->buffer, len);
|
||||
if (!event) {
|
||||
/* Ignore dropped events before test starts. */
|
||||
if (started) {
|
||||
if (nested)
|
||||
data->bytes_dropped += len;
|
||||
else
|
||||
data->bytes_dropped_nested += len;
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
event_len = ring_buffer_event_length(event);
|
||||
|
||||
if (RB_WARN_ON(data->buffer, event_len < len))
|
||||
goto out;
|
||||
|
||||
item = ring_buffer_event_data(event);
|
||||
item->size = size;
|
||||
memcpy(item->str, rb_string, size);
|
||||
|
||||
if (nested) {
|
||||
data->bytes_alloc_nested += event_len;
|
||||
data->bytes_written_nested += len;
|
||||
data->events_nested++;
|
||||
if (!data->min_size_nested || len < data->min_size_nested)
|
||||
data->min_size_nested = len;
|
||||
if (len > data->max_size_nested)
|
||||
data->max_size_nested = len;
|
||||
} else {
|
||||
data->bytes_alloc += event_len;
|
||||
data->bytes_written += len;
|
||||
data->events++;
|
||||
if (!data->min_size || len < data->min_size)
|
||||
data->max_size = len;
|
||||
if (len > data->max_size)
|
||||
data->max_size = len;
|
||||
}
|
||||
|
||||
out:
|
||||
ring_buffer_unlock_commit(data->buffer, event);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __init int rb_test(void *arg)
|
||||
{
|
||||
struct rb_test_data *data = arg;
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
rb_write_something(data, false);
|
||||
data->cnt++;
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
/* Now sleep between a min of 100-300us and a max of 1ms */
|
||||
usleep_range(((data->cnt % 3) + 1) * 100, 1000);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __init void rb_ipi(void *ignore)
|
||||
{
|
||||
struct rb_test_data *data;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
data = &rb_data[cpu];
|
||||
rb_write_something(data, true);
|
||||
}
|
||||
|
||||
static __init int rb_hammer_test(void *arg)
|
||||
{
|
||||
while (!kthread_should_stop()) {
|
||||
|
||||
/* Send an IPI to all cpus to write data! */
|
||||
smp_call_function(rb_ipi, NULL, 1);
|
||||
/* No sleep, but for non preempt, let others run */
|
||||
schedule();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __init int test_ringbuffer(void)
|
||||
{
|
||||
struct task_struct *rb_hammer;
|
||||
struct ring_buffer *buffer;
|
||||
int cpu;
|
||||
int ret = 0;
|
||||
|
||||
pr_info("Running ring buffer tests...\n");
|
||||
|
||||
buffer = ring_buffer_alloc(RB_TEST_BUFFER_SIZE, RB_FL_OVERWRITE);
|
||||
if (WARN_ON(!buffer))
|
||||
return 0;
|
||||
|
||||
/* Disable buffer so that threads can't write to it yet */
|
||||
ring_buffer_record_off(buffer);
|
||||
|
||||
for_each_online_cpu(cpu) {
|
||||
rb_data[cpu].buffer = buffer;
|
||||
rb_data[cpu].cpu = cpu;
|
||||
rb_data[cpu].cnt = cpu;
|
||||
rb_threads[cpu] = kthread_create(rb_test, &rb_data[cpu],
|
||||
"rbtester/%d", cpu);
|
||||
if (WARN_ON(!rb_threads[cpu])) {
|
||||
pr_cont("FAILED\n");
|
||||
ret = -1;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
kthread_bind(rb_threads[cpu], cpu);
|
||||
wake_up_process(rb_threads[cpu]);
|
||||
}
|
||||
|
||||
/* Now create the rb hammer! */
|
||||
rb_hammer = kthread_run(rb_hammer_test, NULL, "rbhammer");
|
||||
if (WARN_ON(!rb_hammer)) {
|
||||
pr_cont("FAILED\n");
|
||||
ret = -1;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
ring_buffer_record_on(buffer);
|
||||
/*
|
||||
* Show buffer is enabled before setting rb_test_started.
|
||||
* Yes there's a small race window where events could be
|
||||
* dropped and the thread wont catch it. But when a ring
|
||||
* buffer gets enabled, there will always be some kind of
|
||||
* delay before other CPUs see it. Thus, we don't care about
|
||||
* those dropped events. We care about events dropped after
|
||||
* the threads see that the buffer is active.
|
||||
*/
|
||||
smp_wmb();
|
||||
rb_test_started = true;
|
||||
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
/* Just run for 10 seconds */;
|
||||
schedule_timeout(10 * HZ);
|
||||
|
||||
kthread_stop(rb_hammer);
|
||||
|
||||
out_free:
|
||||
for_each_online_cpu(cpu) {
|
||||
if (!rb_threads[cpu])
|
||||
break;
|
||||
kthread_stop(rb_threads[cpu]);
|
||||
}
|
||||
if (ret) {
|
||||
ring_buffer_free(buffer);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Report! */
|
||||
pr_info("finished\n");
|
||||
for_each_online_cpu(cpu) {
|
||||
struct ring_buffer_event *event;
|
||||
struct rb_test_data *data = &rb_data[cpu];
|
||||
struct rb_item *item;
|
||||
unsigned long total_events;
|
||||
unsigned long total_dropped;
|
||||
unsigned long total_written;
|
||||
unsigned long total_alloc;
|
||||
unsigned long total_read = 0;
|
||||
unsigned long total_size = 0;
|
||||
unsigned long total_len = 0;
|
||||
unsigned long total_lost = 0;
|
||||
unsigned long lost;
|
||||
int big_event_size;
|
||||
int small_event_size;
|
||||
|
||||
ret = -1;
|
||||
|
||||
total_events = data->events + data->events_nested;
|
||||
total_written = data->bytes_written + data->bytes_written_nested;
|
||||
total_alloc = data->bytes_alloc + data->bytes_alloc_nested;
|
||||
total_dropped = data->bytes_dropped + data->bytes_dropped_nested;
|
||||
|
||||
big_event_size = data->max_size + data->max_size_nested;
|
||||
small_event_size = data->min_size + data->min_size_nested;
|
||||
|
||||
pr_info("CPU %d:\n", cpu);
|
||||
pr_info(" events: %ld\n", total_events);
|
||||
pr_info(" dropped bytes: %ld\n", total_dropped);
|
||||
pr_info(" alloced bytes: %ld\n", total_alloc);
|
||||
pr_info(" written bytes: %ld\n", total_written);
|
||||
pr_info(" biggest event: %d\n", big_event_size);
|
||||
pr_info(" smallest event: %d\n", small_event_size);
|
||||
|
||||
if (RB_WARN_ON(buffer, total_dropped))
|
||||
break;
|
||||
|
||||
ret = 0;
|
||||
|
||||
while ((event = ring_buffer_consume(buffer, cpu, NULL, &lost))) {
|
||||
total_lost += lost;
|
||||
item = ring_buffer_event_data(event);
|
||||
total_len += ring_buffer_event_length(event);
|
||||
total_size += item->size + sizeof(struct rb_item);
|
||||
if (memcmp(&item->str[0], rb_string, item->size) != 0) {
|
||||
pr_info("FAILED!\n");
|
||||
pr_info("buffer had: %.*s\n", item->size, item->str);
|
||||
pr_info("expected: %.*s\n", item->size, rb_string);
|
||||
RB_WARN_ON(buffer, 1);
|
||||
ret = -1;
|
||||
break;
|
||||
}
|
||||
total_read++;
|
||||
}
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
ret = -1;
|
||||
|
||||
pr_info(" read events: %ld\n", total_read);
|
||||
pr_info(" lost events: %ld\n", total_lost);
|
||||
pr_info(" total events: %ld\n", total_lost + total_read);
|
||||
pr_info(" recorded len bytes: %ld\n", total_len);
|
||||
pr_info(" recorded size bytes: %ld\n", total_size);
|
||||
if (total_lost)
|
||||
pr_info(" With dropped events, record len and size may not match\n"
|
||||
" alloced and written from above\n");
|
||||
if (!total_lost) {
|
||||
if (RB_WARN_ON(buffer, total_len != total_alloc ||
|
||||
total_size != total_written))
|
||||
break;
|
||||
}
|
||||
if (RB_WARN_ON(buffer, total_lost + total_read != total_events))
|
||||
break;
|
||||
|
||||
ret = 0;
|
||||
}
|
||||
if (!ret)
|
||||
pr_info("Ring buffer PASSED!\n");
|
||||
|
||||
ring_buffer_free(buffer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(test_ringbuffer);
|
||||
#endif /* CONFIG_RING_BUFFER_STARTUP_TEST */
|
||||
|
2267
kernel/trace/trace.c
File diff suppressed because it is too large
@@ -13,6 +13,11 @@
|
||||
#include <linux/trace_seq.h>
|
||||
#include <linux/ftrace_event.h>
|
||||
|
||||
#ifdef CONFIG_FTRACE_SYSCALLS
|
||||
#include <asm/unistd.h> /* For NR_SYSCALLS */
|
||||
#include <asm/syscall.h> /* some archs define it here */
|
||||
#endif
|
||||
|
||||
enum trace_type {
|
||||
__TRACE_FIRST_TYPE = 0,
|
||||
|
||||
@@ -29,6 +34,7 @@ enum trace_type {
|
||||
TRACE_GRAPH_ENT,
|
||||
TRACE_USER_STACK,
|
||||
TRACE_BLK,
|
||||
TRACE_BPUTS,
|
||||
|
||||
__TRACE_LAST_TYPE,
|
||||
};
|
||||
@@ -103,11 +109,6 @@ struct kretprobe_trace_entry_head {
|
||||
unsigned long ret_ip;
|
||||
};
|
||||
|
||||
struct uprobe_trace_entry_head {
|
||||
struct trace_entry ent;
|
||||
unsigned long ip;
|
||||
};
|
||||
|
||||
/*
|
||||
* trace_flag_type is an enumeration that holds different
|
||||
* states when a trace occurs. These are:
|
||||
@@ -127,12 +128,21 @@ enum trace_flag_type {
|
||||
|
||||
#define TRACE_BUF_SIZE 1024
|
||||
|
||||
struct trace_array;
|
||||
|
||||
struct trace_cpu {
|
||||
struct trace_array *tr;
|
||||
struct dentry *dir;
|
||||
int cpu;
|
||||
};
|
||||
|
||||
/*
|
||||
* The CPU trace array - it consists of thousands of trace entries
|
||||
* plus some other descriptor data: (for example which task started
|
||||
* the trace, etc.)
|
||||
*/
|
||||
struct trace_array_cpu {
|
||||
struct trace_cpu trace_cpu;
|
||||
atomic_t disabled;
|
||||
void *buffer_page; /* ring buffer spare */
|
||||
|
||||
@@ -151,20 +161,83 @@ struct trace_array_cpu {
|
||||
char comm[TASK_COMM_LEN];
|
||||
};
|
||||
|
||||
struct tracer;
|
||||
|
||||
struct trace_buffer {
|
||||
struct trace_array *tr;
|
||||
struct ring_buffer *buffer;
|
||||
struct trace_array_cpu __percpu *data;
|
||||
cycle_t time_start;
|
||||
int cpu;
|
||||
};
|
||||
|
||||
/*
|
||||
* The trace array - an array of per-CPU trace arrays. This is the
|
||||
* highest level data structure that individual tracers deal with.
|
||||
* They have on/off state as well:
|
||||
*/
|
||||
struct trace_array {
|
||||
struct ring_buffer *buffer;
|
||||
int cpu;
|
||||
struct list_head list;
|
||||
char *name;
|
||||
struct trace_buffer trace_buffer;
|
||||
#ifdef CONFIG_TRACER_MAX_TRACE
|
||||
/*
|
||||
* The max_buffer is used to snapshot the trace when a maximum
|
||||
* latency is reached, or when the user initiates a snapshot.
|
||||
* Some tracers will use this to store a maximum trace while
|
||||
* it continues examining live traces.
|
||||
*
|
||||
* The buffers for the max_buffer are set up the same as the trace_buffer
|
||||
* When a snapshot is taken, the buffer of the max_buffer is swapped
|
||||
* with the buffer of the trace_buffer and the buffers are reset for
|
||||
* the trace_buffer so the tracing can continue.
|
||||
*/
|
||||
struct trace_buffer max_buffer;
|
||||
bool allocated_snapshot;
|
||||
#endif
|
||||
int buffer_disabled;
|
||||
cycle_t time_start;
|
||||
struct trace_cpu trace_cpu; /* place holder */
|
||||
#ifdef CONFIG_FTRACE_SYSCALLS
|
||||
int sys_refcount_enter;
|
||||
int sys_refcount_exit;
|
||||
DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
|
||||
DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
|
||||
#endif
|
||||
int stop_count;
|
||||
int clock_id;
|
||||
struct tracer *current_trace;
|
||||
unsigned int flags;
|
||||
raw_spinlock_t start_lock;
|
||||
struct dentry *dir;
|
||||
struct dentry *options;
|
||||
struct dentry *percpu_dir;
|
||||
struct dentry *event_dir;
|
||||
struct list_head systems;
|
||||
struct list_head events;
|
||||
struct task_struct *waiter;
|
||||
struct trace_array_cpu *data[NR_CPUS];
|
||||
int ref;
|
||||
};
|
||||
|
||||
enum {
|
||||
TRACE_ARRAY_FL_GLOBAL = (1 << 0)
|
||||
};
|
||||
|
||||
extern struct list_head ftrace_trace_arrays;
|
||||
|
||||
/*
|
||||
* The global tracer (top) should be the first trace array added,
|
||||
* but we check the flag anyway.
|
||||
*/
|
||||
static inline struct trace_array *top_trace_array(void)
|
||||
{
|
||||
struct trace_array *tr;
|
||||
|
||||
tr = list_entry(ftrace_trace_arrays.prev,
|
||||
typeof(*tr), list);
|
||||
WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL));
|
||||
return tr;
|
||||
}
|
||||
|
||||
#define FTRACE_CMP_TYPE(var, type) \
|
||||
__builtin_types_compatible_p(typeof(var), type *)
|
||||
|
||||
@@ -200,6 +273,7 @@ extern void __ftrace_bad_type(void);
|
||||
IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
|
||||
IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
|
||||
IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
|
||||
IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS); \
|
||||
IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
|
||||
TRACE_MMIO_RW); \
|
||||
IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
|
||||
@@ -283,11 +357,16 @@ struct tracer {
|
||||
enum print_line_t (*print_line)(struct trace_iterator *iter);
|
||||
/* If you handled the flag setting, return 0 */
|
||||
int (*set_flag)(u32 old_flags, u32 bit, int set);
|
||||
/* Return 0 if OK with change, else return non-zero */
|
||||
int (*flag_changed)(struct tracer *tracer,
|
||||
u32 mask, int set);
|
||||
struct tracer *next;
|
||||
struct tracer_flags *flags;
|
||||
bool print_max;
|
||||
bool enabled;
|
||||
#ifdef CONFIG_TRACER_MAX_TRACE
|
||||
bool use_max_tr;
|
||||
bool allocated_snapshot;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
@@ -423,8 +502,6 @@ static __always_inline void trace_clear_recursion(int bit)
|
||||
current->trace_recursion = val;
|
||||
}
|
||||
|
||||
#define TRACE_PIPE_ALL_CPU -1
|
||||
|
||||
static inline struct ring_buffer_iter *
|
||||
trace_buffer_iter(struct trace_iterator *iter, int cpu)
|
||||
{
|
||||
@@ -435,10 +512,10 @@ trace_buffer_iter(struct trace_iterator *iter, int cpu)
|
||||
|
||||
int tracer_init(struct tracer *t, struct trace_array *tr);
|
||||
int tracing_is_enabled(void);
|
||||
void tracing_reset(struct trace_array *tr, int cpu);
|
||||
void tracing_reset_online_cpus(struct trace_array *tr);
|
||||
void tracing_reset(struct trace_buffer *buf, int cpu);
|
||||
void tracing_reset_online_cpus(struct trace_buffer *buf);
|
||||
void tracing_reset_current(int cpu);
|
||||
void tracing_reset_current_online_cpus(void);
|
||||
void tracing_reset_all_online_cpus(void);
|
||||
int tracing_open_generic(struct inode *inode, struct file *filp);
|
||||
struct dentry *trace_create_file(const char *name,
|
||||
umode_t mode,
|
||||
@@ -446,6 +523,7 @@ struct dentry *trace_create_file(const char *name,
|
||||
void *data,
|
||||
const struct file_operations *fops);
|
||||
|
||||
struct dentry *tracing_init_dentry_tr(struct trace_array *tr);
|
||||
struct dentry *tracing_init_dentry(void);
|
||||
|
||||
struct ring_buffer_event;
|
||||
@@ -579,7 +657,7 @@ extern int DYN_FTRACE_TEST_NAME(void);
|
||||
#define DYN_FTRACE_TEST_NAME2 trace_selftest_dynamic_test_func2
|
||||
extern int DYN_FTRACE_TEST_NAME2(void);
|
||||
|
||||
extern int ring_buffer_expanded;
|
||||
extern bool ring_buffer_expanded;
|
||||
extern bool tracing_selftest_disabled;
|
||||
DECLARE_PER_CPU(int, ftrace_cpu_disabled);
|
||||
|
||||
@@ -615,6 +693,8 @@ trace_array_vprintk(struct trace_array *tr,
|
||||
unsigned long ip, const char *fmt, va_list args);
|
||||
int trace_array_printk(struct trace_array *tr,
|
||||
unsigned long ip, const char *fmt, ...);
|
||||
int trace_array_printk_buf(struct ring_buffer *buffer,
|
||||
unsigned long ip, const char *fmt, ...);
|
||||
void trace_printk_seq(struct trace_seq *s);
|
||||
enum print_line_t print_trace_line(struct trace_iterator *iter);
|
||||
|
||||
@@ -782,6 +862,7 @@ enum trace_iterator_flags {
|
||||
TRACE_ITER_STOP_ON_FREE = 0x400000,
|
||||
TRACE_ITER_IRQ_INFO = 0x800000,
|
||||
TRACE_ITER_MARKERS = 0x1000000,
|
||||
TRACE_ITER_FUNCTION = 0x2000000,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -828,8 +909,8 @@ enum {
|
||||
|
||||
struct ftrace_event_field {
|
||||
struct list_head link;
|
||||
char *name;
|
||||
char *type;
|
||||
const char *name;
|
||||
const char *type;
|
||||
int filter_type;
|
||||
int offset;
|
||||
int size;
|
||||
@@ -847,12 +928,19 @@ struct event_filter {
|
||||
struct event_subsystem {
|
||||
struct list_head list;
|
||||
const char *name;
|
||||
struct dentry *entry;
|
||||
struct event_filter *filter;
|
||||
int nr_events;
|
||||
int ref_count;
|
||||
};
|
||||
|
||||
struct ftrace_subsystem_dir {
|
||||
struct list_head list;
|
||||
struct event_subsystem *subsystem;
|
||||
struct trace_array *tr;
|
||||
struct dentry *entry;
|
||||
int ref_count;
|
||||
int nr_events;
|
||||
};
|
||||
|
||||
#define FILTER_PRED_INVALID ((unsigned short)-1)
|
||||
#define FILTER_PRED_IS_RIGHT (1 << 15)
|
||||
#define FILTER_PRED_FOLD (1 << 15)
|
||||
@@ -902,22 +990,20 @@ struct filter_pred {
|
||||
unsigned short right;
|
||||
};
|
||||
|
||||
extern struct list_head ftrace_common_fields;
|
||||
|
||||
extern enum regex_type
|
||||
filter_parse_regex(char *buff, int len, char **search, int *not);
|
||||
extern void print_event_filter(struct ftrace_event_call *call,
|
||||
struct trace_seq *s);
|
||||
extern int apply_event_filter(struct ftrace_event_call *call,
|
||||
char *filter_string);
|
||||
extern int apply_subsystem_event_filter(struct event_subsystem *system,
|
||||
extern int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
|
||||
char *filter_string);
|
||||
extern void print_subsystem_event_filter(struct event_subsystem *system,
|
||||
struct trace_seq *s);
|
||||
extern int filter_assign_type(const char *type);
|
||||
|
||||
struct list_head *
|
||||
trace_get_fields(struct ftrace_event_call *event_call);
|
||||
struct ftrace_event_field *
|
||||
trace_find_event_field(struct ftrace_event_call *call, char *name);
|
||||
|
||||
static inline int
|
||||
filter_check_discard(struct ftrace_event_call *call, void *rec,
|
||||
@@ -934,6 +1020,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
|
||||
}
|
||||
|
||||
extern void trace_event_enable_cmd_record(bool enable);
|
||||
extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
|
||||
extern int event_trace_del_tracer(struct trace_array *tr);
|
||||
|
||||
extern struct mutex event_mutex;
|
||||
extern struct list_head ftrace_events;
|
||||
@@ -943,6 +1031,19 @@ extern const char *__stop___trace_bprintk_fmt[];
|
||||
|
||||
void trace_printk_init_buffers(void);
|
||||
void trace_printk_start_comm(void);
|
||||
int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set);
|
||||
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled);
|
||||
|
||||
/*
|
||||
* Normal trace_printk() and friends allocates special buffers
|
||||
* to do the manipulation, as well as saves the print formats
|
||||
* into sections to display. But the trace infrastructure wants
|
||||
* to use these without the added overhead at the price of being
|
||||
* a bit slower (used mainly for warnings, where we don't care
|
||||
* about performance). The internal_trace_puts() is for such
|
||||
* a purpose.
|
||||
*/
|
||||
#define internal_trace_puts(str) __trace_puts(_THIS_IP_, str, strlen(str))
|
||||
|
||||
#undef FTRACE_ENTRY
|
||||
#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \
|
||||
|
@@ -32,6 +32,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
|
||||
{
|
||||
struct ftrace_event_call *call = &event_branch;
|
||||
struct trace_array *tr = branch_tracer;
|
||||
struct trace_array_cpu *data;
|
||||
struct ring_buffer_event *event;
|
||||
struct trace_branch *entry;
|
||||
struct ring_buffer *buffer;
|
||||
@@ -51,11 +52,12 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
|
||||
|
||||
local_irq_save(flags);
|
||||
cpu = raw_smp_processor_id();
|
||||
if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
|
||||
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
|
||||
if (atomic_inc_return(&data->disabled) != 1)
|
||||
goto out;
|
||||
|
||||
pc = preempt_count();
|
||||
buffer = tr->buffer;
|
||||
buffer = tr->trace_buffer.buffer;
|
||||
event = trace_buffer_lock_reserve(buffer, TRACE_BRANCH,
|
||||
sizeof(*entry), flags, pc);
|
||||
if (!event)
|
||||
@@ -80,7 +82,7 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
|
||||
__buffer_unlock_commit(buffer, event);
|
||||
|
||||
out:
|
||||
atomic_dec(&tr->data[cpu]->disabled);
|
||||
atomic_dec(&data->disabled);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
|
@@ -57,6 +57,16 @@ u64 notrace trace_clock(void)
return local_clock();
}

/*
* trace_jiffy_clock(): Simply use jiffies as a clock counter.
*/
u64 notrace trace_clock_jiffies(void)
{
u64 jiffy = jiffies - INITIAL_JIFFIES;

/* Return nsecs */
return (u64)jiffies_to_usecs(jiffy) * 1000ULL;
}

/*
* trace_clock_global(): special globally coherent trace clock

@@ -223,8 +223,8 @@ FTRACE_ENTRY(bprint, bprint_entry,
|
||||
__dynamic_array( u32, buf )
|
||||
),
|
||||
|
||||
F_printk("%08lx fmt:%p",
|
||||
__entry->ip, __entry->fmt),
|
||||
F_printk("%pf: %s",
|
||||
(void *)__entry->ip, __entry->fmt),
|
||||
|
||||
FILTER_OTHER
|
||||
);
|
||||
@@ -238,8 +238,23 @@ FTRACE_ENTRY(print, print_entry,
|
||||
__dynamic_array( char, buf )
|
||||
),
|
||||
|
||||
F_printk("%08lx %s",
|
||||
__entry->ip, __entry->buf),
|
||||
F_printk("%pf: %s",
|
||||
(void *)__entry->ip, __entry->buf),
|
||||
|
||||
FILTER_OTHER
|
||||
);
|
||||
|
||||
FTRACE_ENTRY(bputs, bputs_entry,
|
||||
|
||||
TRACE_BPUTS,
|
||||
|
||||
F_STRUCT(
|
||||
__field( unsigned long, ip )
|
||||
__field( const char *, str )
|
||||
),
|
||||
|
||||
F_printk("%pf: %s",
|
||||
(void *)__entry->ip, __entry->str),
|
||||
|
||||
FILTER_OTHER
|
||||
);
|
||||
|
File diff suppressed because it is too large
@@ -658,33 +658,6 @@ void print_subsystem_event_filter(struct event_subsystem *system,
|
||||
mutex_unlock(&event_mutex);
|
||||
}
|
||||
|
||||
static struct ftrace_event_field *
|
||||
__find_event_field(struct list_head *head, char *name)
|
||||
{
|
||||
struct ftrace_event_field *field;
|
||||
|
||||
list_for_each_entry(field, head, link) {
|
||||
if (!strcmp(field->name, name))
|
||||
return field;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct ftrace_event_field *
|
||||
find_event_field(struct ftrace_event_call *call, char *name)
|
||||
{
|
||||
struct ftrace_event_field *field;
|
||||
struct list_head *head;
|
||||
|
||||
field = __find_event_field(&ftrace_common_fields, name);
|
||||
if (field)
|
||||
return field;
|
||||
|
||||
head = trace_get_fields(call);
|
||||
return __find_event_field(head, name);
|
||||
}
|
||||
|
||||
static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
|
||||
{
|
||||
stack->preds = kcalloc(n_preds + 1, sizeof(*stack->preds), GFP_KERNEL);
|
||||
@@ -1337,7 +1310,7 @@ static struct filter_pred *create_pred(struct filter_parse_state *ps,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
field = find_event_field(call, operand1);
|
||||
field = trace_find_event_field(call, operand1);
|
||||
if (!field) {
|
||||
parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
|
||||
return NULL;
|
||||
@@ -1907,16 +1880,17 @@ out_unlock:
|
||||
return err;
|
||||
}
|
||||
|
||||
int apply_subsystem_event_filter(struct event_subsystem *system,
|
||||
int apply_subsystem_event_filter(struct ftrace_subsystem_dir *dir,
|
||||
char *filter_string)
|
||||
{
|
||||
struct event_subsystem *system = dir->subsystem;
|
||||
struct event_filter *filter;
|
||||
int err = 0;
|
||||
|
||||
mutex_lock(&event_mutex);
|
||||
|
||||
/* Make sure the system still has events */
|
||||
if (!system->nr_events) {
|
||||
if (!dir->nr_events) {
|
||||
err = -ENODEV;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
@@ -129,7 +129,7 @@ static void __always_unused ____ftrace_check_##name(void) \
|
||||
|
||||
#undef FTRACE_ENTRY
|
||||
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter) \
|
||||
int \
|
||||
static int __init \
|
||||
ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
|
||||
{ \
|
||||
struct struct_name field; \
|
||||
@@ -168,7 +168,7 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
|
||||
#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\
|
||||
regfn) \
|
||||
\
|
||||
struct ftrace_event_class event_class_ftrace_##call = { \
|
||||
struct ftrace_event_class __refdata event_class_ftrace_##call = { \
|
||||
.system = __stringify(TRACE_SYSTEM), \
|
||||
.define_fields = ftrace_define_fields_##call, \
|
||||
.fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
|
||||
|
@@ -28,7 +28,7 @@ static void tracing_stop_function_trace(void);
|
||||
static int function_trace_init(struct trace_array *tr)
|
||||
{
|
||||
func_trace = tr;
|
||||
tr->cpu = get_cpu();
|
||||
tr->trace_buffer.cpu = get_cpu();
|
||||
put_cpu();
|
||||
|
||||
tracing_start_cmdline_record();
|
||||
@@ -44,7 +44,7 @@ static void function_trace_reset(struct trace_array *tr)
|
||||
|
||||
static void function_trace_start(struct trace_array *tr)
|
||||
{
|
||||
tracing_reset_online_cpus(tr);
|
||||
tracing_reset_online_cpus(&tr->trace_buffer);
|
||||
}
|
||||
|
||||
/* Our option */
|
||||
@@ -76,7 +76,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
|
||||
goto out;
|
||||
|
||||
cpu = smp_processor_id();
|
||||
data = tr->data[cpu];
|
||||
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
|
||||
if (!atomic_read(&data->disabled)) {
|
||||
local_save_flags(flags);
|
||||
trace_function(tr, ip, parent_ip, flags, pc);
|
||||
@@ -107,7 +107,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
cpu = raw_smp_processor_id();
|
||||
data = tr->data[cpu];
|
||||
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
|
||||
disabled = atomic_inc_return(&data->disabled);
|
||||
|
||||
if (likely(disabled == 1)) {
|
||||
@@ -214,66 +214,89 @@ static struct tracer function_trace __read_mostly =
|
||||
};
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
static void
|
||||
ftrace_traceon(unsigned long ip, unsigned long parent_ip, void **data)
|
||||
static int update_count(void **data)
|
||||
{
|
||||
long *count = (long *)data;
|
||||
|
||||
if (tracing_is_on())
|
||||
return;
|
||||
unsigned long *count = (long *)data;
|
||||
|
||||
if (!*count)
|
||||
return;
|
||||
return 0;
|
||||
|
||||
if (*count != -1)
|
||||
(*count)--;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void
|
||||
ftrace_traceon_count(unsigned long ip, unsigned long parent_ip, void **data)
|
||||
{
|
||||
if (tracing_is_on())
|
||||
return;
|
||||
|
||||
if (update_count(data))
|
||||
tracing_on();
|
||||
}
|
||||
|
||||
static void
|
||||
ftrace_traceoff_count(unsigned long ip, unsigned long parent_ip, void **data)
|
||||
{
|
||||
if (!tracing_is_on())
|
||||
return;
|
||||
|
||||
if (update_count(data))
|
||||
tracing_off();
|
||||
}
|
||||
|
||||
static void
|
||||
ftrace_traceon(unsigned long ip, unsigned long parent_ip, void **data)
|
||||
{
|
||||
if (tracing_is_on())
|
||||
return;
|
||||
|
||||
tracing_on();
|
||||
}
|
||||
|
||||
static void
|
||||
ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data)
|
||||
{
|
||||
long *count = (long *)data;
|
||||
|
||||
if (!tracing_is_on())
|
||||
return;
|
||||
|
||||
if (!*count)
|
||||
return;
|
||||
|
||||
if (*count != -1)
|
||||
(*count)--;
|
||||
|
||||
tracing_off();
|
||||
}
|
||||
|
||||
static int
|
||||
ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
|
||||
struct ftrace_probe_ops *ops, void *data);
|
||||
/*
|
||||
* Skip 4:
|
||||
* ftrace_stacktrace()
|
||||
* function_trace_probe_call()
|
||||
* ftrace_ops_list_func()
|
||||
* ftrace_call()
|
||||
*/
|
||||
#define STACK_SKIP 4
|
||||
|
||||
static struct ftrace_probe_ops traceon_probe_ops = {
|
||||
.func = ftrace_traceon,
|
||||
.print = ftrace_trace_onoff_print,
|
||||
};
|
||||
static void
|
||||
ftrace_stacktrace(unsigned long ip, unsigned long parent_ip, void **data)
|
||||
{
|
||||
trace_dump_stack(STACK_SKIP);
|
||||
}
|
||||
|
||||
static struct ftrace_probe_ops traceoff_probe_ops = {
|
||||
.func = ftrace_traceoff,
|
||||
.print = ftrace_trace_onoff_print,
|
||||
};
|
||||
static void
|
||||
ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data)
|
||||
{
|
||||
if (!tracing_is_on())
|
||||
return;
|
||||
|
||||
if (update_count(data))
|
||||
trace_dump_stack(STACK_SKIP);
|
||||
}
|
||||
|
||||
static int
|
||||
ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
|
||||
struct ftrace_probe_ops *ops, void *data)
|
||||
ftrace_probe_print(const char *name, struct seq_file *m,
|
||||
unsigned long ip, void *data)
|
||||
{
|
||||
long count = (long)data;
|
||||
|
||||
seq_printf(m, "%ps:", (void *)ip);
|
||||
|
||||
if (ops == &traceon_probe_ops)
|
||||
seq_printf(m, "traceon");
|
||||
else
|
||||
seq_printf(m, "traceoff");
|
||||
seq_printf(m, "%ps:%s", (void *)ip, name);
|
||||
|
||||
if (count == -1)
|
||||
seq_printf(m, ":unlimited\n");
|
||||
@@ -284,26 +307,61 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
|
||||
}
|
||||
|
||||
static int
|
||||
ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param)
|
||||
ftrace_traceon_print(struct seq_file *m, unsigned long ip,
|
||||
struct ftrace_probe_ops *ops, void *data)
|
||||
{
|
||||
struct ftrace_probe_ops *ops;
|
||||
|
||||
/* we register both traceon and traceoff to this callback */
|
||||
if (strcmp(cmd, "traceon") == 0)
|
||||
ops = &traceon_probe_ops;
|
||||
else
|
||||
ops = &traceoff_probe_ops;
|
||||
|
||||
unregister_ftrace_function_probe_func(glob, ops);
|
||||
|
||||
return 0;
|
||||
return ftrace_probe_print("traceon", m, ip, data);
|
||||
}
|
||||
|
||||
static int
|
||||
ftrace_trace_onoff_callback(struct ftrace_hash *hash,
|
||||
char *glob, char *cmd, char *param, int enable)
|
||||
ftrace_traceoff_print(struct seq_file *m, unsigned long ip,
|
||||
struct ftrace_probe_ops *ops, void *data)
|
||||
{
|
||||
return ftrace_probe_print("traceoff", m, ip, data);
|
||||
}
|
||||
|
||||
static int
|
||||
ftrace_stacktrace_print(struct seq_file *m, unsigned long ip,
|
||||
struct ftrace_probe_ops *ops, void *data)
|
||||
{
|
||||
return ftrace_probe_print("stacktrace", m, ip, data);
|
||||
}
|
||||
|
||||
static struct ftrace_probe_ops traceon_count_probe_ops = {
|
||||
.func = ftrace_traceon_count,
|
||||
.print = ftrace_traceon_print,
|
||||
};
|
||||
|
||||
static struct ftrace_probe_ops traceoff_count_probe_ops = {
|
||||
.func = ftrace_traceoff_count,
|
||||
.print = ftrace_traceoff_print,
|
||||
};
|
||||
|
||||
static struct ftrace_probe_ops stacktrace_count_probe_ops = {
|
||||
.func = ftrace_stacktrace_count,
|
||||
.print = ftrace_stacktrace_print,
|
||||
};
|
||||
|
||||
static struct ftrace_probe_ops traceon_probe_ops = {
|
||||
.func = ftrace_traceon,
|
||||
.print = ftrace_traceon_print,
|
||||
};
|
||||
|
||||
static struct ftrace_probe_ops traceoff_probe_ops = {
|
||||
.func = ftrace_traceoff,
|
||||
.print = ftrace_traceoff_print,
|
||||
};
|
||||
|
||||
static struct ftrace_probe_ops stacktrace_probe_ops = {
|
||||
.func = ftrace_stacktrace,
|
||||
.print = ftrace_stacktrace_print,
|
||||
};
|
||||
|
||||
static int
|
||||
ftrace_trace_probe_callback(struct ftrace_probe_ops *ops,
|
||||
struct ftrace_hash *hash, char *glob,
|
||||
char *cmd, char *param, int enable)
|
||||
{
|
||||
struct ftrace_probe_ops *ops;
|
||||
void *count = (void *)-1;
|
||||
char *number;
|
||||
int ret;
|
||||
@@ -312,14 +370,10 @@ ftrace_trace_onoff_callback(struct ftrace_hash *hash,
|
||||
if (!enable)
|
||||
return -EINVAL;
|
||||
|
||||
if (glob[0] == '!')
|
||||
return ftrace_trace_onoff_unreg(glob+1, cmd, param);
|
||||
|
||||
/* we register both traceon and traceoff to this callback */
|
||||
if (strcmp(cmd, "traceon") == 0)
|
||||
ops = &traceon_probe_ops;
|
||||
else
|
||||
ops = &traceoff_probe_ops;
|
||||
if (glob[0] == '!') {
|
||||
unregister_ftrace_function_probe_func(glob+1, ops);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!param)
|
||||
goto out_reg;
|
||||
@@ -343,6 +397,34 @@ ftrace_trace_onoff_callback(struct ftrace_hash *hash,
|
||||
return ret < 0 ? ret : 0;
|
||||
}
|
||||
|
||||
static int
|
||||
ftrace_trace_onoff_callback(struct ftrace_hash *hash,
|
||||
char *glob, char *cmd, char *param, int enable)
|
||||
{
|
||||
struct ftrace_probe_ops *ops;
|
||||
|
||||
/* we register both traceon and traceoff to this callback */
|
||||
if (strcmp(cmd, "traceon") == 0)
|
||||
ops = param ? &traceon_count_probe_ops : &traceon_probe_ops;
|
||||
else
|
||||
ops = param ? &traceoff_count_probe_ops : &traceoff_probe_ops;
|
||||
|
||||
return ftrace_trace_probe_callback(ops, hash, glob, cmd,
|
||||
param, enable);
|
||||
}
|
||||
|
||||
static int
|
||||
ftrace_stacktrace_callback(struct ftrace_hash *hash,
|
||||
char *glob, char *cmd, char *param, int enable)
|
||||
{
|
||||
struct ftrace_probe_ops *ops;
|
||||
|
||||
ops = param ? &stacktrace_count_probe_ops : &stacktrace_probe_ops;
|
||||
|
||||
return ftrace_trace_probe_callback(ops, hash, glob, cmd,
|
||||
param, enable);
|
||||
}
|
||||
|
||||
static struct ftrace_func_command ftrace_traceon_cmd = {
|
||||
.name = "traceon",
|
||||
.func = ftrace_trace_onoff_callback,
|
||||
@@ -353,6 +435,11 @@ static struct ftrace_func_command ftrace_traceoff_cmd = {
|
||||
.func = ftrace_trace_onoff_callback,
|
||||
};
|
||||
|
||||
static struct ftrace_func_command ftrace_stacktrace_cmd = {
|
||||
.name = "stacktrace",
|
||||
.func = ftrace_stacktrace_callback,
|
||||
};
|
||||
|
||||
static int __init init_func_cmd_traceon(void)
|
||||
{
|
||||
int ret;
|
||||
@@ -364,6 +451,12 @@ static int __init init_func_cmd_traceon(void)
|
||||
ret = register_ftrace_command(&ftrace_traceon_cmd);
|
||||
if (ret)
|
||||
unregister_ftrace_command(&ftrace_traceoff_cmd);
|
||||
|
||||
ret = register_ftrace_command(&ftrace_stacktrace_cmd);
|
||||
if (ret) {
|
||||
unregister_ftrace_command(&ftrace_traceoff_cmd);
|
||||
unregister_ftrace_command(&ftrace_traceon_cmd);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
|
@@ -218,7 +218,7 @@ int __trace_graph_entry(struct trace_array *tr,
|
||||
{
|
||||
struct ftrace_event_call *call = &event_funcgraph_entry;
|
||||
struct ring_buffer_event *event;
|
||||
struct ring_buffer *buffer = tr->buffer;
|
||||
struct ring_buffer *buffer = tr->trace_buffer.buffer;
|
||||
struct ftrace_graph_ent_entry *entry;
|
||||
|
||||
if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
|
||||
@@ -265,7 +265,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
|
||||
|
||||
local_irq_save(flags);
|
||||
cpu = raw_smp_processor_id();
|
||||
data = tr->data[cpu];
|
||||
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
|
||||
disabled = atomic_inc_return(&data->disabled);
|
||||
if (likely(disabled == 1)) {
|
||||
pc = preempt_count();
|
||||
@@ -323,7 +323,7 @@ void __trace_graph_return(struct trace_array *tr,
|
||||
{
|
||||
struct ftrace_event_call *call = &event_funcgraph_exit;
|
||||
struct ring_buffer_event *event;
|
||||
struct ring_buffer *buffer = tr->buffer;
|
||||
struct ring_buffer *buffer = tr->trace_buffer.buffer;
|
||||
struct ftrace_graph_ret_entry *entry;
|
||||
|
||||
if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
|
||||
@@ -350,7 +350,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace)
|
||||
|
||||
local_irq_save(flags);
|
||||
cpu = raw_smp_processor_id();
|
||||
data = tr->data[cpu];
|
||||
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
|
||||
disabled = atomic_inc_return(&data->disabled);
|
||||
if (likely(disabled == 1)) {
|
||||
pc = preempt_count();
|
||||
@@ -560,9 +560,9 @@ get_return_for_leaf(struct trace_iterator *iter,
|
||||
* We need to consume the current entry to see
|
||||
* the next one.
|
||||
*/
|
||||
ring_buffer_consume(iter->tr->buffer, iter->cpu,
|
||||
ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu,
|
||||
NULL, NULL);
|
||||
event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
|
||||
event = ring_buffer_peek(iter->trace_buffer->buffer, iter->cpu,
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
|
@@ -32,7 +32,8 @@ enum {
|
||||
|
||||
static int trace_type __read_mostly;
|
||||
|
||||
static int save_lat_flag;
|
||||
static int save_flags;
|
||||
static bool function_enabled;
|
||||
|
||||
static void stop_irqsoff_tracer(struct trace_array *tr, int graph);
|
||||
static int start_irqsoff_tracer(struct trace_array *tr, int graph);
|
||||
@@ -121,7 +122,7 @@ static int func_prolog_dec(struct trace_array *tr,
|
||||
if (!irqs_disabled_flags(*flags))
|
||||
return 0;
|
||||
|
||||
*data = tr->data[cpu];
|
||||
*data = per_cpu_ptr(tr->trace_buffer.data, cpu);
|
||||
disabled = atomic_inc_return(&(*data)->disabled);
|
||||
|
||||
if (likely(disabled == 1))
|
||||
@@ -175,7 +176,7 @@ static int irqsoff_set_flag(u32 old_flags, u32 bit, int set)
|
||||
per_cpu(tracing_cpu, cpu) = 0;
|
||||
|
||||
tracing_max_latency = 0;
|
||||
tracing_reset_online_cpus(irqsoff_trace);
|
||||
tracing_reset_online_cpus(&irqsoff_trace->trace_buffer);
|
||||
|
||||
return start_irqsoff_tracer(irqsoff_trace, set);
|
||||
}
|
||||
@@ -380,7 +381,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
|
||||
if (per_cpu(tracing_cpu, cpu))
|
||||
return;
|
||||
|
||||
data = tr->data[cpu];
|
||||
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
|
||||
|
||||
if (unlikely(!data) || atomic_read(&data->disabled))
|
||||
return;
|
||||
@@ -418,7 +419,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
|
||||
if (!tracer_enabled)
|
||||
return;
|
||||
|
||||
data = tr->data[cpu];
|
||||
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
|
||||
|
||||
if (unlikely(!data) ||
|
||||
!data->critical_start || atomic_read(&data->disabled))
|
||||
@@ -528,15 +529,60 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
|
||||
}
|
||||
#endif /* CONFIG_PREEMPT_TRACER */
|
||||
|
||||
static int start_irqsoff_tracer(struct trace_array *tr, int graph)
|
||||
static int register_irqsoff_function(int graph, int set)
|
||||
{
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
if (!graph)
|
||||
ret = register_ftrace_function(&trace_ops);
|
||||
else
|
||||
/* 'set' is set if TRACE_ITER_FUNCTION is about to be set */
|
||||
if (function_enabled || (!set && !(trace_flags & TRACE_ITER_FUNCTION)))
|
||||
return 0;
|
||||
|
||||
if (graph)
|
||||
ret = register_ftrace_graph(&irqsoff_graph_return,
|
||||
&irqsoff_graph_entry);
|
||||
else
|
||||
ret = register_ftrace_function(&trace_ops);
|
||||
|
||||
if (!ret)
|
||||
function_enabled = true;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void unregister_irqsoff_function(int graph)
|
||||
{
|
||||
if (!function_enabled)
|
||||
return;
|
||||
|
||||
if (graph)
|
||||
unregister_ftrace_graph();
|
||||
else
|
||||
unregister_ftrace_function(&trace_ops);
|
||||
|
||||
function_enabled = false;
|
||||
}
|
||||
|
||||
static void irqsoff_function_set(int set)
|
||||
{
|
||||
if (set)
|
||||
register_irqsoff_function(is_graph(), 1);
|
||||
else
|
||||
unregister_irqsoff_function(is_graph());
|
||||
}
|
||||
|
||||
static int irqsoff_flag_changed(struct tracer *tracer, u32 mask, int set)
|
||||
{
|
||||
if (mask & TRACE_ITER_FUNCTION)
|
||||
irqsoff_function_set(set);
|
||||
|
||||
return trace_keep_overwrite(tracer, mask, set);
|
||||
}
|
||||
|
||||
static int start_irqsoff_tracer(struct trace_array *tr, int graph)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = register_irqsoff_function(graph, 0);
|
||||
|
||||
if (!ret && tracing_is_enabled())
|
||||
tracer_enabled = 1;
|
||||
@@ -550,22 +596,22 @@ static void stop_irqsoff_tracer(struct trace_array *tr, int graph)
|
||||
{
|
||||
tracer_enabled = 0;
|
||||
|
||||
if (!graph)
|
||||
unregister_ftrace_function(&trace_ops);
|
||||
else
|
||||
unregister_ftrace_graph();
|
||||
unregister_irqsoff_function(graph);
|
||||
}
|
||||
|
||||
static void __irqsoff_tracer_init(struct trace_array *tr)
|
||||
{
|
||||
save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
|
||||
trace_flags |= TRACE_ITER_LATENCY_FMT;
|
||||
save_flags = trace_flags;
|
||||
|
||||
/* non overwrite screws up the latency tracers */
|
||||
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
|
||||
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
|
||||
|
||||
tracing_max_latency = 0;
|
||||
irqsoff_trace = tr;
|
||||
/* make sure that the tracer is visible */
|
||||
smp_wmb();
|
||||
tracing_reset_online_cpus(tr);
|
||||
tracing_reset_online_cpus(&tr->trace_buffer);
|
||||
|
||||
if (start_irqsoff_tracer(tr, is_graph()))
|
||||
printk(KERN_ERR "failed to start irqsoff tracer\n");
|
||||
@@ -573,10 +619,13 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
|
||||
|
||||
static void irqsoff_tracer_reset(struct trace_array *tr)
|
||||
{
|
||||
int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT;
|
||||
int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE;
|
||||
|
||||
stop_irqsoff_tracer(tr, is_graph());
|
||||
|
||||
if (!save_lat_flag)
|
||||
trace_flags &= ~TRACE_ITER_LATENCY_FMT;
|
||||
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
|
||||
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
|
||||
}
|
||||
|
||||
static void irqsoff_tracer_start(struct trace_array *tr)
|
||||
@@ -609,6 +658,7 @@ static struct tracer irqsoff_tracer __read_mostly =
|
||||
.print_line = irqsoff_print_line,
|
||||
.flags = &tracer_flags,
|
||||
.set_flag = irqsoff_set_flag,
|
||||
.flag_changed = irqsoff_flag_changed,
|
||||
#ifdef CONFIG_FTRACE_SELFTEST
|
||||
.selftest = trace_selftest_startup_irqsoff,
|
||||
#endif
|
||||
@@ -642,6 +692,7 @@ static struct tracer preemptoff_tracer __read_mostly =
|
||||
.print_line = irqsoff_print_line,
|
||||
.flags = &tracer_flags,
|
||||
.set_flag = irqsoff_set_flag,
|
||||
.flag_changed = irqsoff_flag_changed,
|
||||
#ifdef CONFIG_FTRACE_SELFTEST
|
||||
.selftest = trace_selftest_startup_preemptoff,
|
||||
#endif
|
||||
@@ -677,6 +728,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
|
||||
.print_line = irqsoff_print_line,
|
||||
.flags = &tracer_flags,
|
||||
.set_flag = irqsoff_set_flag,
|
||||
.flag_changed = irqsoff_flag_changed,
|
||||
#ifdef CONFIG_FTRACE_SELFTEST
|
||||
.selftest = trace_selftest_startup_preemptirqsoff,
|
||||
#endif
|
||||
|
@@ -26,7 +26,7 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file)
|
||||
trace_init_global_iter(&iter);
|
||||
|
||||
for_each_tracing_cpu(cpu) {
|
||||
atomic_inc(&iter.tr->data[cpu]->disabled);
|
||||
atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
|
||||
}
|
||||
|
||||
old_userobj = trace_flags;
|
||||
@@ -43,17 +43,17 @@ static void ftrace_dump_buf(int skip_lines, long cpu_file)
|
||||
iter.iter_flags |= TRACE_FILE_LAT_FMT;
|
||||
iter.pos = -1;
|
||||
|
||||
if (cpu_file == TRACE_PIPE_ALL_CPU) {
|
||||
if (cpu_file == RING_BUFFER_ALL_CPUS) {
|
||||
for_each_tracing_cpu(cpu) {
|
||||
iter.buffer_iter[cpu] =
|
||||
ring_buffer_read_prepare(iter.tr->buffer, cpu);
|
||||
ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu);
|
||||
ring_buffer_read_start(iter.buffer_iter[cpu]);
|
||||
tracing_iter_reset(&iter, cpu);
|
||||
}
|
||||
} else {
|
||||
iter.cpu_file = cpu_file;
|
||||
iter.buffer_iter[cpu_file] =
|
||||
ring_buffer_read_prepare(iter.tr->buffer, cpu_file);
|
||||
ring_buffer_read_prepare(iter.trace_buffer->buffer, cpu_file);
|
||||
ring_buffer_read_start(iter.buffer_iter[cpu_file]);
|
||||
tracing_iter_reset(&iter, cpu_file);
|
||||
}
|
||||
@@ -83,7 +83,7 @@ out:
|
||||
trace_flags = old_userobj;
|
||||
|
||||
for_each_tracing_cpu(cpu) {
|
||||
atomic_dec(&iter.tr->data[cpu]->disabled);
|
||||
atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
|
||||
}
|
||||
|
||||
for_each_tracing_cpu(cpu)
|
||||
@@ -115,7 +115,7 @@ static int kdb_ftdump(int argc, const char **argv)
|
||||
!cpu_online(cpu_file))
|
||||
return KDB_BADINT;
|
||||
} else {
|
||||
cpu_file = TRACE_PIPE_ALL_CPU;
|
||||
cpu_file = RING_BUFFER_ALL_CPUS;
|
||||
}
|
||||
|
||||
kdb_trap_printk++;
|
||||
|
@@ -31,7 +31,7 @@ static void mmio_reset_data(struct trace_array *tr)
|
||||
overrun_detected = false;
|
||||
prev_overruns = 0;
|
||||
|
||||
tracing_reset_online_cpus(tr);
|
||||
tracing_reset_online_cpus(&tr->trace_buffer);
|
||||
}
|
||||
|
||||
static int mmio_trace_init(struct trace_array *tr)
|
||||
@@ -128,7 +128,7 @@ static void mmio_close(struct trace_iterator *iter)
|
||||
static unsigned long count_overruns(struct trace_iterator *iter)
|
||||
{
|
||||
unsigned long cnt = atomic_xchg(&dropped_count, 0);
|
||||
unsigned long over = ring_buffer_overruns(iter->tr->buffer);
|
||||
unsigned long over = ring_buffer_overruns(iter->trace_buffer->buffer);
|
||||
|
||||
if (over > prev_overruns)
|
||||
cnt += over - prev_overruns;
|
||||
@@ -309,7 +309,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
|
||||
struct mmiotrace_rw *rw)
|
||||
{
|
||||
struct ftrace_event_call *call = &event_mmiotrace_rw;
|
||||
struct ring_buffer *buffer = tr->buffer;
|
||||
struct ring_buffer *buffer = tr->trace_buffer.buffer;
|
||||
struct ring_buffer_event *event;
|
||||
struct trace_mmiotrace_rw *entry;
|
||||
int pc = preempt_count();
|
||||
@@ -330,7 +330,7 @@ static void __trace_mmiotrace_rw(struct trace_array *tr,
|
||||
void mmio_trace_rw(struct mmiotrace_rw *rw)
|
||||
{
|
||||
struct trace_array *tr = mmio_trace_array;
|
||||
struct trace_array_cpu *data = tr->data[smp_processor_id()];
|
||||
struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id());
|
||||
__trace_mmiotrace_rw(tr, data, rw);
|
||||
}
|
||||
|
||||
@@ -339,7 +339,7 @@ static void __trace_mmiotrace_map(struct trace_array *tr,
|
||||
struct mmiotrace_map *map)
|
||||
{
|
||||
struct ftrace_event_call *call = &event_mmiotrace_map;
|
||||
struct ring_buffer *buffer = tr->buffer;
|
||||
struct ring_buffer *buffer = tr->trace_buffer.buffer;
|
||||
struct ring_buffer_event *event;
|
||||
struct trace_mmiotrace_map *entry;
|
||||
int pc = preempt_count();
|
||||
@@ -363,7 +363,7 @@ void mmio_trace_mapping(struct mmiotrace_map *map)
|
||||
struct trace_array_cpu *data;
|
||||
|
||||
preempt_disable();
|
||||
data = tr->data[smp_processor_id()];
|
||||
data = per_cpu_ptr(tr->trace_buffer.data, smp_processor_id());
|
||||
__trace_mmiotrace_map(tr, data, map);
|
||||
preempt_enable();
|
||||
}
|
||||
|
@@ -14,7 +14,7 @@
|
||||
/* must be a power of 2 */
|
||||
#define EVENT_HASHSIZE 128
|
||||
|
||||
DECLARE_RWSEM(trace_event_mutex);
|
||||
DECLARE_RWSEM(trace_event_sem);
|
||||
|
||||
static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
|
||||
|
||||
@@ -37,6 +37,22 @@ int trace_print_seq(struct seq_file *m, struct trace_seq *s)
|
||||
return ret;
|
||||
}
|
||||
|
||||
enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter)
|
||||
{
|
||||
struct trace_seq *s = &iter->seq;
|
||||
struct trace_entry *entry = iter->ent;
|
||||
struct bputs_entry *field;
|
||||
int ret;
|
||||
|
||||
trace_assign_type(field, entry);
|
||||
|
||||
ret = trace_seq_puts(s, field->str);
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
|
||||
return TRACE_TYPE_HANDLED;
|
||||
}
|
||||
|
||||
enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
|
||||
{
|
||||
struct trace_seq *s = &iter->seq;
|
||||
@@ -397,6 +413,32 @@ ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
|
||||
}
|
||||
EXPORT_SYMBOL(ftrace_print_hex_seq);
|
||||
|
||||
int ftrace_raw_output_prep(struct trace_iterator *iter,
|
||||
struct trace_event *trace_event)
|
||||
{
|
||||
struct ftrace_event_call *event;
|
||||
struct trace_seq *s = &iter->seq;
|
||||
struct trace_seq *p = &iter->tmp_seq;
|
||||
struct trace_entry *entry;
|
||||
int ret;
|
||||
|
||||
event = container_of(trace_event, struct ftrace_event_call, event);
|
||||
entry = iter->ent;
|
||||
|
||||
if (entry->type != event->event.type) {
|
||||
WARN_ON_ONCE(1);
|
||||
return TRACE_TYPE_UNHANDLED;
|
||||
}
|
||||
|
||||
trace_seq_init(p);
|
||||
ret = trace_seq_printf(s, "%s: ", event->name);
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(ftrace_raw_output_prep);
|
||||
|
||||
#ifdef CONFIG_KRETPROBES
|
||||
static inline const char *kretprobed(const char *name)
|
||||
{
|
||||
@@ -617,7 +659,7 @@ lat_print_timestamp(struct trace_iterator *iter, u64 next_ts)
|
||||
{
|
||||
unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE;
|
||||
unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS;
|
||||
unsigned long long abs_ts = iter->ts - iter->tr->time_start;
|
||||
unsigned long long abs_ts = iter->ts - iter->trace_buffer->time_start;
|
||||
unsigned long long rel_ts = next_ts - iter->ts;
|
||||
struct trace_seq *s = &iter->seq;
|
||||
|
||||
@@ -783,12 +825,12 @@ static int trace_search_list(struct list_head **list)
|
||||
|
||||
void trace_event_read_lock(void)
|
||||
{
|
||||
down_read(&trace_event_mutex);
|
||||
down_read(&trace_event_sem);
|
||||
}
|
||||
|
||||
void trace_event_read_unlock(void)
|
||||
{
|
||||
up_read(&trace_event_mutex);
|
||||
up_read(&trace_event_sem);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -811,7 +853,7 @@ int register_ftrace_event(struct trace_event *event)
|
||||
unsigned key;
|
||||
int ret = 0;
|
||||
|
||||
down_write(&trace_event_mutex);
|
||||
down_write(&trace_event_sem);
|
||||
|
||||
if (WARN_ON(!event))
|
||||
goto out;
|
||||
@@ -866,14 +908,14 @@ int register_ftrace_event(struct trace_event *event)
|
||||
|
||||
ret = event->type;
|
||||
out:
|
||||
up_write(&trace_event_mutex);
|
||||
up_write(&trace_event_sem);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_ftrace_event);
|
||||
|
||||
/*
|
||||
* Used by module code with the trace_event_mutex held for write.
|
||||
* Used by module code with the trace_event_sem held for write.
|
||||
*/
|
||||
int __unregister_ftrace_event(struct trace_event *event)
|
||||
{
|
||||
@@ -888,9 +930,9 @@ int __unregister_ftrace_event(struct trace_event *event)
|
||||
*/
|
||||
int unregister_ftrace_event(struct trace_event *event)
|
||||
{
|
||||
down_write(&trace_event_mutex);
|
||||
down_write(&trace_event_sem);
|
||||
__unregister_ftrace_event(event);
|
||||
up_write(&trace_event_mutex);
|
||||
up_write(&trace_event_sem);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1217,6 +1259,64 @@ static struct trace_event trace_user_stack_event = {
|
||||
.funcs = &trace_user_stack_funcs,
|
||||
};
|
||||
|
||||
/* TRACE_BPUTS */
|
||||
static enum print_line_t
|
||||
trace_bputs_print(struct trace_iterator *iter, int flags,
|
||||
struct trace_event *event)
|
||||
{
|
||||
struct trace_entry *entry = iter->ent;
|
||||
struct trace_seq *s = &iter->seq;
|
||||
struct bputs_entry *field;
|
||||
|
||||
trace_assign_type(field, entry);
|
||||
|
||||
if (!seq_print_ip_sym(s, field->ip, flags))
|
||||
goto partial;
|
||||
|
||||
if (!trace_seq_puts(s, ": "))
|
||||
goto partial;
|
||||
|
||||
if (!trace_seq_puts(s, field->str))
|
||||
goto partial;
|
||||
|
||||
return TRACE_TYPE_HANDLED;
|
||||
|
||||
partial:
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
}
|
||||
|
||||
|
||||
static enum print_line_t
|
||||
trace_bputs_raw(struct trace_iterator *iter, int flags,
|
||||
struct trace_event *event)
|
||||
{
|
||||
struct bputs_entry *field;
|
||||
struct trace_seq *s = &iter->seq;
|
||||
|
||||
trace_assign_type(field, iter->ent);
|
||||
|
||||
if (!trace_seq_printf(s, ": %lx : ", field->ip))
|
||||
goto partial;
|
||||
|
||||
if (!trace_seq_puts(s, field->str))
|
||||
goto partial;
|
||||
|
||||
return TRACE_TYPE_HANDLED;
|
||||
|
||||
partial:
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
}
|
||||
|
||||
static struct trace_event_functions trace_bputs_funcs = {
|
||||
.trace = trace_bputs_print,
|
||||
.raw = trace_bputs_raw,
|
||||
};
|
||||
|
||||
static struct trace_event trace_bputs_event = {
|
||||
.type = TRACE_BPUTS,
|
||||
.funcs = &trace_bputs_funcs,
|
||||
};
|
||||
|
||||
/* TRACE_BPRINT */
|
||||
static enum print_line_t
|
||||
trace_bprint_print(struct trace_iterator *iter, int flags,
|
||||
@@ -1329,6 +1429,7 @@ static struct trace_event *events[] __initdata = {
|
||||
&trace_wake_event,
|
||||
&trace_stack_event,
|
||||
&trace_user_stack_event,
|
||||
&trace_bputs_event,
|
||||
&trace_bprint_event,
|
||||
&trace_print_event,
|
||||
NULL
|
||||
|
@@ -4,6 +4,8 @@
|
||||
#include <linux/trace_seq.h>
|
||||
#include "trace.h"
|
||||
|
||||
extern enum print_line_t
|
||||
trace_print_bputs_msg_only(struct trace_iterator *iter);
|
||||
extern enum print_line_t
|
||||
trace_print_bprintk_msg_only(struct trace_iterator *iter);
|
||||
extern enum print_line_t
|
||||
@@ -31,7 +33,7 @@ trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry);
|
||||
|
||||
/* used by module unregistering */
|
||||
extern int __unregister_ftrace_event(struct trace_event *event);
|
||||
extern struct rw_semaphore trace_event_mutex;
|
||||
extern struct rw_semaphore trace_event_sem;
|
||||
|
||||
#define MAX_MEMHEX_BYTES 8
|
||||
#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1)
|
||||
|
@@ -28,7 +28,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
|
||||
unsigned long flags, int pc)
|
||||
{
|
||||
struct ftrace_event_call *call = &event_context_switch;
|
||||
struct ring_buffer *buffer = tr->buffer;
|
||||
struct ring_buffer *buffer = tr->trace_buffer.buffer;
|
||||
struct ring_buffer_event *event;
|
||||
struct ctx_switch_entry *entry;
|
||||
|
||||
@@ -69,7 +69,7 @@ probe_sched_switch(void *ignore, struct task_struct *prev, struct task_struct *n
|
||||
pc = preempt_count();
|
||||
local_irq_save(flags);
|
||||
cpu = raw_smp_processor_id();
|
||||
data = ctx_trace->data[cpu];
|
||||
data = per_cpu_ptr(ctx_trace->trace_buffer.data, cpu);
|
||||
|
||||
if (likely(!atomic_read(&data->disabled)))
|
||||
tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc);
|
||||
@@ -86,7 +86,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
|
||||
struct ftrace_event_call *call = &event_wakeup;
|
||||
struct ring_buffer_event *event;
|
||||
struct ctx_switch_entry *entry;
|
||||
struct ring_buffer *buffer = tr->buffer;
|
||||
struct ring_buffer *buffer = tr->trace_buffer.buffer;
|
||||
|
||||
event = trace_buffer_lock_reserve(buffer, TRACE_WAKE,
|
||||
sizeof(*entry), flags, pc);
|
||||
@@ -123,7 +123,7 @@ probe_sched_wakeup(void *ignore, struct task_struct *wakee, int success)
|
||||
pc = preempt_count();
|
||||
local_irq_save(flags);
|
||||
cpu = raw_smp_processor_id();
|
||||
data = ctx_trace->data[cpu];
|
||||
data = per_cpu_ptr(ctx_trace->trace_buffer.data, cpu);
|
||||
|
||||
if (likely(!atomic_read(&data->disabled)))
|
||||
tracing_sched_wakeup_trace(ctx_trace, wakee, current,
|
||||
|
@@ -36,7 +36,8 @@ static void __wakeup_reset(struct trace_array *tr);
|
||||
static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
|
||||
static void wakeup_graph_return(struct ftrace_graph_ret *trace);
|
||||
|
||||
static int save_lat_flag;
|
||||
static int save_flags;
|
||||
static bool function_enabled;
|
||||
|
||||
#define TRACE_DISPLAY_GRAPH 1
|
||||
|
||||
@@ -89,7 +90,7 @@ func_prolog_preempt_disable(struct trace_array *tr,
|
||||
if (cpu != wakeup_current_cpu)
|
||||
goto out_enable;
|
||||
|
||||
*data = tr->data[cpu];
|
||||
*data = per_cpu_ptr(tr->trace_buffer.data, cpu);
|
||||
disabled = atomic_inc_return(&(*data)->disabled);
|
||||
if (unlikely(disabled != 1))
|
||||
goto out;
|
||||
@@ -134,15 +135,60 @@ static struct ftrace_ops trace_ops __read_mostly =
|
||||
};
|
||||
#endif /* CONFIG_FUNCTION_TRACER */
|
||||
|
||||
static int register_wakeup_function(int graph, int set)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* 'set' is set if TRACE_ITER_FUNCTION is about to be set */
|
||||
if (function_enabled || (!set && !(trace_flags & TRACE_ITER_FUNCTION)))
|
||||
return 0;
|
||||
|
||||
if (graph)
|
||||
ret = register_ftrace_graph(&wakeup_graph_return,
|
||||
&wakeup_graph_entry);
|
||||
else
|
||||
ret = register_ftrace_function(&trace_ops);
|
||||
|
||||
if (!ret)
|
||||
function_enabled = true;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void unregister_wakeup_function(int graph)
|
||||
{
|
||||
if (!function_enabled)
|
||||
return;
|
||||
|
||||
if (graph)
|
||||
unregister_ftrace_graph();
|
||||
else
|
||||
unregister_ftrace_function(&trace_ops);
|
||||
|
||||
function_enabled = false;
|
||||
}
|
||||
|
||||
static void wakeup_function_set(int set)
|
||||
{
|
||||
if (set)
|
||||
register_wakeup_function(is_graph(), 1);
|
||||
else
|
||||
unregister_wakeup_function(is_graph());
|
||||
}
|
||||
|
||||
static int wakeup_flag_changed(struct tracer *tracer, u32 mask, int set)
|
||||
{
|
||||
if (mask & TRACE_ITER_FUNCTION)
|
||||
wakeup_function_set(set);
|
||||
|
||||
return trace_keep_overwrite(tracer, mask, set);
|
||||
}
|
||||
|
||||
static int start_func_tracer(int graph)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!graph)
|
||||
ret = register_ftrace_function(&trace_ops);
|
||||
else
|
||||
ret = register_ftrace_graph(&wakeup_graph_return,
|
||||
&wakeup_graph_entry);
|
||||
ret = register_wakeup_function(graph, 0);
|
||||
|
||||
if (!ret && tracing_is_enabled())
|
||||
tracer_enabled = 1;
|
||||
@@ -156,10 +202,7 @@ static void stop_func_tracer(int graph)
|
||||
{
|
||||
tracer_enabled = 0;
|
||||
|
||||
if (!graph)
|
||||
unregister_ftrace_function(&trace_ops);
|
||||
else
|
||||
unregister_ftrace_graph();
|
||||
unregister_wakeup_function(graph);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
@@ -353,7 +396,7 @@ probe_wakeup_sched_switch(void *ignore,
|
||||
|
||||
/* disable local data, not wakeup_cpu data */
|
||||
cpu = raw_smp_processor_id();
|
||||
disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
|
||||
disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled);
|
||||
if (likely(disabled != 1))
|
||||
goto out;
|
||||
|
||||
@@ -365,7 +408,7 @@ probe_wakeup_sched_switch(void *ignore,
|
||||
goto out_unlock;
|
||||
|
||||
/* The task we are waiting for is waking up */
|
||||
data = wakeup_trace->data[wakeup_cpu];
|
||||
data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu);
|
||||
|
||||
__trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
|
||||
tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
|
||||
@@ -387,7 +430,7 @@ out_unlock:
|
||||
arch_spin_unlock(&wakeup_lock);
|
||||
local_irq_restore(flags);
|
||||
out:
|
||||
atomic_dec(&wakeup_trace->data[cpu]->disabled);
|
||||
atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled);
|
||||
}
|
||||
|
||||
static void __wakeup_reset(struct trace_array *tr)
|
||||
@@ -405,7 +448,7 @@ static void wakeup_reset(struct trace_array *tr)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
tracing_reset_online_cpus(tr);
|
||||
tracing_reset_online_cpus(&tr->trace_buffer);
|
||||
|
||||
local_irq_save(flags);
|
||||
arch_spin_lock(&wakeup_lock);
|
||||
@@ -435,7 +478,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
|
||||
return;
|
||||
|
||||
pc = preempt_count();
|
||||
disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
|
||||
disabled = atomic_inc_return(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled);
|
||||
if (unlikely(disabled != 1))
|
||||
goto out;
|
||||
|
||||
@@ -458,7 +501,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
|
||||
|
||||
local_save_flags(flags);
|
||||
|
||||
data = wakeup_trace->data[wakeup_cpu];
|
||||
data = per_cpu_ptr(wakeup_trace->trace_buffer.data, wakeup_cpu);
|
||||
data->preempt_timestamp = ftrace_now(cpu);
|
||||
tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
|
||||
|
||||
@@ -472,7 +515,7 @@ probe_wakeup(void *ignore, struct task_struct *p, int success)
|
||||
out_locked:
|
||||
arch_spin_unlock(&wakeup_lock);
|
||||
out:
|
||||
atomic_dec(&wakeup_trace->data[cpu]->disabled);
|
||||
atomic_dec(&per_cpu_ptr(wakeup_trace->trace_buffer.data, cpu)->disabled);
|
||||
}
|
||||
|
||||
static void start_wakeup_tracer(struct trace_array *tr)
|
||||
@@ -540,8 +583,11 @@ static void stop_wakeup_tracer(struct trace_array *tr)
|
||||
|
||||
static int __wakeup_tracer_init(struct trace_array *tr)
|
||||
{
|
||||
save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT;
|
||||
trace_flags |= TRACE_ITER_LATENCY_FMT;
|
||||
save_flags = trace_flags;
|
||||
|
||||
/* non overwrite screws up the latency tracers */
|
||||
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
|
||||
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
|
||||
|
||||
tracing_max_latency = 0;
|
||||
wakeup_trace = tr;
|
||||
@@ -563,12 +609,15 @@ static int wakeup_rt_tracer_init(struct trace_array *tr)
|
||||
|
||||
static void wakeup_tracer_reset(struct trace_array *tr)
|
||||
{
|
||||
int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT;
|
||||
int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE;
|
||||
|
||||
stop_wakeup_tracer(tr);
|
||||
/* make sure we put back any tasks we are tracing */
|
||||
wakeup_reset(tr);
|
||||
|
||||
if (!save_lat_flag)
|
||||
trace_flags &= ~TRACE_ITER_LATENCY_FMT;
|
||||
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
|
||||
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
|
||||
}
|
||||
|
||||
static void wakeup_tracer_start(struct trace_array *tr)
|
||||
@@ -594,6 +643,7 @@ static struct tracer wakeup_tracer __read_mostly =
|
||||
.print_line = wakeup_print_line,
|
||||
.flags = &tracer_flags,
|
||||
.set_flag = wakeup_set_flag,
|
||||
.flag_changed = wakeup_flag_changed,
|
||||
#ifdef CONFIG_FTRACE_SELFTEST
|
||||
.selftest = trace_selftest_startup_wakeup,
|
||||
#endif
|
||||
@@ -615,6 +665,7 @@ static struct tracer wakeup_rt_tracer __read_mostly =
|
||||
.print_line = wakeup_print_line,
|
||||
.flags = &tracer_flags,
|
||||
.set_flag = wakeup_set_flag,
|
||||
.flag_changed = wakeup_flag_changed,
|
||||
#ifdef CONFIG_FTRACE_SELFTEST
|
||||
.selftest = trace_selftest_startup_wakeup,
|
||||
#endif
|
||||
|
@@ -21,13 +21,13 @@ static inline int trace_valid_entry(struct trace_entry *entry)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
|
||||
static int trace_test_buffer_cpu(struct trace_buffer *buf, int cpu)
|
||||
{
|
||||
struct ring_buffer_event *event;
|
||||
struct trace_entry *entry;
|
||||
unsigned int loops = 0;
|
||||
|
||||
while ((event = ring_buffer_consume(tr->buffer, cpu, NULL, NULL))) {
|
||||
while ((event = ring_buffer_consume(buf->buffer, cpu, NULL, NULL))) {
|
||||
entry = ring_buffer_event_data(event);
|
||||
|
||||
/*
|
||||
@@ -58,7 +58,7 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
|
||||
* Test the trace buffer to see if all the elements
|
||||
* are still sane.
|
||||
*/
|
||||
static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
|
||||
static int trace_test_buffer(struct trace_buffer *buf, unsigned long *count)
|
||||
{
|
||||
unsigned long flags, cnt = 0;
|
||||
int cpu, ret = 0;
|
||||
@@ -67,7 +67,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
|
||||
local_irq_save(flags);
|
||||
arch_spin_lock(&ftrace_max_lock);
|
||||
|
||||
cnt = ring_buffer_entries(tr->buffer);
|
||||
cnt = ring_buffer_entries(buf->buffer);
|
||||
|
||||
/*
|
||||
* The trace_test_buffer_cpu runs a while loop to consume all data.
|
||||
@@ -78,7 +78,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count)
|
||||
*/
|
||||
tracing_off();
|
||||
for_each_possible_cpu(cpu) {
|
||||
ret = trace_test_buffer_cpu(tr, cpu);
|
||||
ret = trace_test_buffer_cpu(buf, cpu);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
@@ -355,7 +355,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
|
||||
msleep(100);
|
||||
|
||||
/* we should have nothing in the buffer */
|
||||
ret = trace_test_buffer(tr, &count);
|
||||
ret = trace_test_buffer(&tr->trace_buffer, &count);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -376,7 +376,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
|
||||
ftrace_enabled = 0;
|
||||
|
||||
/* check the trace buffer */
|
||||
ret = trace_test_buffer(tr, &count);
|
||||
ret = trace_test_buffer(&tr->trace_buffer, &count);
|
||||
tracing_start();
|
||||
|
||||
/* we should only have one item */
|
||||
@@ -666,7 +666,7 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
|
||||
ftrace_enabled = 0;
|
||||
|
||||
/* check the trace buffer */
|
||||
ret = trace_test_buffer(tr, &count);
|
||||
ret = trace_test_buffer(&tr->trace_buffer, &count);
|
||||
trace->reset(tr);
|
||||
tracing_start();
|
||||
|
||||
@@ -703,8 +703,6 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
|
||||
/* Maximum number of functions to trace before diagnosing a hang */
|
||||
#define GRAPH_MAX_FUNC_TEST 100000000
|
||||
|
||||
static void
|
||||
__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode);
|
||||
static unsigned int graph_hang_thresh;
|
||||
|
||||
/* Wrap the real function entry probe to avoid possible hanging */
|
||||
@@ -714,8 +712,11 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
|
||||
if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) {
|
||||
ftrace_graph_stop();
|
||||
printk(KERN_WARNING "BUG: Function graph tracer hang!\n");
|
||||
if (ftrace_dump_on_oops)
|
||||
__ftrace_dump(false, DUMP_ALL);
|
||||
if (ftrace_dump_on_oops) {
|
||||
ftrace_dump(DUMP_ALL);
|
||||
/* ftrace_dump() disables tracing */
|
||||
tracing_on();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -737,7 +738,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
|
||||
* Simulate the init() callback but we attach a watchdog callback
|
||||
* to detect and recover from possible hangs
|
||||
*/
|
||||
tracing_reset_online_cpus(tr);
|
||||
tracing_reset_online_cpus(&tr->trace_buffer);
|
||||
set_graph_array(tr);
|
||||
ret = register_ftrace_graph(&trace_graph_return,
|
||||
&trace_graph_entry_watchdog);
|
||||
@@ -760,7 +761,7 @@ trace_selftest_startup_function_graph(struct tracer *trace,
|
||||
tracing_stop();
|
||||
|
||||
/* check the trace buffer */
|
||||
ret = trace_test_buffer(tr, &count);
|
||||
ret = trace_test_buffer(&tr->trace_buffer, &count);
|
||||
|
||||
trace->reset(tr);
|
||||
tracing_start();
|
||||
@@ -815,9 +816,9 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
|
||||
/* stop the tracing. */
|
||||
tracing_stop();
|
||||
/* check both trace buffers */
|
||||
ret = trace_test_buffer(tr, NULL);
|
||||
ret = trace_test_buffer(&tr->trace_buffer, NULL);
|
||||
if (!ret)
|
||||
ret = trace_test_buffer(&max_tr, &count);
|
||||
ret = trace_test_buffer(&tr->max_buffer, &count);
|
||||
trace->reset(tr);
|
||||
tracing_start();
|
||||
|
||||
@@ -877,9 +878,9 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
|
||||
/* stop the tracing. */
|
||||
tracing_stop();
|
||||
/* check both trace buffers */
|
||||
ret = trace_test_buffer(tr, NULL);
|
||||
ret = trace_test_buffer(&tr->trace_buffer, NULL);
|
||||
if (!ret)
|
||||
ret = trace_test_buffer(&max_tr, &count);
|
||||
ret = trace_test_buffer(&tr->max_buffer, &count);
|
||||
trace->reset(tr);
|
||||
tracing_start();
|
||||
|
||||
@@ -943,11 +944,11 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
|
||||
/* stop the tracing. */
|
||||
tracing_stop();
|
||||
/* check both trace buffers */
|
||||
ret = trace_test_buffer(tr, NULL);
|
||||
ret = trace_test_buffer(&tr->trace_buffer, NULL);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = trace_test_buffer(&max_tr, &count);
|
||||
ret = trace_test_buffer(&tr->max_buffer, &count);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -973,11 +974,11 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
|
||||
/* stop the tracing. */
|
||||
tracing_stop();
|
||||
/* check both trace buffers */
|
||||
ret = trace_test_buffer(tr, NULL);
|
||||
ret = trace_test_buffer(&tr->trace_buffer, NULL);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = trace_test_buffer(&max_tr, &count);
|
||||
ret = trace_test_buffer(&tr->max_buffer, &count);
|
||||
|
||||
if (!ret && !count) {
|
||||
printk(KERN_CONT ".. no entries found ..");
|
||||
@@ -1084,10 +1085,10 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
|
||||
/* stop the tracing. */
|
||||
tracing_stop();
|
||||
/* check both trace buffers */
|
||||
ret = trace_test_buffer(tr, NULL);
|
||||
ret = trace_test_buffer(&tr->trace_buffer, NULL);
|
||||
printk("ret = %d\n", ret);
|
||||
if (!ret)
|
||||
ret = trace_test_buffer(&max_tr, &count);
|
||||
ret = trace_test_buffer(&tr->max_buffer, &count);
|
||||
|
||||
|
||||
trace->reset(tr);
|
||||
@@ -1126,7 +1127,7 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr
|
||||
/* stop the tracing. */
|
||||
tracing_stop();
|
||||
/* check the trace buffer */
|
||||
ret = trace_test_buffer(tr, &count);
|
||||
ret = trace_test_buffer(&tr->trace_buffer, &count);
|
||||
trace->reset(tr);
|
||||
tracing_start();
|
||||
|
||||
|
@@ -20,13 +20,24 @@
|
||||
|
||||
#define STACK_TRACE_ENTRIES 500
|
||||
|
||||
#ifdef CC_USING_FENTRY
|
||||
# define fentry 1
|
||||
#else
|
||||
# define fentry 0
|
||||
#endif
|
||||
|
||||
static unsigned long stack_dump_trace[STACK_TRACE_ENTRIES+1] =
|
||||
{ [0 ... (STACK_TRACE_ENTRIES)] = ULONG_MAX };
|
||||
static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
|
||||
|
||||
/*
|
||||
* Reserve one entry for the passed in ip. This will allow
|
||||
* us to remove most or all of the stack size overhead
|
||||
* added by the stack tracer itself.
|
||||
*/
|
||||
static struct stack_trace max_stack_trace = {
|
||||
.max_entries = STACK_TRACE_ENTRIES,
|
||||
.entries = stack_dump_trace,
|
||||
.max_entries = STACK_TRACE_ENTRIES - 1,
|
||||
.entries = &stack_dump_trace[1],
|
||||
};
|
||||
|
||||
static unsigned long max_stack_size;
|
||||
@@ -39,25 +50,34 @@ static DEFINE_MUTEX(stack_sysctl_mutex);
|
||||
int stack_tracer_enabled;
|
||||
static int last_stack_tracer_enabled;
|
||||
|
||||
static inline void check_stack(void)
|
||||
static inline void
|
||||
check_stack(unsigned long ip, unsigned long *stack)
|
||||
{
|
||||
unsigned long this_size, flags;
|
||||
unsigned long *p, *top, *start;
|
||||
static int tracer_frame;
|
||||
int frame_size = ACCESS_ONCE(tracer_frame);
|
||||
int i;
|
||||
|
||||
this_size = ((unsigned long)&this_size) & (THREAD_SIZE-1);
|
||||
this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
|
||||
this_size = THREAD_SIZE - this_size;
|
||||
/* Remove the frame of the tracer */
|
||||
this_size -= frame_size;
|
||||
|
||||
if (this_size <= max_stack_size)
|
||||
return;
|
||||
|
||||
/* we do not handle interrupt stacks yet */
|
||||
if (!object_is_on_stack(&this_size))
|
||||
if (!object_is_on_stack(stack))
|
||||
return;
|
||||
|
||||
local_irq_save(flags);
|
||||
arch_spin_lock(&max_stack_lock);
|
||||
|
||||
/* In case another CPU set the tracer_frame on us */
|
||||
if (unlikely(!frame_size))
|
||||
this_size -= tracer_frame;
|
||||
|
||||
/* a race could have already updated it */
|
||||
if (this_size <= max_stack_size)
|
||||
goto out;
|
||||
@@ -69,11 +89,19 @@ static inline void check_stack(void)
|
||||
|
||||
save_stack_trace(&max_stack_trace);
|
||||
|
||||
/*
|
||||
* Add the passed in ip from the function tracer.
|
||||
* Searching for this on the stack will skip over
|
||||
* most of the overhead from the stack tracer itself.
|
||||
*/
|
||||
stack_dump_trace[0] = ip;
|
||||
max_stack_trace.nr_entries++;
|
||||
|
||||
/*
|
||||
* Now find where in the stack these are.
|
||||
*/
|
||||
i = 0;
|
||||
start = &this_size;
|
||||
start = stack;
|
||||
top = (unsigned long *)
|
||||
(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
|
||||
|
||||
@@ -97,6 +125,18 @@ static inline void check_stack(void)
|
||||
found = 1;
|
||||
/* Start the search from here */
|
||||
start = p + 1;
|
||||
/*
|
||||
* We do not want to show the overhead
|
||||
* of the stack tracer stack in the
|
||||
* max stack. If we haven't figured
|
||||
* out what that is, then figure it out
|
||||
* now.
|
||||
*/
|
||||
if (unlikely(!tracer_frame) && i == 1) {
|
||||
tracer_frame = (p - stack) *
|
||||
sizeof(unsigned long);
|
||||
max_stack_size -= tracer_frame;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -113,6 +153,7 @@ static void
|
||||
stack_trace_call(unsigned long ip, unsigned long parent_ip,
|
||||
struct ftrace_ops *op, struct pt_regs *pt_regs)
|
||||
{
|
||||
unsigned long stack;
|
||||
int cpu;
|
||||
|
||||
preempt_disable_notrace();
|
||||
@@ -122,7 +163,26 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
|
||||
if (per_cpu(trace_active, cpu)++ != 0)
|
||||
goto out;
|
||||
|
||||
check_stack();
|
||||
/*
|
||||
* When fentry is used, the traced function does not get
|
||||
* its stack frame set up, and we lose the parent.
|
||||
* The ip is pretty useless because the function tracer
|
||||
* was called before that function set up its stack frame.
|
||||
* In this case, we use the parent ip.
|
||||
*
|
||||
* By adding the return address of either the parent ip
|
||||
* or the current ip we can disregard most of the stack usage
|
||||
* caused by the stack tracer itself.
|
||||
*
|
||||
* The function tracer always reports the address of where the
|
||||
* mcount call was, but the stack will hold the return address.
|
||||
*/
|
||||
if (fentry)
|
||||
ip = parent_ip;
|
||||
else
|
||||
ip += MCOUNT_INSN_SIZE;
|
||||
|
||||
check_stack(ip, &stack);
|
||||
|
||||
out:
|
||||
per_cpu(trace_active, cpu)--;
|
||||
@@ -322,7 +382,7 @@ static const struct file_operations stack_trace_filter_fops = {
|
||||
.open = stack_trace_filter_open,
|
||||
.read = seq_read,
|
||||
.write = ftrace_filter_write,
|
||||
.llseek = ftrace_regex_lseek,
|
||||
.llseek = ftrace_filter_lseek,
|
||||
.release = ftrace_regex_release,
|
||||
};
|
||||
|
||||
@@ -371,6 +431,8 @@ static __init int stack_trace_init(void)
|
||||
struct dentry *d_tracer;
|
||||
|
||||
d_tracer = tracing_init_dentry();
|
||||
if (!d_tracer)
|
||||
return 0;
|
||||
|
||||
trace_create_file("stack_max_size", 0644, d_tracer,
|
||||
&max_stack_size, &stack_max_size_fops);
|
||||
|
@@ -307,6 +307,8 @@ static int tracing_stat_init(void)
|
||||
struct dentry *d_tracing;
|
||||
|
||||
d_tracing = tracing_init_dentry();
|
||||
if (!d_tracing)
|
||||
return 0;
|
||||
|
||||
stat_dir = debugfs_create_dir("trace_stat", d_tracing);
|
||||
if (!stat_dir)
|
||||
|
@@ -12,10 +12,6 @@
|
||||
#include "trace.h"
|
||||
|
||||
static DEFINE_MUTEX(syscall_trace_lock);
|
||||
static int sys_refcount_enter;
|
||||
static int sys_refcount_exit;
|
||||
static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
|
||||
static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
|
||||
|
||||
static int syscall_enter_register(struct ftrace_event_call *event,
|
||||
enum trace_reg type, void *data);
|
||||
@@ -41,7 +37,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
|
||||
/*
|
||||
* Only compare after the "sys" prefix. Archs that use
|
||||
* syscall wrappers may have syscalls symbols aliases prefixed
|
||||
* with "SyS" instead of "sys", leading to an unwanted
|
||||
* with ".SyS" or ".sys" instead of "sys", leading to an unwanted
|
||||
* mismatch.
|
||||
*/
|
||||
return !strcmp(sym + 3, name + 3);
|
||||
@@ -265,7 +261,7 @@ static void free_syscall_print_fmt(struct ftrace_event_call *call)
|
||||
kfree(call->print_fmt);
|
||||
}
|
||||
|
||||
static int syscall_enter_define_fields(struct ftrace_event_call *call)
|
||||
static int __init syscall_enter_define_fields(struct ftrace_event_call *call)
|
||||
{
|
||||
struct syscall_trace_enter trace;
|
||||
struct syscall_metadata *meta = call->data;
|
||||
@@ -288,7 +284,7 @@ static int syscall_enter_define_fields(struct ftrace_event_call *call)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int syscall_exit_define_fields(struct ftrace_event_call *call)
|
||||
static int __init syscall_exit_define_fields(struct ftrace_event_call *call)
|
||||
{
|
||||
struct syscall_trace_exit trace;
|
||||
int ret;
|
||||
@@ -303,8 +299,9 @@ static int syscall_exit_define_fields(struct ftrace_event_call *call)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
||||
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
|
||||
{
|
||||
struct trace_array *tr = data;
|
||||
struct syscall_trace_enter *entry;
|
||||
struct syscall_metadata *sys_data;
|
||||
struct ring_buffer_event *event;
|
||||
@@ -315,7 +312,7 @@ static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
||||
syscall_nr = trace_get_syscall_nr(current, regs);
|
||||
if (syscall_nr < 0)
|
||||
return;
|
||||
if (!test_bit(syscall_nr, enabled_enter_syscalls))
|
||||
if (!test_bit(syscall_nr, tr->enabled_enter_syscalls))
|
||||
return;
|
||||
|
||||
sys_data = syscall_nr_to_meta(syscall_nr);
|
||||
@@ -324,7 +321,8 @@ static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
||||
|
||||
size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
|
||||
|
||||
event = trace_current_buffer_lock_reserve(&buffer,
|
||||
buffer = tr->trace_buffer.buffer;
|
||||
event = trace_buffer_lock_reserve(buffer,
|
||||
sys_data->enter_event->event.type, size, 0, 0);
|
||||
if (!event)
|
||||
return;
|
||||
@@ -338,8 +336,9 @@ static void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
||||
trace_current_buffer_unlock_commit(buffer, event, 0, 0);
|
||||
}
|
||||
|
||||
static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
|
||||
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
|
||||
{
|
||||
struct trace_array *tr = data;
|
||||
struct syscall_trace_exit *entry;
|
||||
struct syscall_metadata *sys_data;
|
||||
struct ring_buffer_event *event;
|
||||
@@ -349,14 +348,15 @@ static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
|
||||
syscall_nr = trace_get_syscall_nr(current, regs);
|
||||
if (syscall_nr < 0)
|
||||
return;
|
||||
if (!test_bit(syscall_nr, enabled_exit_syscalls))
|
||||
if (!test_bit(syscall_nr, tr->enabled_exit_syscalls))
|
||||
return;
|
||||
|
||||
sys_data = syscall_nr_to_meta(syscall_nr);
|
||||
if (!sys_data)
|
||||
return;
|
||||
|
||||
event = trace_current_buffer_lock_reserve(&buffer,
|
||||
buffer = tr->trace_buffer.buffer;
|
||||
event = trace_buffer_lock_reserve(buffer,
|
||||
sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
|
||||
if (!event)
|
||||
return;
|
||||
@@ -370,8 +370,10 @@ static void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
|
||||
trace_current_buffer_unlock_commit(buffer, event, 0, 0);
|
||||
}
|
||||
|
||||
static int reg_event_syscall_enter(struct ftrace_event_call *call)
|
||||
static int reg_event_syscall_enter(struct ftrace_event_file *file,
|
||||
struct ftrace_event_call *call)
|
||||
{
|
||||
struct trace_array *tr = file->tr;
|
||||
int ret = 0;
|
||||
int num;
|
||||
|
||||
@@ -379,33 +381,37 @@ static int reg_event_syscall_enter(struct ftrace_event_call *call)
|
||||
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
|
||||
return -ENOSYS;
|
||||
mutex_lock(&syscall_trace_lock);
|
||||
if (!sys_refcount_enter)
|
||||
ret = register_trace_sys_enter(ftrace_syscall_enter, NULL);
|
||||
if (!tr->sys_refcount_enter)
|
||||
ret = register_trace_sys_enter(ftrace_syscall_enter, tr);
|
||||
if (!ret) {
|
||||
set_bit(num, enabled_enter_syscalls);
|
||||
sys_refcount_enter++;
|
||||
set_bit(num, tr->enabled_enter_syscalls);
|
||||
tr->sys_refcount_enter++;
|
||||
}
|
||||
mutex_unlock(&syscall_trace_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void unreg_event_syscall_enter(struct ftrace_event_call *call)
|
||||
static void unreg_event_syscall_enter(struct ftrace_event_file *file,
|
||||
struct ftrace_event_call *call)
|
||||
{
|
||||
struct trace_array *tr = file->tr;
|
||||
int num;
|
||||
|
||||
num = ((struct syscall_metadata *)call->data)->syscall_nr;
|
||||
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
|
||||
return;
|
||||
mutex_lock(&syscall_trace_lock);
|
||||
sys_refcount_enter--;
|
||||
clear_bit(num, enabled_enter_syscalls);
|
||||
if (!sys_refcount_enter)
|
||||
unregister_trace_sys_enter(ftrace_syscall_enter, NULL);
|
||||
tr->sys_refcount_enter--;
|
||||
clear_bit(num, tr->enabled_enter_syscalls);
|
||||
if (!tr->sys_refcount_enter)
|
||||
unregister_trace_sys_enter(ftrace_syscall_enter, tr);
|
||||
mutex_unlock(&syscall_trace_lock);
|
||||
}
|
||||
|
||||
static int reg_event_syscall_exit(struct ftrace_event_call *call)
|
||||
static int reg_event_syscall_exit(struct ftrace_event_file *file,
|
||||
struct ftrace_event_call *call)
|
||||
{
|
||||
struct trace_array *tr = file->tr;
|
||||
int ret = 0;
|
||||
int num;
|
||||
|
||||
@@ -413,28 +419,30 @@ static int reg_event_syscall_exit(struct ftrace_event_call *call)
|
||||
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
|
||||
return -ENOSYS;
|
||||
mutex_lock(&syscall_trace_lock);
|
||||
if (!sys_refcount_exit)
|
||||
ret = register_trace_sys_exit(ftrace_syscall_exit, NULL);
|
||||
if (!tr->sys_refcount_exit)
|
||||
ret = register_trace_sys_exit(ftrace_syscall_exit, tr);
|
||||
if (!ret) {
|
||||
set_bit(num, enabled_exit_syscalls);
|
||||
sys_refcount_exit++;
|
||||
set_bit(num, tr->enabled_exit_syscalls);
|
||||
tr->sys_refcount_exit++;
|
||||
}
|
||||
mutex_unlock(&syscall_trace_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void unreg_event_syscall_exit(struct ftrace_event_call *call)
|
||||
static void unreg_event_syscall_exit(struct ftrace_event_file *file,
|
||||
struct ftrace_event_call *call)
|
||||
{
|
||||
struct trace_array *tr = file->tr;
|
||||
int num;
|
||||
|
||||
num = ((struct syscall_metadata *)call->data)->syscall_nr;
|
||||
if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
|
||||
return;
|
||||
mutex_lock(&syscall_trace_lock);
|
||||
sys_refcount_exit--;
|
||||
clear_bit(num, enabled_exit_syscalls);
|
||||
if (!sys_refcount_exit)
|
||||
unregister_trace_sys_exit(ftrace_syscall_exit, NULL);
|
||||
tr->sys_refcount_exit--;
|
||||
clear_bit(num, tr->enabled_exit_syscalls);
|
||||
if (!tr->sys_refcount_exit)
|
||||
unregister_trace_sys_exit(ftrace_syscall_exit, tr);
|
||||
mutex_unlock(&syscall_trace_lock);
|
||||
}
|
||||
|
||||
@@ -471,7 +479,7 @@ struct trace_event_functions exit_syscall_print_funcs = {
|
||||
.trace = print_syscall_exit,
|
||||
};
|
||||
|
||||
struct ftrace_event_class event_class_syscall_enter = {
|
||||
struct ftrace_event_class __refdata event_class_syscall_enter = {
|
||||
.system = "syscalls",
|
||||
.reg = syscall_enter_register,
|
||||
.define_fields = syscall_enter_define_fields,
|
||||
@@ -479,7 +487,7 @@ struct ftrace_event_class event_class_syscall_enter = {
|
||||
.raw_init = init_syscall_trace,
|
||||
};
|
||||
|
||||
struct ftrace_event_class event_class_syscall_exit = {
|
||||
struct ftrace_event_class __refdata event_class_syscall_exit = {
|
||||
.system = "syscalls",
|
||||
.reg = syscall_exit_register,
|
||||
.define_fields = syscall_exit_define_fields,
|
||||
@@ -685,11 +693,13 @@ static void perf_sysexit_disable(struct ftrace_event_call *call)
|
||||
static int syscall_enter_register(struct ftrace_event_call *event,
|
||||
enum trace_reg type, void *data)
|
||||
{
|
||||
struct ftrace_event_file *file = data;
|
||||
|
||||
switch (type) {
|
||||
case TRACE_REG_REGISTER:
|
||||
return reg_event_syscall_enter(event);
|
||||
return reg_event_syscall_enter(file, event);
|
||||
case TRACE_REG_UNREGISTER:
|
||||
unreg_event_syscall_enter(event);
|
||||
unreg_event_syscall_enter(file, event);
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
@@ -711,11 +721,13 @@ static int syscall_enter_register(struct ftrace_event_call *event,
|
||||
static int syscall_exit_register(struct ftrace_event_call *event,
|
||||
enum trace_reg type, void *data)
|
||||
{
|
||||
struct ftrace_event_file *file = data;
|
||||
|
||||
switch (type) {
|
||||
case TRACE_REG_REGISTER:
|
||||
return reg_event_syscall_exit(event);
|
||||
return reg_event_syscall_exit(file, event);
|
||||
case TRACE_REG_UNREGISTER:
|
||||
unreg_event_syscall_exit(event);
|
||||
unreg_event_syscall_exit(file, event);
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
|
@@ -28,6 +28,18 @@
|
||||
|
||||
#define UPROBE_EVENT_SYSTEM "uprobes"
|
||||
|
||||
struct uprobe_trace_entry_head {
|
||||
struct trace_entry ent;
|
||||
unsigned long vaddr[];
|
||||
};
|
||||
|
||||
#define SIZEOF_TRACE_ENTRY(is_return) \
|
||||
(sizeof(struct uprobe_trace_entry_head) + \
|
||||
sizeof(unsigned long) * (is_return ? 2 : 1))
|
||||
|
||||
#define DATAOF_TRACE_ENTRY(entry, is_return) \
|
||||
((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return))
|
||||
|
||||
struct trace_uprobe_filter {
|
||||
rwlock_t rwlock;
|
||||
int nr_systemwide;
|
||||
@@ -64,6 +76,8 @@ static DEFINE_MUTEX(uprobe_lock);
|
||||
static LIST_HEAD(uprobe_list);
|
||||
|
||||
static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);
|
||||
static int uretprobe_dispatcher(struct uprobe_consumer *con,
|
||||
unsigned long func, struct pt_regs *regs);
|
||||
|
||||
static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
|
||||
{
|
||||
@@ -77,11 +91,16 @@ static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
|
||||
return !filter->nr_systemwide && list_empty(&filter->perf_events);
|
||||
}
|
||||
|
||||
static inline bool is_ret_probe(struct trace_uprobe *tu)
|
||||
{
|
||||
return tu->consumer.ret_handler != NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate new trace_uprobe and initialize it (including uprobes).
|
||||
*/
|
||||
static struct trace_uprobe *
|
||||
alloc_trace_uprobe(const char *group, const char *event, int nargs)
|
||||
alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
|
||||
{
|
||||
struct trace_uprobe *tu;
|
||||
|
||||
@@ -106,6 +125,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs)
|
||||
|
||||
INIT_LIST_HEAD(&tu->list);
|
||||
tu->consumer.handler = uprobe_dispatcher;
|
||||
if (is_ret)
|
||||
tu->consumer.ret_handler = uretprobe_dispatcher;
|
||||
init_trace_uprobe_filter(&tu->filter);
|
||||
return tu;
|
||||
|
||||
@@ -180,7 +201,7 @@ end:
|
||||
|
||||
/*
|
||||
* Argument syntax:
|
||||
* - Add uprobe: p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS]
|
||||
* - Add uprobe: p|r[:[GRP/]EVENT] PATH:SYMBOL [FETCHARGS]
|
||||
*
|
||||
* - Remove uprobe: -:[GRP/]EVENT
|
||||
*/
|
||||
@@ -192,20 +213,23 @@ static int create_trace_uprobe(int argc, char **argv)
|
||||
char buf[MAX_EVENT_NAME_LEN];
|
||||
struct path path;
|
||||
unsigned long offset;
|
||||
bool is_delete;
|
||||
bool is_delete, is_return;
|
||||
int i, ret;
|
||||
|
||||
inode = NULL;
|
||||
ret = 0;
|
||||
is_delete = false;
|
||||
is_return = false;
|
||||
event = NULL;
|
||||
group = NULL;
|
||||
|
||||
/* argc must be >= 1 */
|
||||
if (argv[0][0] == '-')
|
||||
is_delete = true;
|
||||
else if (argv[0][0] == 'r')
|
||||
is_return = true;
|
||||
else if (argv[0][0] != 'p') {
|
||||
pr_info("Probe definition must be started with 'p' or '-'.\n");
|
||||
pr_info("Probe definition must be started with 'p', 'r' or '-'.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -303,7 +327,7 @@ static int create_trace_uprobe(int argc, char **argv)
|
||||
kfree(tail);
|
||||
}
|
||||
|
||||
tu = alloc_trace_uprobe(group, event, argc);
|
||||
tu = alloc_trace_uprobe(group, event, argc, is_return);
|
||||
if (IS_ERR(tu)) {
|
||||
pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu));
|
||||
ret = PTR_ERR(tu);
|
||||
@@ -414,9 +438,10 @@ static void probes_seq_stop(struct seq_file *m, void *v)
|
||||
static int probes_seq_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct trace_uprobe *tu = v;
|
||||
char c = is_ret_probe(tu) ? 'r' : 'p';
|
||||
int i;
|
||||
|
||||
seq_printf(m, "p:%s/%s", tu->call.class->system, tu->call.name);
|
||||
seq_printf(m, "%c:%s/%s", c, tu->call.class->system, tu->call.name);
|
||||
seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset);
|
||||
|
||||
for (i = 0; i < tu->nr_args; i++)
|
||||
@@ -485,65 +510,81 @@ static const struct file_operations uprobe_profile_ops = {
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
/* uprobe handler */
|
||||
static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
|
||||
static void uprobe_trace_print(struct trace_uprobe *tu,
|
||||
unsigned long func, struct pt_regs *regs)
|
||||
{
|
||||
struct uprobe_trace_entry_head *entry;
|
||||
struct ring_buffer_event *event;
|
||||
struct ring_buffer *buffer;
|
||||
u8 *data;
|
||||
int size, i, pc;
|
||||
unsigned long irq_flags;
|
||||
void *data;
|
||||
int size, i;
|
||||
struct ftrace_event_call *call = &tu->call;
|
||||
|
||||
local_save_flags(irq_flags);
|
||||
pc = preempt_count();
|
||||
|
||||
size = sizeof(*entry) + tu->size;
|
||||
|
||||
size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
|
||||
event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
|
||||
size, irq_flags, pc);
|
||||
size + tu->size, 0, 0);
|
||||
if (!event)
|
||||
return 0;
|
||||
return;
|
||||
|
||||
entry = ring_buffer_event_data(event);
|
||||
entry->ip = instruction_pointer(task_pt_regs(current));
|
||||
data = (u8 *)&entry[1];
|
||||
if (is_ret_probe(tu)) {
|
||||
entry->vaddr[0] = func;
|
||||
entry->vaddr[1] = instruction_pointer(regs);
|
||||
data = DATAOF_TRACE_ENTRY(entry, true);
|
||||
} else {
|
||||
entry->vaddr[0] = instruction_pointer(regs);
|
||||
data = DATAOF_TRACE_ENTRY(entry, false);
|
||||
}
|
||||
|
||||
for (i = 0; i < tu->nr_args; i++)
|
||||
call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
|
||||
|
||||
if (!filter_current_check_discard(buffer, call, entry, event))
|
||||
trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
|
||||
trace_buffer_unlock_commit(buffer, event, 0, 0);
|
||||
}
|
||||
|
||||
/* uprobe handler */
|
||||
static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
|
||||
{
|
||||
if (!is_ret_probe(tu))
|
||||
uprobe_trace_print(tu, 0, regs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
uprobe_trace_print(tu, func, regs);
|
||||
}
|
||||
|
||||
/* Event entry printers */
|
||||
static enum print_line_t
|
||||
print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event)
|
||||
{
|
||||
struct uprobe_trace_entry_head *field;
|
||||
struct uprobe_trace_entry_head *entry;
|
||||
struct trace_seq *s = &iter->seq;
|
||||
struct trace_uprobe *tu;
|
||||
u8 *data;
|
||||
int i;
|
||||
|
||||
field = (struct uprobe_trace_entry_head *)iter->ent;
|
||||
entry = (struct uprobe_trace_entry_head *)iter->ent;
|
||||
tu = container_of(event, struct trace_uprobe, call.event);
|
||||
|
||||
if (!trace_seq_printf(s, "%s: (", tu->call.name))
|
||||
goto partial;
|
||||
if (is_ret_probe(tu)) {
|
||||
if (!trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", tu->call.name,
|
||||
entry->vaddr[1], entry->vaddr[0]))
|
||||
goto partial;
|
||||
data = DATAOF_TRACE_ENTRY(entry, true);
|
||||
} else {
|
||||
if (!trace_seq_printf(s, "%s: (0x%lx)", tu->call.name,
|
||||
entry->vaddr[0]))
|
||||
goto partial;
|
||||
data = DATAOF_TRACE_ENTRY(entry, false);
|
||||
}
|
||||
|
||||
if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
|
||||
goto partial;
|
||||
|
||||
if (!trace_seq_puts(s, ")"))
|
||||
goto partial;
|
||||
|
||||
data = (u8 *)&field[1];
|
||||
for (i = 0; i < tu->nr_args; i++) {
|
||||
if (!tu->args[i].type->print(s, tu->args[i].name,
|
||||
data + tu->args[i].offset, field))
|
||||
data + tu->args[i].offset, entry))
|
||||
goto partial;
|
||||
}
|
||||
|
||||
@@ -595,16 +636,23 @@ static void probe_event_disable(struct trace_uprobe *tu, int flag)
|
||||
|
||||
static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
|
||||
{
|
||||
int ret, i;
|
||||
int ret, i, size;
|
||||
struct uprobe_trace_entry_head field;
|
||||
struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data;
|
||||
struct trace_uprobe *tu = event_call->data;
|
||||
|
||||
DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
|
||||
if (is_ret_probe(tu)) {
|
||||
DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0);
|
||||
DEFINE_FIELD(unsigned long, vaddr[1], FIELD_STRING_RETIP, 0);
|
||||
size = SIZEOF_TRACE_ENTRY(true);
|
||||
} else {
|
||||
DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0);
|
||||
size = SIZEOF_TRACE_ENTRY(false);
|
||||
}
|
||||
/* Set argument names as fields */
|
||||
for (i = 0; i < tu->nr_args; i++) {
|
||||
ret = trace_define_field(event_call, tu->args[i].type->fmttype,
|
||||
tu->args[i].name,
|
||||
sizeof(field) + tu->args[i].offset,
|
||||
size + tu->args[i].offset,
|
||||
tu->args[i].type->size,
|
||||
tu->args[i].type->is_signed,
|
||||
FILTER_OTHER);
|
||||
@@ -622,8 +670,13 @@ static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len)
|
||||
int i;
|
||||
int pos = 0;
|
||||
|
||||
fmt = "(%lx)";
|
||||
arg = "REC->" FIELD_STRING_IP;
|
||||
if (is_ret_probe(tu)) {
|
||||
fmt = "(%lx <- %lx)";
|
||||
arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
|
||||
} else {
|
||||
fmt = "(%lx)";
|
||||
arg = "REC->" FIELD_STRING_IP;
|
||||
}
|
||||
|
||||
/* When len=0, we just calculate the needed length */
|
||||
|
||||
@@ -752,49 +805,68 @@ static bool uprobe_perf_filter(struct uprobe_consumer *uc,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* uprobe profile handler */
|
||||
static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
|
||||
static void uprobe_perf_print(struct trace_uprobe *tu,
|
||||
unsigned long func, struct pt_regs *regs)
|
||||
{
|
||||
struct ftrace_event_call *call = &tu->call;
|
||||
struct uprobe_trace_entry_head *entry;
|
||||
struct hlist_head *head;
|
||||
u8 *data;
|
||||
int size, __size, i;
|
||||
int rctx;
|
||||
void *data;
|
||||
int size, rctx, i;
|
||||
|
||||
if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
|
||||
return UPROBE_HANDLER_REMOVE;
|
||||
|
||||
__size = sizeof(*entry) + tu->size;
|
||||
size = ALIGN(__size + sizeof(u32), sizeof(u64));
|
||||
size -= sizeof(u32);
|
||||
size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
|
||||
size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32);
|
||||
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
|
||||
return 0;
|
||||
return;
|
||||
|
||||
preempt_disable();
|
||||
head = this_cpu_ptr(call->perf_events);
|
||||
if (hlist_empty(head))
|
||||
goto out;
|
||||
|
||||
entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
|
||||
if (!entry)
|
||||
goto out;
|
||||
|
||||
entry->ip = instruction_pointer(task_pt_regs(current));
|
||||
data = (u8 *)&entry[1];
|
||||
if (is_ret_probe(tu)) {
|
||||
entry->vaddr[0] = func;
|
||||
entry->vaddr[1] = instruction_pointer(regs);
|
||||
data = DATAOF_TRACE_ENTRY(entry, true);
|
||||
} else {
|
||||
entry->vaddr[0] = instruction_pointer(regs);
|
||||
data = DATAOF_TRACE_ENTRY(entry, false);
|
||||
}
|
||||
|
||||
for (i = 0; i < tu->nr_args; i++)
|
||||
call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
|
||||
|
||||
head = this_cpu_ptr(call->perf_events);
|
||||
perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head, NULL);
|
||||
|
||||
perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
|
||||
out:
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
/* uprobe profile handler */
|
||||
static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
|
||||
{
|
||||
if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
|
||||
return UPROBE_HANDLER_REMOVE;
|
||||
|
||||
if (!is_ret_probe(tu))
|
||||
uprobe_perf_print(tu, 0, regs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
uprobe_perf_print(tu, func, regs);
|
||||
}
|
||||
#endif /* CONFIG_PERF_EVENTS */
|
||||
|
||||
static
|
||||
int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data)
|
||||
{
|
||||
struct trace_uprobe *tu = (struct trace_uprobe *)event->data;
|
||||
struct trace_uprobe *tu = event->data;
|
||||
|
||||
switch (type) {
|
||||
case TRACE_REG_REGISTER:
|
||||
@@ -843,6 +915,23 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int uretprobe_dispatcher(struct uprobe_consumer *con,
|
||||
unsigned long func, struct pt_regs *regs)
|
||||
{
|
||||
struct trace_uprobe *tu;
|
||||
|
||||
tu = container_of(con, struct trace_uprobe, consumer);
|
||||
|
||||
if (tu->flags & TP_FLAG_TRACE)
|
||||
uretprobe_trace_func(tu, func, regs);
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
if (tu->flags & TP_FLAG_PROFILE)
|
||||
uretprobe_perf_func(tu, func, regs);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct trace_event_functions uprobe_funcs = {
|
||||
.trace = print_uprobe_event
|
||||
};
|
||||
|
@@ -112,7 +112,8 @@ tracepoint_entry_add_probe(struct tracepoint_entry *entry,
int nr_probes = 0;
struct tracepoint_func *old, *new;

WARN_ON(!probe);
if (WARN_ON(!probe))
return ERR_PTR(-EINVAL);

debug_print_probes(entry);
old = entry->funcs;
@@ -152,13 +153,18 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry,

debug_print_probes(entry);
/* (N -> M), (N > 1, M >= 0) probes */
for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
if (!probe ||
(old[nr_probes].func == probe &&
old[nr_probes].data == data))
nr_del++;
if (probe) {
for (nr_probes = 0; old[nr_probes].func; nr_probes++) {
if (old[nr_probes].func == probe &&
old[nr_probes].data == data)
nr_del++;
}
}

/*
* If probe is NULL, then nr_probes = nr_del = 0, and then the
* entire entry will be removed.
*/
if (nr_probes - nr_del == 0) {
/* N -> 0, (N > 1) */
entry->funcs = NULL;
@@ -173,8 +179,7 @@ tracepoint_entry_remove_probe(struct tracepoint_entry *entry,
if (new == NULL)
return ERR_PTR(-ENOMEM);
for (i = 0; old[i].func; i++)
if (probe &&
(old[i].func != probe || old[i].data != data))
if (old[i].func != probe || old[i].data != data)
new[j++] = old[i];
new[nr_probes - nr_del].func = NULL;
entry->refcount = nr_probes - nr_del;
@@ -51,6 +51,8 @@ struct user_namespace init_user_ns = {
.owner = GLOBAL_ROOT_UID,
.group = GLOBAL_ROOT_GID,
.proc_inum = PROC_USER_INIT_INO,
.may_mount_sysfs = true,
.may_mount_proc = true,
};
EXPORT_SYMBOL_GPL(init_user_ns);

@@ -21,10 +21,12 @@
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include <linux/projid.h>
#include <linux/fs_struct.h>

static struct kmem_cache *user_ns_cachep __read_mostly;

static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
static bool new_idmap_permitted(const struct file *file,
struct user_namespace *ns, int cap_setid,
struct uid_gid_map *map);

static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
@@ -60,6 +62,15 @@ int create_user_ns(struct cred *new)
kgid_t group = new->egid;
int ret;

/*
* Verify that we can not violate the policy of which files
* may be accessed that is specified by the root directory,
* by verifing that the root directory is at the root of the
* mount namespace which allows all files to be accessed.
*/
if (current_chrooted())
return -EPERM;

/* The creator needs a mapping in the parent user namespace
* or else we won't be able to reasonably tell userspace who
* created a user_namespace.
@@ -86,6 +97,8 @@ int create_user_ns(struct cred *new)

set_cred_user_ns(new, ns);

update_mnt_policy(ns);

return 0;
}

@@ -600,10 +613,10 @@ static ssize_t map_write(struct file *file, const char __user *buf,
if (map->nr_extents != 0)
goto out;

/* Require the appropriate privilege CAP_SETUID or CAP_SETGID
* over the user namespace in order to set the id mapping.
/*
* Adjusting namespace settings requires capabilities on the target.
*/
if (cap_valid(cap_setid) && !ns_capable(ns, cap_setid))
if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
goto out;

/* Get a buffer */
@@ -688,7 +701,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,

ret = -EPERM;
/* Validate the user is allowed to use user id's mapped to. */
if (!new_idmap_permitted(ns, cap_setid, &new_map))
if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
goto out;

/* Map the lower ids from the parent user namespace to the
@@ -775,7 +788,8 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t
&ns->projid_map, &ns->parent->projid_map);
}

static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
static bool new_idmap_permitted(const struct file *file,
struct user_namespace *ns, int cap_setid,
struct uid_gid_map *new_map)
{
/* Allow mapping to your own filesystem ids */
@@ -783,12 +797,12 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
u32 id = new_map->extent[0].lower_first;
if (cap_setid == CAP_SETUID) {
kuid_t uid = make_kuid(ns->parent, id);
if (uid_eq(uid, current_fsuid()))
if (uid_eq(uid, file->f_cred->fsuid))
return true;
}
else if (cap_setid == CAP_SETGID) {
kgid_t gid = make_kgid(ns->parent, id);
if (gid_eq(gid, current_fsgid()))
if (gid_eq(gid, file->f_cred->fsgid))
return true;
}
}
@@ -799,8 +813,10 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,

/* Allow the specified ids if we have the appropriate capability
* (CAP_SETUID or CAP_SETGID) over the parent user namespace.
* And the opener of the id file also had the approprpiate capability.
*/
if (ns_capable(ns->parent, cap_setid))
if (ns_capable(ns->parent, cap_setid) &&
file_ns_capable(file, ns->parent, cap_setid))
return true;

return false;
@@ -837,6 +853,9 @@ static int userns_install(struct nsproxy *nsproxy, void *ns)
if (atomic_read(&current->mm->mm_users) > 1)
return -EINVAL;

if (current->fs->users != 1)
return -EINVAL;

if (!ns_capable(user_ns, CAP_SYS_ADMIN))
return -EPERM;
@@ -517,6 +517,11 @@ int proc_dowatchdog(struct ctl_table *table, int write,
return ret;

set_sample_period();
/*
* Watchdog threads shouldn't be enabled if they are
* disabled. The 'watchdog_disabled' variable check in
* watchdog_*_all_cpus() function takes care of this.
*/
if (watchdog_enabled && watchdog_thresh)
watchdog_enable_all_cpus();
else
kernel/workqueue.c: 2895 lines changed (file diff suppressed because it is too large)
@@ -32,14 +32,12 @@ struct worker {
struct list_head scheduled; /* L: scheduled works */
struct task_struct *task; /* I: worker task */
struct worker_pool *pool; /* I: the associated pool */
/* L: for rescuers */
/* 64 bytes boundary on 64bit, 32 on 32bit */
unsigned long last_active; /* L: last active timestamp */
unsigned int flags; /* X: flags */
int id; /* I: worker id */

/* for rebinding worker to CPU */
struct work_struct rebind_work; /* L: for busy worker */

/* used only by rescuers to point to the target workqueue */
struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */
};
@@ -58,8 +56,7 @@ static inline struct worker *current_wq_worker(void)
* Scheduler hooks for concurrency managed workqueue. Only to be used from
* sched.c and workqueue.c.
*/
void wq_worker_waking_up(struct task_struct *task, unsigned int cpu);
struct task_struct *wq_worker_sleeping(struct task_struct *task,
unsigned int cpu);
void wq_worker_waking_up(struct task_struct *task, int cpu);
struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu);

#endif /* _KERNEL_WORKQUEUE_INTERNAL_H */