Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux
Merge upstream tree in order to reinstate crct10dif.
@@ -9,7 +9,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \
rcupdate.o extable.o params.o posix-timers.o \
kthread.o wait.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o cred.o \
notifier.o ksysfs.o cred.o reboot.o \
async.o range.o groups.o lglock.o smpboot.o

ifdef CONFIG_FUNCTION_TRACER

@@ -85,6 +85,7 @@ struct audit_names {

struct filename *name;
int name_len; /* number of chars to log */
bool hidden; /* don't log this record */
bool name_put; /* call __putname()? */

unsigned long ino;

@@ -423,7 +423,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
f->lsm_rule = NULL;

/* Support legacy tests for a valid loginuid */
if ((f->type == AUDIT_LOGINUID) && (f->val == 4294967295)) {
if ((f->type == AUDIT_LOGINUID) && (f->val == ~0U)) {
f->type = AUDIT_LOGINUID_SET;
f->val = 0;
}
@@ -865,6 +865,12 @@ static inline int audit_add_rule(struct audit_entry *entry)
err = audit_add_watch(&entry->rule, &list);
if (err) {
mutex_unlock(&audit_filter_mutex);
/*
* normally audit_add_tree_rule() will free it
* on failure
*/
if (tree)
audit_put_tree(tree);
goto error;
}
}

@@ -1399,8 +1399,11 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
}

i = 0;
list_for_each_entry(n, &context->names_list, list)
list_for_each_entry(n, &context->names_list, list) {
if (n->hidden)
continue;
audit_log_name(context, n, NULL, i++, &call_panic);
}

/* Send end of event record to help user space know we are finished */
ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
@@ -1769,14 +1772,15 @@ void audit_putname(struct filename *name)
* __audit_inode - store the inode and device from a lookup
* @name: name being audited
* @dentry: dentry being audited
* @parent: does this dentry represent the parent?
* @flags: attributes for this particular entry
*/
void __audit_inode(struct filename *name, const struct dentry *dentry,
unsigned int parent)
unsigned int flags)
{
struct audit_context *context = current->audit_context;
const struct inode *inode = dentry->d_inode;
struct audit_names *n;
bool parent = flags & AUDIT_INODE_PARENT;

if (!context->in_syscall)
return;
@@ -1831,6 +1835,8 @@ out:
if (parent) {
n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL;
n->type = AUDIT_TYPE_PARENT;
if (flags & AUDIT_INODE_HIDDEN)
n->hidden = true;
} else {
n->name_len = AUDIT_NAME_FULL;
n->type = AUDIT_TYPE_NORMAL;

@@ -802,7 +802,6 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
*/

static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int);
static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files,
unsigned long subsys_mask);
@@ -1846,36 +1845,43 @@ out:
EXPORT_SYMBOL_GPL(cgroup_path);

/**
* task_cgroup_path_from_hierarchy - cgroup path of a task on a hierarchy
* task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
* @task: target task
* @hierarchy_id: the hierarchy to look up @task's cgroup from
* @buf: the buffer to write the path into
* @buflen: the length of the buffer
*
* Determine @task's cgroup on the hierarchy specified by @hierarchy_id and
* copy its path into @buf. This function grabs cgroup_mutex and shouldn't
* be used inside locks used by cgroup controller callbacks.
* Determine @task's cgroup on the first (the one with the lowest non-zero
* hierarchy_id) cgroup hierarchy and copy its path into @buf. This
* function grabs cgroup_mutex and shouldn't be used inside locks used by
* cgroup controller callbacks.
*
* Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short.
*/
int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id,
char *buf, size_t buflen)
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
struct cgroupfs_root *root;
struct cgroup *cgrp = NULL;
int ret = -ENOENT;
struct cgroup *cgrp;
int hierarchy_id = 1, ret = 0;

if (buflen < 2)
return -ENAMETOOLONG;

mutex_lock(&cgroup_mutex);

root = idr_find(&cgroup_hierarchy_idr, hierarchy_id);
root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);

if (root) {
cgrp = task_cgroup_from_root(task, root);
ret = cgroup_path(cgrp, buf, buflen);
} else {
/* if no hierarchy exists, everyone is in "/" */
memcpy(buf, "/", 2);
}

mutex_unlock(&cgroup_mutex);

return ret;
}
EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy);
EXPORT_SYMBOL_GPL(task_cgroup_path);

/*
* Control Group taskset
@@ -2642,7 +2648,7 @@ static const struct inode_operations cgroup_file_inode_operations = {
};

static const struct inode_operations cgroup_dir_inode_operations = {
.lookup = cgroup_lookup,
.lookup = simple_lookup,
.mkdir = cgroup_mkdir,
.rmdir = cgroup_rmdir,
.rename = cgroup_rename,
@@ -2652,14 +2658,6 @@ static const struct inode_operations cgroup_dir_inode_operations = {
.removexattr = cgroup_removexattr,
};

static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
d_add(dentry, NULL);
return NULL;
}

/*
* Check if a file is a control file
*/

@@ -366,7 +366,7 @@ EXPORT_SYMBOL(cpu_down);
#endif /*CONFIG_HOTPLUG_CPU*/

/* Requires cpu_add_remove_lock to be held */
static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
static int _cpu_up(unsigned int cpu, int tasks_frozen)
{
int ret, nr_calls = 0;
void *hcpu = (void *)(long)cpu;
@@ -419,7 +419,7 @@ out:
return ret;
}

int __cpuinit cpu_up(unsigned int cpu)
int cpu_up(unsigned int cpu)
{
int err = 0;

@@ -618,7 +618,7 @@ core_initcall(cpu_hotplug_pm_sync_init);
* It must be called by the arch code on the new cpu, before the new cpu
* enables interrupts and before the "boot" cpu returns from __cpu_up().
*/
void __cpuinit notify_cpu_starting(unsigned int cpu)
void notify_cpu_starting(unsigned int cpu)
{
unsigned long val = CPU_STARTING;

@@ -182,7 +182,7 @@ void update_perf_cpu_limits(void)
u64 tmp = perf_sample_period_ns;

tmp *= sysctl_perf_cpu_time_max_percent;
tmp = do_div(tmp, 100);
do_div(tmp, 100);
atomic_set(&perf_sample_allowed_ns, tmp);
}

@@ -232,7 +232,7 @@ DEFINE_PER_CPU(u64, running_sample_length);
void perf_sample_event_took(u64 sample_len_ns)
{
u64 avg_local_sample_len;
u64 local_samples_len = __get_cpu_var(running_sample_length);
u64 local_samples_len;

if (atomic_read(&perf_sample_allowed_ns) == 0)
return;
@@ -947,8 +947,18 @@ perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
{
struct perf_event_context *ctx;

rcu_read_lock();
retry:
/*
* One of the few rules of preemptible RCU is that one cannot do
* rcu_read_unlock() while holding a scheduler (or nested) lock when
* part of the read side critical section was preemptible -- see
* rcu_read_unlock_special().
*
* Since ctx->lock nests under rq->lock we must ensure the entire read
* side critical section is non-preemptible.
*/
preempt_disable();
rcu_read_lock();
ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
if (ctx) {
/*
@@ -964,6 +974,8 @@ retry:
raw_spin_lock_irqsave(&ctx->lock, *flags);
if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
raw_spin_unlock_irqrestore(&ctx->lock, *flags);
rcu_read_unlock();
preempt_enable();
goto retry;
}

@@ -973,6 +985,7 @@ retry:
}
}
rcu_read_unlock();
preempt_enable();
return ctx;
}

@@ -1950,7 +1963,16 @@ static int __perf_event_enable(void *info)
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
int err;

if (WARN_ON_ONCE(!ctx->is_active))
/*
* There's a time window between 'ctx->is_active' check
* in perf_event_enable function and this place having:
* - IRQs on
* - ctx->lock unlocked
*
* where the task could be killed and 'ctx' deactivated
* by perf_event_exit_task.
*/
if (!ctx->is_active)
return -EINVAL;

raw_spin_lock(&ctx->lock);
@@ -6212,8 +6234,6 @@ perf_event_mux_interval_ms_store(struct device *dev,
return count;
}

#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store)

static struct device_attribute pmu_dev_attrs[] = {
__ATTR_RO(type),
__ATTR_RW(perf_event_mux_interval_ms),
@@ -7465,7 +7485,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
* child.
*/

child_ctx = alloc_perf_context(event->pmu, child);
child_ctx = alloc_perf_context(parent_ctx->pmu, child);
if (!child_ctx)
return -ENOMEM;

@@ -7608,7 +7628,7 @@ static void __init perf_event_init_all_cpus(void)
}
}

static void __cpuinit perf_event_init_cpu(int cpu)
static void perf_event_init_cpu(int cpu)
{
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);

@@ -7697,7 +7717,7 @@ static struct notifier_block perf_reboot_notifier = {
.priority = INT_MIN,
};

static int __cpuinit
static int
perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;

@@ -808,7 +808,7 @@ void do_exit(long code)
/*
* FIXME: do that only when needed, using sched_exit tracepoint
*/
ptrace_put_breakpoints(tsk);
flush_ptrace_hw_breakpoint(tsk);

exit_notify(tsk, group_dead);
#ifdef CONFIG_NUMA

@@ -365,8 +365,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
mm->locked_vm = 0;
mm->mmap = NULL;
mm->mmap_cache = NULL;
mm->free_area_cache = oldmm->mmap_base;
mm->cached_hole_size = ~0UL;
mm->map_count = 0;
cpumask_clear(mm_cpumask(mm));
mm->mm_rb = RB_ROOT;
@@ -540,8 +538,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
mm->nr_ptes = 0;
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
mm->free_area_cache = TASK_UNMAPPED_BASE;
mm->cached_hole_size = ~0UL;
mm_init_aio(mm);
mm_init_owner(mm, p);

@@ -1550,7 +1546,7 @@ static inline void init_idle_pids(struct pid_link *links)
}
}

struct task_struct * __cpuinit fork_idle(int cpu)
struct task_struct *fork_idle(int cpu)
{
struct task_struct *task;
task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0);

@@ -722,17 +722,20 @@ static int hrtimer_switch_to_hres(void)
return 1;
}

static void clock_was_set_work(struct work_struct *work)
{
clock_was_set();
}

static DECLARE_WORK(hrtimer_work, clock_was_set_work);

/*
* Called from timekeeping code to reprogramm the hrtimer interrupt
* device. If called from the timer interrupt context we defer it to
* softirq context.
* Called from timekeeping and resume code to reprogramm the hrtimer
* interrupt device on all cpus.
*/
void clock_was_set_delayed(void)
{
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);

cpu_base->clock_was_set = 1;
__raise_softirq_irqoff(HRTIMER_SOFTIRQ);
schedule_work(&hrtimer_work);
}

#else
@@ -774,15 +777,19 @@ void clock_was_set(void)

/*
* During resume we might have to reprogram the high resolution timer
* interrupt (on the local CPU):
* interrupt on all online CPUs. However, all other CPUs will be
* stopped with IRQs interrupts disabled so the clock_was_set() call
* must be deferred.
*/
void hrtimers_resume(void)
{
WARN_ONCE(!irqs_disabled(),
KERN_INFO "hrtimers_resume() called with IRQs enabled!");

/* Retrigger on the local CPU */
retrigger_next_event(NULL);
timerfd_clock_was_set();
/* And schedule a retrigger for all others */
clock_was_set_delayed();
}

static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
@@ -1433,13 +1440,6 @@ void hrtimer_peek_ahead_timers(void)

static void run_hrtimer_softirq(struct softirq_action *h)
{
struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases);

if (cpu_base->clock_was_set) {
cpu_base->clock_was_set = 0;
clock_was_set();
}

hrtimer_peek_ahead_timers();
}

@@ -1659,7 +1659,7 @@ SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
/*
* Functions related to boot-time initialization:
*/
static void __cpuinit init_hrtimers_cpu(int cpu)
static void init_hrtimers_cpu(int cpu)
{
struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
int i;
@@ -1740,7 +1740,7 @@ static void migrate_hrtimers(int scpu)

#endif /* CONFIG_HOTPLUG_CPU */

static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
static int hrtimer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
int scpu = (long)hcpu;
@@ -1773,7 +1773,7 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata hrtimers_nb = {
static struct notifier_block hrtimers_nb = {
.notifier_call = hrtimer_cpu_notify,
};

@@ -275,10 +275,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
if (d->gc)
return -EBUSY;

if (d->revmap_type != IRQ_DOMAIN_MAP_LINEAR)
return -EINVAL;

numchips = d->revmap_data.linear.size / irqs_per_chip;
numchips = DIV_ROUND_UP(d->revmap_size, irqs_per_chip);
if (!numchips)
return -EINVAL;

@@ -310,6 +307,7 @@ int irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip,
/* Calc pointer to the next generic chip */
tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type);
}
d->name = name;
return 0;
}
EXPORT_SYMBOL_GPL(irq_alloc_domain_generic_chips);

@@ -23,9 +23,11 @@ static DEFINE_MUTEX(revmap_trees_mutex);
static struct irq_domain *irq_default_domain;

/**
* irq_domain_alloc() - Allocate a new irq_domain data structure
* __irq_domain_add() - Allocate a new irq_domain data structure
* @of_node: optional device-tree node of the interrupt controller
* @revmap_type: type of reverse mapping to use
* @size: Size of linear map; 0 for radix mapping only
* @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
* direct mapping
* @ops: map/unmap domain callbacks
* @host_data: Controller private data pointer
*
@@ -33,41 +35,35 @@ static struct irq_domain *irq_default_domain;
* register allocated irq_domain with irq_domain_register(). Returns pointer
* to IRQ domain, or NULL on failure.
*/
static struct irq_domain *irq_domain_alloc(struct device_node *of_node,
unsigned int revmap_type,
const struct irq_domain_ops *ops,
void *host_data)
struct irq_domain *__irq_domain_add(struct device_node *of_node, int size,
irq_hw_number_t hwirq_max, int direct_max,
const struct irq_domain_ops *ops,
void *host_data)
{
struct irq_domain *domain;

domain = kzalloc_node(sizeof(*domain), GFP_KERNEL,
of_node_to_nid(of_node));
domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
GFP_KERNEL, of_node_to_nid(of_node));
if (WARN_ON(!domain))
return NULL;

/* Fill structure */
domain->revmap_type = revmap_type;
INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL);
domain->ops = ops;
domain->host_data = host_data;
domain->of_node = of_node_get(of_node);
domain->hwirq_max = hwirq_max;
domain->revmap_size = size;
domain->revmap_direct_max_irq = direct_max;

return domain;
}

static void irq_domain_free(struct irq_domain *domain)
{
of_node_put(domain->of_node);
kfree(domain);
}

static void irq_domain_add(struct irq_domain *domain)
{
mutex_lock(&irq_domain_mutex);
list_add(&domain->link, &irq_domain_list);
mutex_unlock(&irq_domain_mutex);
pr_debug("Allocated domain of type %d @0x%p\n",
domain->revmap_type, domain);

pr_debug("Added domain %s\n", domain->name);
return domain;
}
EXPORT_SYMBOL_GPL(__irq_domain_add);

/**
* irq_domain_remove() - Remove an irq domain.
@@ -81,29 +77,12 @@ void irq_domain_remove(struct irq_domain *domain)
{
mutex_lock(&irq_domain_mutex);

switch (domain->revmap_type) {
case IRQ_DOMAIN_MAP_LEGACY:
/*
* Legacy domains don't manage their own irq_desc
* allocations, we expect the caller to handle irq_desc
* freeing on their own.
*/
break;
case IRQ_DOMAIN_MAP_TREE:
/*
* radix_tree_delete() takes care of destroying the root
* node when all entries are removed. Shout if there are
* any mappings left.
*/
WARN_ON(domain->revmap_data.tree.height);
break;
case IRQ_DOMAIN_MAP_LINEAR:
kfree(domain->revmap_data.linear.revmap);
domain->revmap_data.linear.size = 0;
break;
case IRQ_DOMAIN_MAP_NOMAP:
break;
}
/*
* radix_tree_delete() takes care of destroying the root
* node when all entries are removed. Shout if there are
* any mappings left.
*/
WARN_ON(domain->revmap_tree.height);

list_del(&domain->link);

@@ -115,44 +94,30 @@ void irq_domain_remove(struct irq_domain *domain)

mutex_unlock(&irq_domain_mutex);

pr_debug("Removed domain of type %d @0x%p\n",
domain->revmap_type, domain);
pr_debug("Removed domain %s\n", domain->name);

irq_domain_free(domain);
of_node_put(domain->of_node);
kfree(domain);
}
EXPORT_SYMBOL_GPL(irq_domain_remove);

static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain,
irq_hw_number_t hwirq)
{
irq_hw_number_t first_hwirq = domain->revmap_data.legacy.first_hwirq;
int size = domain->revmap_data.legacy.size;

if (WARN_ON(hwirq < first_hwirq || hwirq >= first_hwirq + size))
return 0;
return hwirq - first_hwirq + domain->revmap_data.legacy.first_irq;
}

/**
* irq_domain_add_simple() - Allocate and register a simple irq_domain.
* irq_domain_add_simple() - Register an irq_domain and optionally map a range of irqs
* @of_node: pointer to interrupt controller's device tree node.
* @size: total number of irqs in mapping
* @first_irq: first number of irq block assigned to the domain,
* pass zero to assign irqs on-the-fly. This will result in a
* linear IRQ domain so it is important to use irq_create_mapping()
* for each used IRQ, especially when SPARSE_IRQ is enabled.
* pass zero to assign irqs on-the-fly. If first_irq is non-zero, then
* pre-map all of the irqs in the domain to virqs starting at first_irq.
* @ops: map/unmap domain callbacks
* @host_data: Controller private data pointer
*
* Allocates a legacy irq_domain if irq_base is positive or a linear
* domain otherwise. For the legacy domain, IRQ descriptors will also
* be allocated.
* Allocates an irq_domain, and optionally if first_irq is positive then also
* allocate irq_descs and map all of the hwirqs to virqs starting at first_irq.
*
* This is intended to implement the expected behaviour for most
* interrupt controllers which is that a linear mapping should
* normally be used unless the system requires a legacy mapping in
* order to support supplying interrupt numbers during non-DT
* registration of devices.
* interrupt controllers. If device tree is used, then first_irq will be 0 and
* irqs get mapped dynamically on the fly. However, if the controller requires
* static virq assignments (non-DT boot) then it will set that up correctly.
*/
struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
unsigned int size,
@@ -160,33 +125,25 @@ struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
const struct irq_domain_ops *ops,
void *host_data)
{
if (first_irq > 0) {
int irq_base;
struct irq_domain *domain;

domain = __irq_domain_add(of_node, size, size, 0, ops, host_data);
if (!domain)
return NULL;

if (first_irq > 0) {
if (IS_ENABLED(CONFIG_SPARSE_IRQ)) {
/*
* Set the descriptor allocator to search for a
* 1-to-1 mapping, such as irq_alloc_desc_at().
* Use of_node_to_nid() which is defined to
* numa_node_id() on platforms that have no custom
* implementation.
*/
irq_base = irq_alloc_descs(first_irq, first_irq, size,
of_node_to_nid(of_node));
if (irq_base < 0) {
/* attempt to allocated irq_descs */
int rc = irq_alloc_descs(first_irq, first_irq, size,
of_node_to_nid(of_node));
if (rc < 0)
pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n",
first_irq);
irq_base = first_irq;
}
} else
irq_base = first_irq;

return irq_domain_add_legacy(of_node, size, irq_base, 0,
ops, host_data);
}
irq_domain_associate_many(domain, first_irq, 0, size);
}

/* A linear domain is the default */
return irq_domain_add_linear(of_node, size, ops, host_data);
return domain;
}
EXPORT_SYMBOL_GPL(irq_domain_add_simple);

@@ -213,130 +170,18 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node,
void *host_data)
{
struct irq_domain *domain;
unsigned int i;

domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LEGACY, ops, host_data);
domain = __irq_domain_add(of_node, first_hwirq + size,
first_hwirq + size, 0, ops, host_data);
if (!domain)
return NULL;

domain->revmap_data.legacy.first_irq = first_irq;
domain->revmap_data.legacy.first_hwirq = first_hwirq;
domain->revmap_data.legacy.size = size;
irq_domain_associate_many(domain, first_irq, first_hwirq, size);

mutex_lock(&irq_domain_mutex);
/* Verify that all the irqs are available */
for (i = 0; i < size; i++) {
int irq = first_irq + i;
struct irq_data *irq_data = irq_get_irq_data(irq);

if (WARN_ON(!irq_data || irq_data->domain)) {
mutex_unlock(&irq_domain_mutex);
irq_domain_free(domain);
return NULL;
}
}

/* Claim all of the irqs before registering a legacy domain */
for (i = 0; i < size; i++) {
struct irq_data *irq_data = irq_get_irq_data(first_irq + i);
irq_data->hwirq = first_hwirq + i;
irq_data->domain = domain;
}
mutex_unlock(&irq_domain_mutex);

for (i = 0; i < size; i++) {
int irq = first_irq + i;
int hwirq = first_hwirq + i;

/* IRQ0 gets ignored */
if (!irq)
continue;

/* Legacy flags are left to default at this point,
* one can then use irq_create_mapping() to
* explicitly change them
*/
if (ops->map)
ops->map(domain, irq, hwirq);

/* Clear norequest flags */
irq_clear_status_flags(irq, IRQ_NOREQUEST);
}

irq_domain_add(domain);
return domain;
}
EXPORT_SYMBOL_GPL(irq_domain_add_legacy);

/**
* irq_domain_add_linear() - Allocate and register a linear revmap irq_domain.
* @of_node: pointer to interrupt controller's device tree node.
* @size: Number of interrupts in the domain.
* @ops: map/unmap domain callbacks
* @host_data: Controller private data pointer
*/
struct irq_domain *irq_domain_add_linear(struct device_node *of_node,
unsigned int size,
const struct irq_domain_ops *ops,
void *host_data)
{
struct irq_domain *domain;
unsigned int *revmap;

revmap = kzalloc_node(sizeof(*revmap) * size, GFP_KERNEL,
of_node_to_nid(of_node));
if (WARN_ON(!revmap))
return NULL;

domain = irq_domain_alloc(of_node, IRQ_DOMAIN_MAP_LINEAR, ops, host_data);
if (!domain) {
kfree(revmap);
return NULL;
}
domain->revmap_data.linear.size = size;
domain->revmap_data.linear.revmap = revmap;
irq_domain_add(domain);
return domain;
}
EXPORT_SYMBOL_GPL(irq_domain_add_linear);

struct irq_domain *irq_domain_add_nomap(struct device_node *of_node,
unsigned int max_irq,
const struct irq_domain_ops *ops,
void *host_data)
{
struct irq_domain *domain = irq_domain_alloc(of_node,
IRQ_DOMAIN_MAP_NOMAP, ops, host_data);
if (domain) {
domain->revmap_data.nomap.max_irq = max_irq ? max_irq : ~0;
irq_domain_add(domain);
}
return domain;
}
EXPORT_SYMBOL_GPL(irq_domain_add_nomap);

/**
* irq_domain_add_tree()
* @of_node: pointer to interrupt controller's device tree node.
* @ops: map/unmap domain callbacks
*
* Note: The radix tree will be allocated later during boot automatically
* (the reverse mapping will use the slow path until that happens).
*/
struct irq_domain *irq_domain_add_tree(struct device_node *of_node,
const struct irq_domain_ops *ops,
void *host_data)
{
struct irq_domain *domain = irq_domain_alloc(of_node,
IRQ_DOMAIN_MAP_TREE, ops, host_data);
if (domain) {
INIT_RADIX_TREE(&domain->revmap_data.tree, GFP_KERNEL);
irq_domain_add(domain);
}
return domain;
}
EXPORT_SYMBOL_GPL(irq_domain_add_tree);

/**
* irq_find_host() - Locates a domain for a given device node
* @node: device-tree node of the interrupt controller
@@ -385,125 +230,108 @@ void irq_set_default_host(struct irq_domain *domain)
}
EXPORT_SYMBOL_GPL(irq_set_default_host);

static void irq_domain_disassociate_many(struct irq_domain *domain,
unsigned int irq_base, int count)
static void irq_domain_disassociate(struct irq_domain *domain, unsigned int irq)
{
/*
* disassociate in reverse order;
* not strictly necessary, but nice for unwinding
*/
while (count--) {
int irq = irq_base + count;
struct irq_data *irq_data = irq_get_irq_data(irq);
irq_hw_number_t hwirq;
struct irq_data *irq_data = irq_get_irq_data(irq);
irq_hw_number_t hwirq;

if (WARN_ON(!irq_data || irq_data->domain != domain))
continue;
if (WARN(!irq_data || irq_data->domain != domain,
"virq%i doesn't exist; cannot disassociate\n", irq))
return;

hwirq = irq_data->hwirq;
irq_set_status_flags(irq, IRQ_NOREQUEST);
hwirq = irq_data->hwirq;
irq_set_status_flags(irq, IRQ_NOREQUEST);

/* remove chip and handler */
irq_set_chip_and_handler(irq, NULL, NULL);
/* remove chip and handler */
irq_set_chip_and_handler(irq, NULL, NULL);

/* Make sure it's completed */
synchronize_irq(irq);
/* Make sure it's completed */
synchronize_irq(irq);

/* Tell the PIC about it */
if (domain->ops->unmap)
domain->ops->unmap(domain, irq);
smp_mb();
/* Tell the PIC about it */
if (domain->ops->unmap)
domain->ops->unmap(domain, irq);
smp_mb();

irq_data->domain = NULL;
irq_data->hwirq = 0;
irq_data->domain = NULL;
irq_data->hwirq = 0;

/* Clear reverse map */
switch(domain->revmap_type) {
case IRQ_DOMAIN_MAP_LINEAR:
if (hwirq < domain->revmap_data.linear.size)
domain->revmap_data.linear.revmap[hwirq] = 0;
break;
case IRQ_DOMAIN_MAP_TREE:
mutex_lock(&revmap_trees_mutex);
radix_tree_delete(&domain->revmap_data.tree, hwirq);
mutex_unlock(&revmap_trees_mutex);
break;
}
/* Clear reverse map for this hwirq */
if (hwirq < domain->revmap_size) {
domain->linear_revmap[hwirq] = 0;
} else {
mutex_lock(&revmap_trees_mutex);
radix_tree_delete(&domain->revmap_tree, hwirq);
mutex_unlock(&revmap_trees_mutex);
}
}

int irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
irq_hw_number_t hwirq_base, int count)
int irq_domain_associate(struct irq_domain *domain, unsigned int virq,
irq_hw_number_t hwirq)
{
unsigned int virq = irq_base;
irq_hw_number_t hwirq = hwirq_base;
int i, ret;
struct irq_data *irq_data = irq_get_irq_data(virq);
int ret;

if (WARN(hwirq >= domain->hwirq_max,
"error: hwirq 0x%x is too large for %s\n", (int)hwirq, domain->name))
return -EINVAL;
if (WARN(!irq_data, "error: virq%i is not allocated", virq))
return -EINVAL;
if (WARN(irq_data->domain, "error: virq%i is already associated", virq))
return -EINVAL;

mutex_lock(&irq_domain_mutex);
irq_data->hwirq = hwirq;
irq_data->domain = domain;
if (domain->ops->map) {
ret = domain->ops->map(domain, virq, hwirq);
if (ret != 0) {
/*
* If map() returns -EPERM, this interrupt is protected
* by the firmware or some other service and shall not
* be mapped. Don't bother telling the user about it.
*/
if (ret != -EPERM) {
pr_info("%s didn't like hwirq-0x%lx to VIRQ%i mapping (rc=%d)\n",
domain->name, hwirq, virq, ret);
}
irq_data->domain = NULL;
irq_data->hwirq = 0;
mutex_unlock(&irq_domain_mutex);
return ret;
}

/* If not already assigned, give the domain the chip's name */
if (!domain->name && irq_data->chip)
domain->name = irq_data->chip->name;
}

if (hwirq < domain->revmap_size) {
domain->linear_revmap[hwirq] = virq;
} else {
mutex_lock(&revmap_trees_mutex);
radix_tree_insert(&domain->revmap_tree, hwirq, irq_data);
mutex_unlock(&revmap_trees_mutex);
}
mutex_unlock(&irq_domain_mutex);

irq_clear_status_flags(virq, IRQ_NOREQUEST);

return 0;
}
EXPORT_SYMBOL_GPL(irq_domain_associate);

void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base,
irq_hw_number_t hwirq_base, int count)
{
int i;

pr_debug("%s(%s, irqbase=%i, hwbase=%i, count=%i)\n", __func__,
of_node_full_name(domain->of_node), irq_base, (int)hwirq_base, count);

for (i = 0; i < count; i++) {
struct irq_data *irq_data = irq_get_irq_data(virq + i);

if (WARN(!irq_data, "error: irq_desc not allocated; "
"irq=%i hwirq=0x%x\n", virq + i, (int)hwirq + i))
return -EINVAL;
if (WARN(irq_data->domain, "error: irq_desc already associated; "
"irq=%i hwirq=0x%x\n", virq + i, (int)hwirq + i))
return -EINVAL;
};

for (i = 0; i < count; i++, virq++, hwirq++) {
struct irq_data *irq_data = irq_get_irq_data(virq);

irq_data->hwirq = hwirq;
irq_data->domain = domain;
if (domain->ops->map) {
ret = domain->ops->map(domain, virq, hwirq);
if (ret != 0) {
/*
* If map() returns -EPERM, this interrupt is protected
* by the firmware or some other service and shall not
* be mapped.
*
* Since on some platforms we blindly try to map everything
* we end up with a log full of backtraces.
*
* So instead, we silently fail on -EPERM, it is the
* responsibility of the PIC driver to display a relevant
* message if needed.
*/
if (ret != -EPERM) {
pr_err("irq-%i==>hwirq-0x%lx mapping failed: %d\n",
virq, hwirq, ret);
WARN_ON(1);
}
irq_data->domain = NULL;
irq_data->hwirq = 0;
goto err_unmap;
}
}

switch (domain->revmap_type) {
case IRQ_DOMAIN_MAP_LINEAR:
if (hwirq < domain->revmap_data.linear.size)
domain->revmap_data.linear.revmap[hwirq] = virq;
break;
case IRQ_DOMAIN_MAP_TREE:
mutex_lock(&revmap_trees_mutex);
radix_tree_insert(&domain->revmap_data.tree, hwirq, irq_data);
mutex_unlock(&revmap_trees_mutex);
break;
}

irq_clear_status_flags(virq, IRQ_NOREQUEST);
irq_domain_associate(domain, irq_base + i, hwirq_base + i);
}

return 0;

err_unmap:
irq_domain_disassociate_many(domain, irq_base, i);
return -EINVAL;
}
EXPORT_SYMBOL_GPL(irq_domain_associate_many);

@@ -513,7 +341,9 @@ EXPORT_SYMBOL_GPL(irq_domain_associate_many);
*
* This routine is used for irq controllers which can choose the hardware
* interrupt numbers they generate. In such a case it's simplest to use
* the linux irq as the hardware interrupt number.
* the linux irq as the hardware interrupt number. It still uses the linear
* or radix tree to store the mapping, but the irq controller can optimize
* the revmap path by using the hwirq directly.
*/
unsigned int irq_create_direct_mapping(struct irq_domain *domain)
{
@@ -522,17 +352,14 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
if (domain == NULL)
domain = irq_default_domain;

if (WARN_ON(!domain || domain->revmap_type != IRQ_DOMAIN_MAP_NOMAP))
return 0;

virq = irq_alloc_desc_from(1, of_node_to_nid(domain->of_node));
if (!virq) {
pr_debug("create_direct virq allocation failed\n");
return 0;
}
if (virq >= domain->revmap_data.nomap.max_irq) {
if (virq >= domain->revmap_direct_max_irq) {
pr_err("ERROR: no free irqs available below %i maximum\n",
domain->revmap_data.nomap.max_irq);
domain->revmap_direct_max_irq);
irq_free_desc(virq);
return 0;
}
@@ -569,9 +396,7 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
if (domain == NULL)
domain = irq_default_domain;
if (domain == NULL) {
pr_warning("irq_create_mapping called for"
" NULL domain, hwirq=%lx\n", hwirq);
WARN_ON(1);
WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq);
return 0;
}
pr_debug("-> using domain @%p\n", domain);
@@ -583,10 +408,6 @@ unsigned int irq_create_mapping(struct irq_domain *domain,
return virq;
}

/* Get a virtual interrupt number */
if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY)
return irq_domain_legacy_revmap(domain, hwirq);

/* Allocate a virtual interrupt number */
hint = hwirq % nr_irqs;
if (hint == 0)
@@ -639,12 +460,7 @@ int irq_create_strict_mappings(struct irq_domain *domain, unsigned int irq_base,
if (unlikely(ret < 0))
return ret;

ret = irq_domain_associate_many(domain, irq_base, hwirq_base, count);
if (unlikely(ret < 0)) {
irq_free_descs(irq_base, count);
return ret;
}

irq_domain_associate_many(domain, irq_base, hwirq_base, count);
return 0;
}
EXPORT_SYMBOL_GPL(irq_create_strict_mappings);
@@ -659,20 +475,8 @@ unsigned int irq_create_of_mapping(struct device_node *controller,

domain = controller ? irq_find_host(controller) : irq_default_domain;
if (!domain) {
#ifdef CONFIG_MIPS
/*
* Workaround to avoid breaking interrupt controller drivers
* that don't yet register an irq_domain. This is temporary
* code. ~~~gcl, Feb 24, 2012
*
* Scheduled for removal in Linux v3.6. That should be enough
* time.
*/
if (intsize > 0)
return intspec[0];
#endif
pr_warning("no irq domain found for %s !\n",
of_node_full_name(controller));
pr_warn("no irq domain found for %s !\n",
of_node_full_name(controller));
return 0;
}

@@ -714,11 +518,7 @@ void irq_dispose_mapping(unsigned int virq)
if (WARN_ON(domain == NULL))
return;

/* Never unmap legacy interrupts */
if (domain->revmap_type == IRQ_DOMAIN_MAP_LEGACY)
return;

irq_domain_disassociate_many(domain, virq, 1);
irq_domain_disassociate(domain, virq);
irq_free_desc(virq);
}
EXPORT_SYMBOL_GPL(irq_dispose_mapping);
@@ -739,63 +539,51 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
if (domain == NULL)
return 0;

switch (domain->revmap_type) {
case IRQ_DOMAIN_MAP_LEGACY:
return irq_domain_legacy_revmap(domain, hwirq);
case IRQ_DOMAIN_MAP_LINEAR:
return irq_linear_revmap(domain, hwirq);
case IRQ_DOMAIN_MAP_TREE:
rcu_read_lock();
data = radix_tree_lookup(&domain->revmap_data.tree, hwirq);
rcu_read_unlock();
if (data)
return data->irq;
break;
case IRQ_DOMAIN_MAP_NOMAP:
if (hwirq < domain->revmap_direct_max_irq) {
data = irq_get_irq_data(hwirq);
if (data && (data->domain == domain) && (data->hwirq == hwirq))
return hwirq;
break;
}

return 0;
/* Check if the hwirq is in the linear revmap. */
if (hwirq < domain->revmap_size)
return domain->linear_revmap[hwirq];

rcu_read_lock();
data = radix_tree_lookup(&domain->revmap_tree, hwirq);
rcu_read_unlock();
return data ? data->irq : 0;
}
EXPORT_SYMBOL_GPL(irq_find_mapping);

/**
* irq_linear_revmap() - Find a linux irq from a hw irq number.
* @domain: domain owning this hardware interrupt
* @hwirq: hardware irq number in that domain space
*
* This is a fast path that can be called directly by irq controller code to
* save a handful of instructions.
*/
unsigned int irq_linear_revmap(struct irq_domain *domain,
irq_hw_number_t hwirq)
{
BUG_ON(domain->revmap_type != IRQ_DOMAIN_MAP_LINEAR);

/* Check revmap bounds; complain if exceeded */
if (WARN_ON(hwirq >= domain->revmap_data.linear.size))
return 0;

return domain->revmap_data.linear.revmap[hwirq];
}
EXPORT_SYMBOL_GPL(irq_linear_revmap);

#ifdef CONFIG_IRQ_DOMAIN_DEBUG
static int virq_debug_show(struct seq_file *m, void *private)
{
unsigned long flags;
struct irq_desc *desc;
const char *p;
static const char none[] = "none";
void *data;
struct irq_domain *domain;
struct radix_tree_iter iter;
void *data, **slot;
int i;

seq_printf(m, "%-5s %-7s %-15s %-*s %s\n", "irq", "hwirq",
seq_printf(m, " %-16s %-6s %-10s %-10s %s\n",
"name", "mapped", "linear-max", "direct-max", "devtree-node");
mutex_lock(&irq_domain_mutex);
list_for_each_entry(domain, &irq_domain_list, link) {
int count = 0;
radix_tree_for_each_slot(slot, &domain->revmap_tree, &iter, 0)
count++;
seq_printf(m, "%c%-16s %6u %10u %10u %s\n",
domain == irq_default_domain ? '*' : ' ', domain->name,
domain->revmap_size + count, domain->revmap_size,
domain->revmap_direct_max_irq,
domain->of_node ? of_node_full_name(domain->of_node) : "");
}
mutex_unlock(&irq_domain_mutex);

seq_printf(m, "%-5s %-7s %-15s %-*s %6s %-14s %s\n", "irq", "hwirq",
"chip name", (int)(2 * sizeof(void *) + 2), "chip data",
"domain name");
"active", "type", "domain");

for (i = 1; i < nr_irqs; i++) {
desc = irq_to_desc(i);
@@ -803,28 +591,28 @@ static int virq_debug_show(struct seq_file *m, void *private)
continue;

raw_spin_lock_irqsave(&desc->lock, flags);
domain = desc->irq_data.domain;

if (desc->action && desc->action->handler) {
if (domain) {
struct irq_chip *chip;
int hwirq = desc->irq_data.hwirq;
bool direct;

seq_printf(m, "%5d ", i);
seq_printf(m, "0x%05lx ", desc->irq_data.hwirq);
seq_printf(m, "0x%05x ", hwirq);

chip = irq_desc_get_chip(desc);
if (chip && chip->name)
p = chip->name;
else
p = none;
seq_printf(m, "%-15s ", p);
seq_printf(m, "%-15s ", (chip && chip->name) ? chip->name : "none");

data = irq_desc_get_chip_data(desc);
seq_printf(m, data ? "0x%p " : " %p ", data);

if (desc->irq_data.domain)
p = of_node_full_name(desc->irq_data.domain->of_node);
else
p = none;
seq_printf(m, "%s\n", p);
seq_printf(m, " %c ", (desc->action && desc->action->handler) ? '*' : ' ');
direct = (i == hwirq) && (i < domain->revmap_direct_max_irq);
seq_printf(m, "%6s%-8s ",
(hwirq < domain->revmap_size) ? "LINEAR" : "RADIX",
direct ? "(DIRECT)" : "");
seq_printf(m, "%s\n", desc->irq_data.domain->name);
}

raw_spin_unlock_irqrestore(&desc->lock, flags);
@@ -921,18 +709,3 @@ const struct irq_domain_ops irq_domain_simple_ops = {
.xlate = irq_domain_xlate_onetwocell,
};
EXPORT_SYMBOL_GPL(irq_domain_simple_ops);

#ifdef CONFIG_OF_IRQ
void irq_domain_generate_simple(const struct of_device_id *match,
u64 phys_base, unsigned int irq_start)
{
struct device_node *node;
pr_debug("looking for phys_base=%llx, irq_start=%i\n",
(unsigned long long) phys_base, (int) irq_start);
node = of_find_matching_node_by_address(NULL, match, phys_base);
if (node)
irq_domain_add_legacy(node, 32, irq_start, 0,
&irq_domain_simple_ops, NULL);
}
EXPORT_SYMBOL_GPL(irq_domain_generate_simple);
#endif

@@ -462,6 +462,8 @@ int show_interrupts(struct seq_file *p, void *v)
} else {
seq_printf(p, " %8s", "None");
}
if (desc->irq_data.domain)
seq_printf(p, " %*d", prec, (int) desc->irq_data.hwirq);
#ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL
seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge");
#endif

@@ -455,7 +455,7 @@ const struct kernel_symbol *find_symbol(const char *name,
EXPORT_SYMBOL_GPL(find_symbol);

/* Search for module by name: must hold module_mutex. */
static struct module *find_module_all(const char *name,
static struct module *find_module_all(const char *name, size_t len,
bool even_unformed)
{
struct module *mod;
@@ -463,7 +463,7 @@ static struct module *find_module_all(const char *name,
list_for_each_entry(mod, &modules, list) {
if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
continue;
if (strcmp(mod->name, name) == 0)
if (strlen(mod->name) == len && !memcmp(mod->name, name, len))
return mod;
}
return NULL;
@@ -471,7 +471,7 @@ static struct module *find_module_all(const char *name,

struct module *find_module(const char *name)
{
return find_module_all(name, false);
return find_module_all(name, strlen(name), false);
}
EXPORT_SYMBOL_GPL(find_module);

@@ -482,23 +482,28 @@ static inline void __percpu *mod_percpu(struct module *mod)
return mod->percpu;
}

static int percpu_modalloc(struct module *mod,
unsigned long size, unsigned long align)
static int percpu_modalloc(struct module *mod, struct load_info *info)
{
Elf_Shdr *pcpusec = &info->sechdrs[info->index.pcpu];
unsigned long align = pcpusec->sh_addralign;

if (!pcpusec->sh_size)
return 0;

if (align > PAGE_SIZE) {
printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n",
mod->name, align, PAGE_SIZE);
align = PAGE_SIZE;
}

mod->percpu = __alloc_reserved_percpu(size, align);
mod->percpu = __alloc_reserved_percpu(pcpusec->sh_size, align);
if (!mod->percpu) {
printk(KERN_WARNING
"%s: Could not allocate %lu bytes percpu data\n",
mod->name, size);
mod->name, (unsigned long)pcpusec->sh_size);
return -ENOMEM;
}
mod->percpu_size = size;
mod->percpu_size = pcpusec->sh_size;
return 0;
}

@@ -563,10 +568,12 @@ static inline void __percpu *mod_percpu(struct module *mod)
{
return NULL;
}
static inline int percpu_modalloc(struct module *mod,
unsigned long size, unsigned long align)
static int percpu_modalloc(struct module *mod, struct load_info *info)
{
return -ENOMEM;
/* UP modules shouldn't have this section: ENOMEM isn't quite right */
if (info->sechdrs[info->index.pcpu].sh_size != 0)
return -ENOMEM;
return 0;
}
static inline void percpu_modfree(struct module *mod)
{
@@ -2927,7 +2934,6 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
{
/* Module within temporary copy. */
struct module *mod;
Elf_Shdr *pcpusec;
int err;

mod = setup_load_info(info, flags);
@@ -2942,17 +2948,10 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
err = module_frob_arch_sections(info->hdr, info->sechdrs,
info->secstrings, mod);
if (err < 0)
goto out;
return ERR_PTR(err);

pcpusec = &info->sechdrs[info->index.pcpu];
if (pcpusec->sh_size) {
/* We have a special allocation for this section. */
err = percpu_modalloc(mod,
pcpusec->sh_size, pcpusec->sh_addralign);
if (err)
goto out;
pcpusec->sh_flags &= ~(unsigned long)SHF_ALLOC;
}
/* We will do a special allocation for per-cpu sections later. */
info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC;

/* Determine total sizes, and put offsets in sh_entsize. For now
this is done generically; there doesn't appear to be any
@@ -2963,17 +2962,12 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
/* Allocate and move to the final place */
err = move_module(mod, info);
if (err)
goto free_percpu;
return ERR_PTR(err);

/* Module has been copied to its final place now: return it. */
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
kmemleak_load_module(mod, info);
return mod;

free_percpu:
percpu_modfree(mod);
out:
return ERR_PTR(err);
}

/* mod is no longer valid after this! */
@@ -3014,7 +3008,7 @@ static bool finished_loading(const char *name)
bool ret;

mutex_lock(&module_mutex);
mod = find_module_all(name, true);
mod = find_module_all(name, strlen(name), true);
ret = !mod || mod->state == MODULE_STATE_LIVE
|| mod->state == MODULE_STATE_GOING;
mutex_unlock(&module_mutex);
@@ -3152,7 +3146,8 @@ static int add_unformed_module(struct module *mod)

again:
mutex_lock(&module_mutex);
if ((old = find_module_all(mod->name, true)) != NULL) {
old = find_module_all(mod->name, strlen(mod->name), true);
if (old != NULL) {
if (old->state == MODULE_STATE_COMING
|| old->state == MODULE_STATE_UNFORMED) {
/* Wait in case it fails to load. */
@@ -3198,6 +3193,17 @@ out:
return err;
}

static int unknown_module_param_cb(char *param, char *val, const char *modname)
{
/* Check for magic 'dyndbg' arg */
int ret = ddebug_dyndbg_module_param_cb(param, val, modname);
if (ret != 0) {
printk(KERN_WARNING "%s: unknown parameter '%s' ignored\n",
modname, param);
}
return 0;
}

/* Allocate and load the module: note that size of section 0 is always
zero, and we rely on this for optional sections. */
static int load_module(struct load_info *info, const char __user *uargs,
@@ -3237,6 +3243,11 @@ static int load_module(struct load_info *info, const char __user *uargs,
}
#endif

/* To avoid stressing percpu allocator, do this once we're unique. */
err = percpu_modalloc(mod, info);
if (err)
goto unlink_mod;

/* Now module is in final location, initialize linked lists, etc. */
err = module_unload_init(mod);
if (err)
@@ -3284,7 +3295,7 @@ static int load_module(struct load_info *info, const char __user *uargs,

/* Module is ready to execute: parsing args may do that. */
err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-32768, 32767, &ddebug_dyndbg_module_param_cb);
-32768, 32767, unknown_module_param_cb);
if (err < 0)
goto bug_cleanup;

@@ -3563,10 +3574,8 @@ unsigned long module_kallsyms_lookup_name(const char *name)
/* Don't lock: we're in enough trouble already. */
preempt_disable();
if ((colon = strchr(name, ':')) != NULL) {
*colon = '\0';
if ((mod = find_module(name)) != NULL)
if ((mod = find_module_all(name, colon - name, false)) != NULL)
ret = mod_find_symname(mod, colon+1);
*colon = ':';
} else {
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)

@@ -18,6 +18,7 @@
* Also see Documentation/mutex-design.txt.
*/
#include <linux/mutex.h>
#include <linux/ww_mutex.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/export.h>

@@ -15,6 +15,7 @@
#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/ftrace.h>
#include <linux/reboot.h>
#include <linux/delay.h>
#include <linux/kexec.h>
@@ -399,8 +400,11 @@ struct slowpath_args {
static void warn_slowpath_common(const char *file, int line, void *caller,
unsigned taint, struct slowpath_args *args)
{
printk(KERN_WARNING "------------[ cut here ]------------\n");
printk(KERN_WARNING "WARNING: at %s:%d %pS()\n", file, line, caller);
disable_trace_on_warning();

pr_warn("------------[ cut here ]------------\n");
pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS()\n",
raw_smp_processor_id(), current->pid, file, line, caller);

if (args)
vprintk(args->fmt, args->args);

@@ -787,7 +787,7 @@ static void __init kernel_add_sysfs_param(const char *name,
}

/*
* param_sysfs_builtin - add contents in /sys/parameters for built-in modules
* param_sysfs_builtin - add sysfs parameters for built-in modules
*
* Add module_parameters to sysfs for "modules" built into the kernel.
*

@@ -51,59 +51,28 @@ static int check_clock(const clockid_t which_clock)
return error;
}

static inline union cpu_time_count
static inline unsigned long long
timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
{
union cpu_time_count ret;
ret.sched = 0; /* high half always zero when .cpu used */
unsigned long long ret;

ret = 0; /* high half always zero when .cpu used */
if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
} else {
ret.cpu = timespec_to_cputime(tp);
ret = cputime_to_expires(timespec_to_cputime(tp));
}
return ret;
}

static void sample_to_timespec(const clockid_t which_clock,
union cpu_time_count cpu,
unsigned long long expires,
struct timespec *tp)
{
if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
*tp = ns_to_timespec(cpu.sched);
*tp = ns_to_timespec(expires);
else
cputime_to_timespec(cpu.cpu, tp);
}

static inline int cpu_time_before(const clockid_t which_clock,
union cpu_time_count now,
union cpu_time_count then)
{
if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
return now.sched < then.sched;
} else {
return now.cpu < then.cpu;
}
}
static inline void cpu_time_add(const clockid_t which_clock,
union cpu_time_count *acc,
union cpu_time_count val)
{
if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
acc->sched += val.sched;
} else {
acc->cpu += val.cpu;
}
}
static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
union cpu_time_count a,
union cpu_time_count b)
{
if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
a.sched -= b.sched;
} else {
a.cpu -= b.cpu;
}
return a;
cputime_to_timespec((__force cputime_t)expires, tp);
}

/*
@@ -111,47 +80,31 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
|
||||
* given the current clock sample.
|
||||
*/
|
||||
static void bump_cpu_timer(struct k_itimer *timer,
|
||||
union cpu_time_count now)
|
||||
unsigned long long now)
|
||||
{
|
||||
int i;
|
||||
unsigned long long delta, incr;
|
||||
|
||||
if (timer->it.cpu.incr.sched == 0)
|
||||
if (timer->it.cpu.incr == 0)
|
||||
return;
|
||||
|
||||
if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
|
||||
unsigned long long delta, incr;
|
||||
if (now < timer->it.cpu.expires)
|
||||
return;
|
||||
|
||||
if (now.sched < timer->it.cpu.expires.sched)
|
||||
return;
|
||||
incr = timer->it.cpu.incr.sched;
|
||||
delta = now.sched + incr - timer->it.cpu.expires.sched;
|
||||
/* Don't use (incr*2 < delta), incr*2 might overflow. */
|
||||
for (i = 0; incr < delta - incr; i++)
|
||||
incr = incr << 1;
|
||||
for (; i >= 0; incr >>= 1, i--) {
|
||||
if (delta < incr)
|
||||
continue;
|
||||
timer->it.cpu.expires.sched += incr;
|
||||
timer->it_overrun += 1 << i;
|
||||
delta -= incr;
|
||||
}
|
||||
} else {
|
||||
cputime_t delta, incr;
|
||||
incr = timer->it.cpu.incr;
|
||||
delta = now + incr - timer->it.cpu.expires;
|
||||
|
||||
if (now.cpu < timer->it.cpu.expires.cpu)
|
||||
return;
|
||||
incr = timer->it.cpu.incr.cpu;
|
||||
delta = now.cpu + incr - timer->it.cpu.expires.cpu;
|
||||
/* Don't use (incr*2 < delta), incr*2 might overflow. */
|
||||
for (i = 0; incr < delta - incr; i++)
|
||||
incr += incr;
|
||||
for (; i >= 0; incr = incr >> 1, i--) {
|
||||
if (delta < incr)
|
||||
continue;
|
||||
timer->it.cpu.expires.cpu += incr;
|
||||
timer->it_overrun += 1 << i;
|
||||
delta -= incr;
|
||||
}
|
||||
/* Don't use (incr*2 < delta), incr*2 might overflow. */
|
||||
for (i = 0; incr < delta - incr; i++)
|
||||
incr = incr << 1;
|
||||
|
||||
for (; i >= 0; incr >>= 1, i--) {
|
||||
if (delta < incr)
|
||||
continue;
|
||||
|
||||
timer->it.cpu.expires += incr;
|
||||
timer->it_overrun += 1 << i;
|
||||
delta -= incr;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -170,21 +123,21 @@ static inline int task_cputime_zero(const struct task_cputime *cputime)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline cputime_t prof_ticks(struct task_struct *p)
|
||||
static inline unsigned long long prof_ticks(struct task_struct *p)
|
||||
{
|
||||
cputime_t utime, stime;
|
||||
|
||||
task_cputime(p, &utime, &stime);
|
||||
|
||||
return utime + stime;
|
||||
return cputime_to_expires(utime + stime);
|
||||
}
|
||||
static inline cputime_t virt_ticks(struct task_struct *p)
|
||||
static inline unsigned long long virt_ticks(struct task_struct *p)
|
||||
{
|
||||
cputime_t utime;
|
||||
|
||||
task_cputime(p, &utime, NULL);
|
||||
|
||||
return utime;
|
||||
return cputime_to_expires(utime);
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -225,19 +178,19 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
|
||||
* Sample a per-thread clock for the given task.
|
||||
*/
|
||||
static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
|
||||
union cpu_time_count *cpu)
|
||||
unsigned long long *sample)
|
||||
{
|
||||
switch (CPUCLOCK_WHICH(which_clock)) {
|
||||
default:
|
||||
return -EINVAL;
|
||||
case CPUCLOCK_PROF:
|
||||
cpu->cpu = prof_ticks(p);
|
||||
*sample = prof_ticks(p);
|
||||
break;
|
||||
case CPUCLOCK_VIRT:
|
||||
cpu->cpu = virt_ticks(p);
|
||||
*sample = virt_ticks(p);
|
||||
break;
|
||||
case CPUCLOCK_SCHED:
|
||||
cpu->sched = task_sched_runtime(p);
|
||||
*sample = task_sched_runtime(p);
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
@@ -284,7 +237,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
|
||||
*/
|
||||
static int cpu_clock_sample_group(const clockid_t which_clock,
|
||||
struct task_struct *p,
|
||||
union cpu_time_count *cpu)
|
||||
unsigned long long *sample)
|
||||
{
|
||||
struct task_cputime cputime;
|
||||
|
||||
@@ -293,15 +246,15 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
|
||||
return -EINVAL;
|
||||
case CPUCLOCK_PROF:
|
||||
thread_group_cputime(p, &cputime);
|
||||
cpu->cpu = cputime.utime + cputime.stime;
|
||||
*sample = cputime_to_expires(cputime.utime + cputime.stime);
|
||||
break;
|
||||
case CPUCLOCK_VIRT:
|
||||
thread_group_cputime(p, &cputime);
|
||||
cpu->cpu = cputime.utime;
|
||||
*sample = cputime_to_expires(cputime.utime);
|
||||
break;
|
||||
case CPUCLOCK_SCHED:
|
||||
thread_group_cputime(p, &cputime);
|
||||
cpu->sched = cputime.sum_exec_runtime;
|
||||
*sample = cputime.sum_exec_runtime;
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
@@ -312,7 +265,7 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
|
||||
{
|
||||
const pid_t pid = CPUCLOCK_PID(which_clock);
|
||||
int error = -EINVAL;
|
||||
union cpu_time_count rtn;
|
||||
unsigned long long rtn;
|
||||
|
||||
if (pid == 0) {
|
||||
/*
|
||||
@@ -446,6 +399,15 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
|
||||
return ret;
|
||||
}
|
||||

static void cleanup_timers_list(struct list_head *head,
				unsigned long long curr)
{
	struct cpu_timer_list *timer, *next;

	list_for_each_entry_safe(timer, next, head, entry)
		list_del_init(&timer->entry);
}

/*
|
||||
* Clean out CPU timers still ticking when a thread exited. The task
|
||||
* pointer is cleared, and the expiry time is replaced with the residual
|
||||
@@ -456,37 +418,12 @@ static void cleanup_timers(struct list_head *head,
|
||||
cputime_t utime, cputime_t stime,
|
||||
unsigned long long sum_exec_runtime)
|
||||
{
|
||||
struct cpu_timer_list *timer, *next;
|
||||
|
||||
cputime_t ptime = utime + stime;
|
||||
|
||||
list_for_each_entry_safe(timer, next, head, entry) {
|
||||
list_del_init(&timer->entry);
|
||||
if (timer->expires.cpu < ptime) {
|
||||
timer->expires.cpu = 0;
|
||||
} else {
|
||||
timer->expires.cpu -= ptime;
|
||||
}
|
||||
}
|
||||
|
||||
++head;
|
||||
list_for_each_entry_safe(timer, next, head, entry) {
|
||||
list_del_init(&timer->entry);
|
||||
if (timer->expires.cpu < utime) {
|
||||
timer->expires.cpu = 0;
|
||||
} else {
|
||||
timer->expires.cpu -= utime;
|
||||
}
|
||||
}
|
||||
|
||||
++head;
|
||||
list_for_each_entry_safe(timer, next, head, entry) {
|
||||
list_del_init(&timer->entry);
|
||||
if (timer->expires.sched < sum_exec_runtime) {
|
||||
timer->expires.sched = 0;
|
||||
} else {
|
||||
timer->expires.sched -= sum_exec_runtime;
|
||||
}
|
||||
}
|
||||
cleanup_timers_list(head, cputime_to_expires(ptime));
|
||||
cleanup_timers_list(++head, cputime_to_expires(utime));
|
||||
cleanup_timers_list(++head, sum_exec_runtime);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -516,17 +453,21 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
|
||||
tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
|
||||
}
|
||||
|
||||
static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
|
||||
static void clear_dead_task(struct k_itimer *itimer, unsigned long long now)
|
||||
{
|
||||
struct cpu_timer_list *timer = &itimer->it.cpu;
|
||||
|
||||
/*
|
||||
* That's all for this thread or process.
|
||||
* We leave our residual in expires to be reported.
|
||||
*/
|
||||
put_task_struct(timer->it.cpu.task);
|
||||
timer->it.cpu.task = NULL;
|
||||
timer->it.cpu.expires = cpu_time_sub(timer->it_clock,
|
||||
timer->it.cpu.expires,
|
||||
now);
|
||||
put_task_struct(timer->task);
|
||||
timer->task = NULL;
|
||||
if (timer->expires < now) {
|
||||
timer->expires = 0;
|
||||
} else {
|
||||
timer->expires -= now;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int expires_gt(cputime_t expires, cputime_t new_exp)
|
||||
@@ -558,14 +499,14 @@ static void arm_timer(struct k_itimer *timer)
|
||||
|
||||
listpos = head;
|
||||
list_for_each_entry(next, head, entry) {
|
||||
if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
|
||||
if (nt->expires < next->expires)
|
||||
break;
|
||||
listpos = &next->entry;
|
||||
}
|
||||
list_add(&nt->entry, listpos);
|
||||
|
||||
if (listpos == head) {
|
||||
union cpu_time_count *exp = &nt->expires;
|
||||
unsigned long long exp = nt->expires;
|
||||
|
||||
/*
|
||||
* We are the new earliest-expiring POSIX 1.b timer, hence
|
||||
@@ -576,17 +517,17 @@ static void arm_timer(struct k_itimer *timer)
|
||||
|
||||
switch (CPUCLOCK_WHICH(timer->it_clock)) {
|
||||
case CPUCLOCK_PROF:
|
||||
if (expires_gt(cputime_expires->prof_exp, exp->cpu))
|
||||
cputime_expires->prof_exp = exp->cpu;
|
||||
if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp)))
|
||||
cputime_expires->prof_exp = expires_to_cputime(exp);
|
||||
break;
|
||||
case CPUCLOCK_VIRT:
|
||||
if (expires_gt(cputime_expires->virt_exp, exp->cpu))
|
||||
cputime_expires->virt_exp = exp->cpu;
|
||||
if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp)))
|
||||
cputime_expires->virt_exp = expires_to_cputime(exp);
|
||||
break;
|
||||
case CPUCLOCK_SCHED:
|
||||
if (cputime_expires->sched_exp == 0 ||
|
||||
cputime_expires->sched_exp > exp->sched)
|
||||
cputime_expires->sched_exp = exp->sched;
|
||||
cputime_expires->sched_exp > exp)
|
||||
cputime_expires->sched_exp = exp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -601,20 +542,20 @@ static void cpu_timer_fire(struct k_itimer *timer)
|
||||
/*
|
||||
* User don't want any signal.
|
||||
*/
|
||||
timer->it.cpu.expires.sched = 0;
|
||||
timer->it.cpu.expires = 0;
|
||||
} else if (unlikely(timer->sigq == NULL)) {
|
||||
/*
|
||||
* This a special case for clock_nanosleep,
|
||||
* not a normal timer from sys_timer_create.
|
||||
*/
|
||||
wake_up_process(timer->it_process);
|
||||
timer->it.cpu.expires.sched = 0;
|
||||
} else if (timer->it.cpu.incr.sched == 0) {
|
||||
timer->it.cpu.expires = 0;
|
||||
} else if (timer->it.cpu.incr == 0) {
|
||||
/*
|
||||
* One-shot timer. Clear it as soon as it's fired.
|
||||
*/
|
||||
posix_timer_event(timer, 0);
|
||||
timer->it.cpu.expires.sched = 0;
|
||||
timer->it.cpu.expires = 0;
|
||||
} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
|
||||
/*
|
||||
* The signal did not get queued because the signal
|
||||
@@ -632,7 +573,7 @@ static void cpu_timer_fire(struct k_itimer *timer)
|
||||
*/
|
||||
static int cpu_timer_sample_group(const clockid_t which_clock,
|
||||
struct task_struct *p,
|
||||
union cpu_time_count *cpu)
|
||||
unsigned long long *sample)
|
||||
{
|
||||
struct task_cputime cputime;
|
||||
|
||||
@@ -641,13 +582,13 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
|
||||
default:
|
||||
return -EINVAL;
|
||||
case CPUCLOCK_PROF:
|
||||
cpu->cpu = cputime.utime + cputime.stime;
|
||||
*sample = cputime_to_expires(cputime.utime + cputime.stime);
|
||||
break;
|
||||
case CPUCLOCK_VIRT:
|
||||
cpu->cpu = cputime.utime;
|
||||
*sample = cputime_to_expires(cputime.utime);
|
||||
break;
|
||||
case CPUCLOCK_SCHED:
|
||||
cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
|
||||
*sample = cputime.sum_exec_runtime + task_delta_exec(p);
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
@@ -694,7 +635,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
|
||||
struct itimerspec *new, struct itimerspec *old)
|
||||
{
|
||||
struct task_struct *p = timer->it.cpu.task;
|
||||
union cpu_time_count old_expires, new_expires, old_incr, val;
|
||||
unsigned long long old_expires, new_expires, old_incr, val;
|
||||
int ret;
|
||||
|
||||
if (unlikely(p == NULL)) {
|
||||
@@ -749,7 +690,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
|
||||
}
|
||||
|
||||
if (old) {
|
||||
if (old_expires.sched == 0) {
|
||||
if (old_expires == 0) {
|
||||
old->it_value.tv_sec = 0;
|
||||
old->it_value.tv_nsec = 0;
|
||||
} else {
|
||||
@@ -764,11 +705,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
|
||||
* new setting.
|
||||
*/
|
||||
bump_cpu_timer(timer, val);
|
||||
if (cpu_time_before(timer->it_clock, val,
|
||||
timer->it.cpu.expires)) {
|
||||
old_expires = cpu_time_sub(
|
||||
timer->it_clock,
|
||||
timer->it.cpu.expires, val);
|
||||
if (val < timer->it.cpu.expires) {
|
||||
old_expires = timer->it.cpu.expires - val;
|
||||
sample_to_timespec(timer->it_clock,
|
||||
old_expires,
|
||||
&old->it_value);
|
||||
@@ -791,8 +729,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) {
|
||||
cpu_time_add(timer->it_clock, &new_expires, val);
|
||||
if (new_expires != 0 && !(flags & TIMER_ABSTIME)) {
|
||||
new_expires += val;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -801,8 +739,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
|
||||
* arm the timer (we'll just fake it for timer_gettime).
|
||||
*/
|
||||
timer->it.cpu.expires = new_expires;
|
||||
if (new_expires.sched != 0 &&
|
||||
cpu_time_before(timer->it_clock, val, new_expires)) {
|
||||
if (new_expires != 0 && val < new_expires) {
|
||||
arm_timer(timer);
|
||||
}
|
||||
|
||||
@@ -826,8 +763,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
|
||||
timer->it_overrun_last = 0;
|
||||
timer->it_overrun = -1;
|
||||
|
||||
if (new_expires.sched != 0 &&
|
||||
!cpu_time_before(timer->it_clock, val, new_expires)) {
|
||||
if (new_expires != 0 && !(val < new_expires)) {
|
||||
/*
|
||||
* The designated time already passed, so we notify
|
||||
* immediately, even if the thread never runs to
|
||||
@@ -849,7 +785,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
|
||||
|
||||
static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
|
||||
{
|
||||
union cpu_time_count now;
|
||||
unsigned long long now;
|
||||
struct task_struct *p = timer->it.cpu.task;
|
||||
int clear_dead;
|
||||
|
||||
@@ -859,7 +795,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
|
||||
sample_to_timespec(timer->it_clock,
|
||||
timer->it.cpu.incr, &itp->it_interval);
|
||||
|
||||
if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all. */
|
||||
if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */
|
||||
itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
|
||||
return;
|
||||
}
|
||||
@@ -891,7 +827,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
|
||||
*/
|
||||
put_task_struct(p);
|
||||
timer->it.cpu.task = NULL;
|
||||
timer->it.cpu.expires.sched = 0;
|
||||
timer->it.cpu.expires = 0;
|
||||
read_unlock(&tasklist_lock);
|
||||
goto dead;
|
||||
} else {
|
||||
@@ -912,10 +848,9 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
|
||||
goto dead;
|
||||
}
|
||||
|
||||
if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) {
|
||||
if (now < timer->it.cpu.expires) {
|
||||
sample_to_timespec(timer->it_clock,
|
||||
cpu_time_sub(timer->it_clock,
|
||||
timer->it.cpu.expires, now),
|
||||
timer->it.cpu.expires - now,
|
||||
&itp->it_value);
|
||||
} else {
|
||||
/*
|
||||
@@ -927,6 +862,28 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
|
||||
}
|
||||
}

static unsigned long long
check_timers_list(struct list_head *timers,
		  struct list_head *firing,
		  unsigned long long curr)
{
	int maxfire = 20;

	while (!list_empty(timers)) {
		struct cpu_timer_list *t;

		t = list_first_entry(timers, struct cpu_timer_list, entry);

		if (!--maxfire || curr < t->expires)
			return t->expires;

		t->firing = 1;
		list_move_tail(&t->entry, firing);
	}

	return 0;
}

/*
|
||||
* Check for any per-thread CPU timers that have fired and move them off
|
||||
* the tsk->cpu_timers[N] list onto the firing list. Here we update the
|
||||
@@ -935,54 +892,20 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
|
||||
static void check_thread_timers(struct task_struct *tsk,
|
||||
struct list_head *firing)
|
||||
{
|
||||
int maxfire;
|
||||
struct list_head *timers = tsk->cpu_timers;
|
||||
struct signal_struct *const sig = tsk->signal;
|
||||
struct task_cputime *tsk_expires = &tsk->cputime_expires;
|
||||
unsigned long long expires;
|
||||
unsigned long soft;
|
||||
|
||||
maxfire = 20;
|
||||
tsk->cputime_expires.prof_exp = 0;
|
||||
while (!list_empty(timers)) {
|
||||
struct cpu_timer_list *t = list_first_entry(timers,
|
||||
struct cpu_timer_list,
|
||||
entry);
|
||||
if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) {
|
||||
tsk->cputime_expires.prof_exp = t->expires.cpu;
|
||||
break;
|
||||
}
|
||||
t->firing = 1;
|
||||
list_move_tail(&t->entry, firing);
|
||||
}
|
||||
expires = check_timers_list(timers, firing, prof_ticks(tsk));
|
||||
tsk_expires->prof_exp = expires_to_cputime(expires);
|
||||
|
||||
++timers;
|
||||
maxfire = 20;
|
||||
tsk->cputime_expires.virt_exp = 0;
|
||||
while (!list_empty(timers)) {
|
||||
struct cpu_timer_list *t = list_first_entry(timers,
|
||||
struct cpu_timer_list,
|
||||
entry);
|
||||
if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) {
|
||||
tsk->cputime_expires.virt_exp = t->expires.cpu;
|
||||
break;
|
||||
}
|
||||
t->firing = 1;
|
||||
list_move_tail(&t->entry, firing);
|
||||
}
|
||||
expires = check_timers_list(++timers, firing, virt_ticks(tsk));
|
||||
tsk_expires->virt_exp = expires_to_cputime(expires);
|
||||
|
||||
++timers;
|
||||
maxfire = 20;
|
||||
tsk->cputime_expires.sched_exp = 0;
|
||||
while (!list_empty(timers)) {
|
||||
struct cpu_timer_list *t = list_first_entry(timers,
|
||||
struct cpu_timer_list,
|
||||
entry);
|
||||
if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
|
||||
tsk->cputime_expires.sched_exp = t->expires.sched;
|
||||
break;
|
||||
}
|
||||
t->firing = 1;
|
||||
list_move_tail(&t->entry, firing);
|
||||
}
|
||||
tsk_expires->sched_exp = check_timers_list(++timers, firing,
|
||||
tsk->se.sum_exec_runtime);
|
||||
|
||||
/*
|
||||
* Check for the special case thread timers.
|
||||
@@ -1030,7 +953,8 @@ static void stop_process_timers(struct signal_struct *sig)
|
||||
static u32 onecputick;
|
||||
|
||||
static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
|
||||
cputime_t *expires, cputime_t cur_time, int signo)
|
||||
unsigned long long *expires,
|
||||
unsigned long long cur_time, int signo)
|
||||
{
|
||||
if (!it->expires)
|
||||
return;
|
||||
@@ -1066,9 +990,8 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
|
||||
static void check_process_timers(struct task_struct *tsk,
|
||||
struct list_head *firing)
|
||||
{
|
||||
int maxfire;
|
||||
struct signal_struct *const sig = tsk->signal;
|
||||
cputime_t utime, ptime, virt_expires, prof_expires;
|
||||
unsigned long long utime, ptime, virt_expires, prof_expires;
|
||||
unsigned long long sum_sched_runtime, sched_expires;
|
||||
struct list_head *timers = sig->cpu_timers;
|
||||
struct task_cputime cputime;
|
||||
@@ -1078,52 +1001,13 @@ static void check_process_timers(struct task_struct *tsk,
|
||||
* Collect the current process totals.
|
||||
*/
|
||||
thread_group_cputimer(tsk, &cputime);
|
||||
utime = cputime.utime;
|
||||
ptime = utime + cputime.stime;
|
||||
utime = cputime_to_expires(cputime.utime);
|
||||
ptime = utime + cputime_to_expires(cputime.stime);
|
||||
sum_sched_runtime = cputime.sum_exec_runtime;
|
||||
maxfire = 20;
|
||||
prof_expires = 0;
|
||||
while (!list_empty(timers)) {
|
||||
struct cpu_timer_list *tl = list_first_entry(timers,
|
||||
struct cpu_timer_list,
|
||||
entry);
|
||||
if (!--maxfire || ptime < tl->expires.cpu) {
|
||||
prof_expires = tl->expires.cpu;
|
||||
break;
|
||||
}
|
||||
tl->firing = 1;
|
||||
list_move_tail(&tl->entry, firing);
|
||||
}
|
||||
|
||||
++timers;
|
||||
maxfire = 20;
|
||||
virt_expires = 0;
|
||||
while (!list_empty(timers)) {
|
||||
struct cpu_timer_list *tl = list_first_entry(timers,
|
||||
struct cpu_timer_list,
|
||||
entry);
|
||||
if (!--maxfire || utime < tl->expires.cpu) {
|
||||
virt_expires = tl->expires.cpu;
|
||||
break;
|
||||
}
|
||||
tl->firing = 1;
|
||||
list_move_tail(&tl->entry, firing);
|
||||
}
|
||||
|
||||
++timers;
|
||||
maxfire = 20;
|
||||
sched_expires = 0;
|
||||
while (!list_empty(timers)) {
|
||||
struct cpu_timer_list *tl = list_first_entry(timers,
|
||||
struct cpu_timer_list,
|
||||
entry);
|
||||
if (!--maxfire || sum_sched_runtime < tl->expires.sched) {
|
||||
sched_expires = tl->expires.sched;
|
||||
break;
|
||||
}
|
||||
tl->firing = 1;
|
||||
list_move_tail(&tl->entry, firing);
|
||||
}
|
||||
prof_expires = check_timers_list(timers, firing, ptime);
|
||||
virt_expires = check_timers_list(++timers, firing, utime);
|
||||
sched_expires = check_timers_list(++timers, firing, sum_sched_runtime);
|
||||
|
||||
/*
|
||||
* Check for the special case process timers.
|
||||
@@ -1162,8 +1046,8 @@ static void check_process_timers(struct task_struct *tsk,
|
||||
}
|
||||
}
|
||||
|
||||
sig->cputime_expires.prof_exp = prof_expires;
|
||||
sig->cputime_expires.virt_exp = virt_expires;
|
||||
sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires);
|
||||
sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires);
|
||||
sig->cputime_expires.sched_exp = sched_expires;
|
||||
if (task_cputime_zero(&sig->cputime_expires))
|
||||
stop_process_timers(sig);
|
||||
@@ -1176,7 +1060,7 @@ static void check_process_timers(struct task_struct *tsk,
|
||||
void posix_cpu_timer_schedule(struct k_itimer *timer)
|
||||
{
|
||||
struct task_struct *p = timer->it.cpu.task;
|
||||
union cpu_time_count now;
|
||||
unsigned long long now;
|
||||
|
||||
if (unlikely(p == NULL))
|
||||
/*
|
||||
@@ -1205,7 +1089,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
|
||||
*/
|
||||
put_task_struct(p);
|
||||
timer->it.cpu.task = p = NULL;
|
||||
timer->it.cpu.expires.sched = 0;
|
||||
timer->it.cpu.expires = 0;
|
||||
goto out_unlock;
|
||||
} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
|
||||
/*
|
||||
@@ -1213,6 +1097,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
|
||||
* not yet reaped. Take this opportunity to
|
||||
* drop our task ref.
|
||||
*/
|
||||
cpu_timer_sample_group(timer->it_clock, p, &now);
|
||||
clear_dead_task(timer, now);
|
||||
goto out_unlock;
|
||||
}
|
||||
@@ -1387,7 +1272,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
|
||||
void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
|
||||
cputime_t *newval, cputime_t *oldval)
|
||||
{
|
||||
union cpu_time_count now;
|
||||
unsigned long long now;
|
||||
|
||||
BUG_ON(clock_idx == CPUCLOCK_SCHED);
|
||||
cpu_timer_sample_group(clock_idx, tsk, &now);
|
||||
@@ -1399,17 +1284,17 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
|
||||
* it to be absolute.
|
||||
*/
|
||||
if (*oldval) {
|
||||
if (*oldval <= now.cpu) {
|
||||
if (*oldval <= now) {
|
||||
/* Just about to fire. */
|
||||
*oldval = cputime_one_jiffy;
|
||||
} else {
|
||||
*oldval -= now.cpu;
|
||||
*oldval -= now;
|
||||
}
|
||||
}
|
||||
|
||||
if (!*newval)
|
||||
goto out;
|
||||
*newval += now.cpu;
|
||||
*newval += now;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1459,7 +1344,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
|
||||
}
|
||||
|
||||
while (!signal_pending(current)) {
|
||||
if (timer.it.cpu.expires.sched == 0) {
|
||||
if (timer.it.cpu.expires == 0) {
|
||||
/*
|
||||
* Our timer fired and was reset, below
|
||||
* deletion can not fail.
|
||||
|
@@ -32,7 +32,8 @@ static void try_to_suspend(struct work_struct *work)

	mutex_lock(&autosleep_lock);

	if (!pm_save_wakeup_count(initial_count)) {
	if (!pm_save_wakeup_count(initial_count) ||
	    system_state != SYSTEM_RUNNING) {
		mutex_unlock(&autosleep_lock);
		goto out;
	}

@@ -1369,9 +1369,9 @@ static int console_trylock_for_printk(unsigned int cpu)
	}
	}
	logbuf_cpu = UINT_MAX;
	raw_spin_unlock(&logbuf_lock);
	if (wake)
		up(&console_sem);
	raw_spin_unlock(&logbuf_lock);
	return retval;
}

|
||||
@@ -1921,7 +1921,7 @@ void resume_console(void)
 * called when a new CPU comes online (or fails to come up), and ensures
 * that any such output gets printed.
 */
static int __cpuinit console_cpu_notify(struct notifier_block *self,
static int console_cpu_notify(struct notifier_block *self,
	unsigned long action, void *hcpu)
{
	switch (action) {

@@ -331,7 +331,7 @@ out:
	put_cpu();
}

static int __cpuinit profile_cpu_callback(struct notifier_block *info,
static int profile_cpu_callback(struct notifier_block *info,
			unsigned long action, void *__cpu)
{
	int node, cpu = (unsigned long)__cpu;

@@ -469,6 +469,7 @@ static int ptrace_detach(struct task_struct *child, unsigned int data)
	/* Architecture-specific hardware disable .. */
	ptrace_disable(child);
	clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
	flush_ptrace_hw_breakpoint(child);

	write_lock_irq(&tasklist_lock);
	/*

@@ -1221,19 +1222,3 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
	return ret;
}
#endif /* CONFIG_COMPAT */

#ifdef CONFIG_HAVE_HW_BREAKPOINT
int ptrace_get_breakpoints(struct task_struct *tsk)
{
	if (atomic_inc_not_zero(&tsk->ptrace_bp_refcnt))
		return 0;

	return -1;
}

void ptrace_put_breakpoints(struct task_struct *tsk)
{
	if (atomic_dec_and_test(&tsk->ptrace_bp_refcnt))
		flush_ptrace_hw_breakpoint(tsk);
}
#endif /* CONFIG_HAVE_HW_BREAKPOINT */

@@ -1476,7 +1476,7 @@ rcu_torture_shutdown(void *arg)
|
||||
* Execute random CPU-hotplug operations at the interval specified
|
||||
* by the onoff_interval.
|
||||
*/
|
||||
static int __cpuinit
|
||||
static int
|
||||
rcu_torture_onoff(void *arg)
|
||||
{
|
||||
int cpu;
|
||||
@@ -1558,7 +1558,7 @@ rcu_torture_onoff(void *arg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __cpuinit
|
||||
static int
|
||||
rcu_torture_onoff_init(void)
|
||||
{
|
||||
int ret;
|
||||
@@ -1601,7 +1601,7 @@ static void rcu_torture_onoff_cleanup(void)
|
||||
* CPU-stall kthread. It waits as specified by stall_cpu_holdoff, then
|
||||
* induces a CPU stall for the time specified by stall_cpu.
|
||||
*/
|
||||
static int __cpuinit rcu_torture_stall(void *args)
|
||||
static int rcu_torture_stall(void *args)
|
||||
{
|
||||
unsigned long stop_at;
|
||||
|
||||
|
@@ -2910,7 +2910,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
|
||||
* can accept some slop in the rsp->completed access due to the fact
|
||||
* that this CPU cannot possibly have any RCU callbacks in flight yet.
|
||||
*/
|
||||
static void __cpuinit
|
||||
static void
|
||||
rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
|
||||
{
|
||||
unsigned long flags;
|
||||
@@ -2962,7 +2962,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
|
||||
mutex_unlock(&rsp->onoff_mutex);
|
||||
}
|
||||
|
||||
static void __cpuinit rcu_prepare_cpu(int cpu)
|
||||
static void rcu_prepare_cpu(int cpu)
|
||||
{
|
||||
struct rcu_state *rsp;
|
||||
|
||||
@@ -2974,7 +2974,7 @@ static void __cpuinit rcu_prepare_cpu(int cpu)
|
||||
/*
|
||||
* Handle CPU online/offline notification events.
|
||||
*/
|
||||
static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
|
||||
static int rcu_cpu_notify(struct notifier_block *self,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
long cpu = (long)hcpu;
|
||||
|
@@ -521,10 +521,10 @@ static void invoke_rcu_callbacks_kthread(void);
|
||||
static bool rcu_is_callbacks_kthread(void);
|
||||
#ifdef CONFIG_RCU_BOOST
|
||||
static void rcu_preempt_do_callbacks(void);
|
||||
static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
|
||||
static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
|
||||
struct rcu_node *rnp);
|
||||
#endif /* #ifdef CONFIG_RCU_BOOST */
|
||||
static void __cpuinit rcu_prepare_kthreads(int cpu);
|
||||
static void rcu_prepare_kthreads(int cpu);
|
||||
static void rcu_cleanup_after_idle(int cpu);
|
||||
static void rcu_prepare_for_idle(int cpu);
|
||||
static void rcu_idle_count_callbacks_posted(void);
|
||||
|
@@ -1352,7 +1352,7 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
|
||||
* already exist. We only create this kthread for preemptible RCU.
|
||||
* Returns zero if all is well, a negated errno otherwise.
|
||||
*/
|
||||
static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
|
||||
static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
|
||||
struct rcu_node *rnp)
|
||||
{
|
||||
int rnp_index = rnp - &rsp->node[0];
|
||||
@@ -1507,7 +1507,7 @@ static int __init rcu_spawn_kthreads(void)
|
||||
}
|
||||
early_initcall(rcu_spawn_kthreads);
|
||||
|
||||
static void __cpuinit rcu_prepare_kthreads(int cpu)
|
||||
static void rcu_prepare_kthreads(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
|
||||
struct rcu_node *rnp = rdp->mynode;
|
||||
@@ -1549,7 +1549,7 @@ static int __init rcu_scheduler_really_started(void)
|
||||
}
|
||||
early_initcall(rcu_scheduler_really_started);
|
||||
|
||||
static void __cpuinit rcu_prepare_kthreads(int cpu)
|
||||
static void rcu_prepare_kthreads(int cpu)
|
||||
{
|
||||
}
|
||||
|
||||
|
419
kernel/reboot.c
Normal file
@@ -0,0 +1,419 @@
|
||||
/*
|
||||
* linux/kernel/reboot.c
|
||||
*
|
||||
* Copyright (C) 2013 Linus Torvalds
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "reboot: " fmt
|
||||
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/kmsg_dump.h>
|
||||
#include <linux/reboot.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
/*
|
||||
* this indicates whether you can reboot with ctrl-alt-del: the default is yes
|
||||
*/
|
||||
|
||||
int C_A_D = 1;
|
||||
struct pid *cad_pid;
|
||||
EXPORT_SYMBOL(cad_pid);
|
||||
|
||||
#if defined(CONFIG_ARM) || defined(CONFIG_UNICORE32)
|
||||
#define DEFAULT_REBOOT_MODE = REBOOT_HARD
|
||||
#else
|
||||
#define DEFAULT_REBOOT_MODE
|
||||
#endif
|
||||
enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE;
|
||||
|
||||
int reboot_default;
|
||||
int reboot_cpu;
|
||||
enum reboot_type reboot_type = BOOT_ACPI;
|
||||
int reboot_force;
|
||||
|
||||
/*
|
||||
* If set, this is used for preparing the system to power off.
|
||||
*/
|
||||
|
||||
void (*pm_power_off_prepare)(void);
|
||||
|
||||
/**
|
||||
* emergency_restart - reboot the system
|
||||
*
|
||||
* Without shutting down any hardware or taking any locks
|
||||
* reboot the system. This is called when we know we are in
|
||||
* trouble so this is our best effort to reboot. This is
|
||||
* safe to call in interrupt context.
|
||||
*/
|
||||
void emergency_restart(void)
|
||||
{
|
||||
kmsg_dump(KMSG_DUMP_EMERG);
|
||||
machine_emergency_restart();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(emergency_restart);
|
||||
|
||||
void kernel_restart_prepare(char *cmd)
|
||||
{
|
||||
blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
|
||||
system_state = SYSTEM_RESTART;
|
||||
usermodehelper_disable();
|
||||
device_shutdown();
|
||||
}
|
||||
|
||||
/**
|
||||
* register_reboot_notifier - Register function to be called at reboot time
|
||||
* @nb: Info about notifier function to be called
|
||||
*
|
||||
* Registers a function with the list of functions
|
||||
* to be called at reboot time.
|
||||
*
|
||||
* Currently always returns zero, as blocking_notifier_chain_register()
|
||||
* always returns zero.
|
||||
*/
|
||||
int register_reboot_notifier(struct notifier_block *nb)
|
||||
{
|
||||
return blocking_notifier_chain_register(&reboot_notifier_list, nb);
|
||||
}
|
||||
EXPORT_SYMBOL(register_reboot_notifier);
|
||||
|
||||
/**
|
||||
* unregister_reboot_notifier - Unregister previously registered reboot notifier
|
||||
* @nb: Hook to be unregistered
|
||||
*
|
||||
* Unregisters a previously registered reboot
|
||||
* notifier function.
|
||||
*
|
||||
* Returns zero on success, or %-ENOENT on failure.
|
||||
*/
|
||||
int unregister_reboot_notifier(struct notifier_block *nb)
|
||||
{
|
||||
return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
|
||||
}
|
||||
EXPORT_SYMBOL(unregister_reboot_notifier);
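
The two notifier hooks above are the interface drivers use to run code just before a restart, halt or power off. As a rough, hypothetical sketch (the mydrv_* names are illustrative and not part of this tree), a module would wire into the chain like this:

#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/reboot.h>

/* Hypothetical callback: runs from the reboot notifier chain; action is
 * SYS_RESTART, SYS_HALT or SYS_POWER_OFF, and data may carry the restart
 * command string passed by kernel_restart_prepare().
 */
static int mydrv_reboot_notify(struct notifier_block *nb,
			       unsigned long action, void *data)
{
	/* quiesce hardware, flush device state, etc. */
	return NOTIFY_DONE;
}

static struct notifier_block mydrv_reboot_nb = {
	.notifier_call = mydrv_reboot_notify,
};

static int __init mydrv_init(void)
{
	return register_reboot_notifier(&mydrv_reboot_nb);
}

static void __exit mydrv_exit(void)
{
	unregister_reboot_notifier(&mydrv_reboot_nb);
}

module_init(mydrv_init);
module_exit(mydrv_exit);
MODULE_LICENSE("GPL");
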
|
||||
|
||||
static void migrate_to_reboot_cpu(void)
|
||||
{
|
||||
/* The boot cpu is always logical cpu 0 */
|
||||
int cpu = reboot_cpu;
|
||||
|
||||
cpu_hotplug_disable();
|
||||
|
||||
/* Make certain the cpu I'm about to reboot on is online */
|
||||
if (!cpu_online(cpu))
|
||||
cpu = cpumask_first(cpu_online_mask);
|
||||
|
||||
/* Prevent races with other tasks migrating this task */
|
||||
current->flags |= PF_NO_SETAFFINITY;
|
||||
|
||||
/* Make certain I only run on the appropriate processor */
|
||||
set_cpus_allowed_ptr(current, cpumask_of(cpu));
|
||||
}
|
||||
|
||||
/**
|
||||
* kernel_restart - reboot the system
|
||||
* @cmd: pointer to buffer containing command to execute for restart
|
||||
* or %NULL
|
||||
*
|
||||
* Shutdown everything and perform a clean reboot.
|
||||
* This is not safe to call in interrupt context.
|
||||
*/
|
||||
void kernel_restart(char *cmd)
|
||||
{
|
||||
kernel_restart_prepare(cmd);
|
||||
migrate_to_reboot_cpu();
|
||||
syscore_shutdown();
|
||||
if (!cmd)
|
||||
pr_emerg("Restarting system\n");
|
||||
else
|
||||
pr_emerg("Restarting system with command '%s'\n", cmd);
|
||||
kmsg_dump(KMSG_DUMP_RESTART);
|
||||
machine_restart(cmd);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kernel_restart);
|
||||
|
||||
static void kernel_shutdown_prepare(enum system_states state)
|
||||
{
|
||||
blocking_notifier_call_chain(&reboot_notifier_list,
|
||||
(state == SYSTEM_HALT) ? SYS_HALT : SYS_POWER_OFF, NULL);
|
||||
system_state = state;
|
||||
usermodehelper_disable();
|
||||
device_shutdown();
|
||||
}
|
||||
/**
|
||||
* kernel_halt - halt the system
|
||||
*
|
||||
* Shutdown everything and perform a clean system halt.
|
||||
*/
|
||||
void kernel_halt(void)
|
||||
{
|
||||
kernel_shutdown_prepare(SYSTEM_HALT);
|
||||
migrate_to_reboot_cpu();
|
||||
syscore_shutdown();
|
||||
pr_emerg("System halted\n");
|
||||
kmsg_dump(KMSG_DUMP_HALT);
|
||||
machine_halt();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kernel_halt);
|
||||
|
||||
/**
|
||||
* kernel_power_off - power_off the system
|
||||
*
|
||||
* Shutdown everything and perform a clean system power_off.
|
||||
*/
|
||||
void kernel_power_off(void)
|
||||
{
|
||||
kernel_shutdown_prepare(SYSTEM_POWER_OFF);
|
||||
if (pm_power_off_prepare)
|
||||
pm_power_off_prepare();
|
||||
migrate_to_reboot_cpu();
|
||||
syscore_shutdown();
|
||||
pr_emerg("Power down\n");
|
||||
kmsg_dump(KMSG_DUMP_POWEROFF);
|
||||
machine_power_off();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kernel_power_off);
|
||||
|
||||
static DEFINE_MUTEX(reboot_mutex);
|
||||
|
||||
/*
|
||||
* Reboot system call: for obvious reasons only root may call it,
|
||||
* and even root needs to set up some magic numbers in the registers
|
||||
* so that some mistake won't make this reboot the whole machine.
|
||||
* You can also set the meaning of the ctrl-alt-del-key here.
|
||||
*
|
||||
* reboot doesn't sync: do that yourself before calling this.
|
||||
*/
|
||||
SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
|
||||
void __user *, arg)
|
||||
{
|
||||
struct pid_namespace *pid_ns = task_active_pid_ns(current);
|
||||
char buffer[256];
|
||||
int ret = 0;
|
||||
|
||||
/* We only trust the superuser with rebooting the system. */
|
||||
if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT))
|
||||
return -EPERM;
|
||||
|
||||
/* For safety, we require "magic" arguments. */
|
||||
if (magic1 != LINUX_REBOOT_MAGIC1 ||
|
||||
(magic2 != LINUX_REBOOT_MAGIC2 &&
|
||||
magic2 != LINUX_REBOOT_MAGIC2A &&
|
||||
magic2 != LINUX_REBOOT_MAGIC2B &&
|
||||
magic2 != LINUX_REBOOT_MAGIC2C))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* If pid namespaces are enabled and the current task is in a child
|
||||
* pid_namespace, the command is handled by reboot_pid_ns() which will
|
||||
* call do_exit().
|
||||
*/
|
||||
ret = reboot_pid_ns(pid_ns, cmd);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Instead of trying to make the power_off code look like
|
||||
* halt when pm_power_off is not set do it the easy way.
|
||||
*/
|
||||
if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
|
||||
cmd = LINUX_REBOOT_CMD_HALT;
|
||||
|
||||
mutex_lock(&reboot_mutex);
|
||||
switch (cmd) {
|
||||
case LINUX_REBOOT_CMD_RESTART:
|
||||
kernel_restart(NULL);
|
||||
break;
|
||||
|
||||
case LINUX_REBOOT_CMD_CAD_ON:
|
||||
C_A_D = 1;
|
||||
break;
|
||||
|
||||
case LINUX_REBOOT_CMD_CAD_OFF:
|
||||
C_A_D = 0;
|
||||
break;
|
||||
|
||||
case LINUX_REBOOT_CMD_HALT:
|
||||
kernel_halt();
|
||||
do_exit(0);
|
||||
panic("cannot halt");
|
||||
|
||||
case LINUX_REBOOT_CMD_POWER_OFF:
|
||||
kernel_power_off();
|
||||
do_exit(0);
|
||||
break;
|
||||
|
||||
case LINUX_REBOOT_CMD_RESTART2:
|
||||
ret = strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1);
|
||||
if (ret < 0) {
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
}
|
||||
buffer[sizeof(buffer) - 1] = '\0';
|
||||
|
||||
kernel_restart(buffer);
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_KEXEC
|
||||
case LINUX_REBOOT_CMD_KEXEC:
|
||||
ret = kernel_kexec();
|
||||
break;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HIBERNATION
|
||||
case LINUX_REBOOT_CMD_SW_SUSPEND:
|
||||
ret = hibernate();
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&reboot_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void deferred_cad(struct work_struct *dummy)
|
||||
{
|
||||
kernel_restart(NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function gets called by ctrl-alt-del - ie the keyboard interrupt.
|
||||
* As it's called within an interrupt, it may NOT sync: the only choice
|
||||
* is whether to reboot at once, or just ignore the ctrl-alt-del.
|
||||
*/
|
||||
void ctrl_alt_del(void)
|
||||
{
|
||||
static DECLARE_WORK(cad_work, deferred_cad);
|
||||
|
||||
if (C_A_D)
|
||||
schedule_work(&cad_work);
|
||||
else
|
||||
kill_cad_pid(SIGINT, 1);
|
||||
}
|
||||
|
||||
char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
|
||||
|
||||
static int __orderly_poweroff(bool force)
|
||||
{
|
||||
char **argv;
|
||||
static char *envp[] = {
|
||||
"HOME=/",
|
||||
"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
|
||||
NULL
|
||||
};
|
||||
int ret;
|
||||
|
||||
argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL);
|
||||
if (argv) {
|
||||
ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
|
||||
argv_free(argv);
|
||||
} else {
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
|
||||
if (ret && force) {
|
||||
pr_warn("Failed to start orderly shutdown: forcing the issue\n");
|
||||
/*
|
||||
* I guess this should try to kick off some daemon to sync and
|
||||
* poweroff asap. Or not even bother syncing if we're doing an
|
||||
* emergency shutdown?
|
||||
*/
|
||||
emergency_sync();
|
||||
kernel_power_off();
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool poweroff_force;
|
||||
|
||||
static void poweroff_work_func(struct work_struct *work)
|
||||
{
|
||||
__orderly_poweroff(poweroff_force);
|
||||
}
|
||||
|
||||
static DECLARE_WORK(poweroff_work, poweroff_work_func);
|
||||
|
||||
/**
|
||||
* orderly_poweroff - Trigger an orderly system poweroff
|
||||
* @force: force poweroff if command execution fails
|
||||
*
|
||||
* This may be called from any context to trigger a system shutdown.
|
||||
* If the orderly shutdown fails, it will force an immediate shutdown.
|
||||
*/
|
||||
int orderly_poweroff(bool force)
|
||||
{
|
||||
if (force) /* do not override the pending "true" */
|
||||
poweroff_force = true;
|
||||
schedule_work(&poweroff_work);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(orderly_poweroff);
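
Since the actual shutdown is deferred to the workqueue above, orderly_poweroff() itself is cheap and can be requested from most contexts. A minimal, hypothetical caller (the function name is illustrative only):

#include <linux/reboot.h>

/* Hypothetical handler: ask for a clean shutdown on a critical event.
 * Passing true means: if running the userspace poweroff command fails,
 * fall back to emergency_sync() and kernel_power_off().
 */
static void mydrv_critical_event(void)
{
	orderly_poweroff(true);
}
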
|
||||
|
||||
static int __init reboot_setup(char *str)
|
||||
{
|
||||
for (;;) {
|
||||
/*
|
||||
* Having anything passed on the command line via
|
||||
* reboot= will cause us to disable DMI checking
|
||||
* below.
|
||||
*/
|
||||
reboot_default = 0;
|
||||
|
||||
switch (*str) {
|
||||
case 'w':
|
||||
reboot_mode = REBOOT_WARM;
|
||||
break;
|
||||
|
||||
case 'c':
|
||||
reboot_mode = REBOOT_COLD;
|
||||
break;
|
||||
|
||||
case 'h':
|
||||
reboot_mode = REBOOT_HARD;
|
||||
break;
|
||||
|
||||
case 's':
|
||||
if (isdigit(*(str+1)))
|
||||
reboot_cpu = simple_strtoul(str+1, NULL, 0);
|
||||
else if (str[1] == 'm' && str[2] == 'p' &&
|
||||
isdigit(*(str+3)))
|
||||
reboot_cpu = simple_strtoul(str+3, NULL, 0);
|
||||
else
|
||||
reboot_mode = REBOOT_SOFT;
|
||||
break;
|
||||
|
||||
case 'g':
|
||||
reboot_mode = REBOOT_GPIO;
|
||||
break;
|
||||
|
||||
case 'b':
|
||||
case 'a':
|
||||
case 'k':
|
||||
case 't':
|
||||
case 'e':
|
||||
case 'p':
|
||||
reboot_type = *str;
|
||||
break;
|
||||
|
||||
case 'f':
|
||||
reboot_force = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
str = strchr(str, ',');
|
||||
if (str)
|
||||
str++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
__setup("reboot=", reboot_setup);
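
As a rough illustration of the parser above (not an exhaustive list of accepted spellings), reboot= takes comma-separated options matched by their leading characters, for example:

	reboot=warm          'w' -> reboot_mode = REBOOT_WARM
	reboot=cold,force    'c' then 'f' -> REBOOT_COLD and reboot_force = 1
	reboot=smp1          "s" followed by "mp<digit>" -> reboot_cpu = 1
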
|
@@ -516,7 +516,7 @@ static void setup_callbacks(struct rchan *chan,
|
||||
*
|
||||
* Returns the success/failure of the operation. (%NOTIFY_OK, %NOTIFY_BAD)
|
||||
*/
|
||||
static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb,
|
||||
static int relay_hotcpu_callback(struct notifier_block *nb,
|
||||
unsigned long action,
|
||||
void *hcpu)
|
||||
{
|
||||
|
@@ -370,13 +370,6 @@ static struct rq *this_rq_lock(void)
|
||||
#ifdef CONFIG_SCHED_HRTICK
|
||||
/*
|
||||
* Use HR-timers to deliver accurate preemption points.
|
||||
*
|
||||
* Its all a bit involved since we cannot program an hrt while holding the
|
||||
* rq->lock. So what we do is store a state in in rq->hrtick_* and ask for a
|
||||
* reschedule event.
|
||||
*
|
||||
* When we get rescheduled we reprogram the hrtick_timer outside of the
|
||||
* rq->lock.
|
||||
*/
|
||||
|
||||
static void hrtick_clear(struct rq *rq)
|
||||
@@ -404,6 +397,15 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
static int __hrtick_restart(struct rq *rq)
|
||||
{
|
||||
struct hrtimer *timer = &rq->hrtick_timer;
|
||||
ktime_t time = hrtimer_get_softexpires(timer);
|
||||
|
||||
return __hrtimer_start_range_ns(timer, time, 0, HRTIMER_MODE_ABS_PINNED, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* called from hardirq (IPI) context
|
||||
*/
|
||||
@@ -412,7 +414,7 @@ static void __hrtick_start(void *arg)
|
||||
struct rq *rq = arg;
|
||||
|
||||
raw_spin_lock(&rq->lock);
|
||||
hrtimer_restart(&rq->hrtick_timer);
|
||||
__hrtick_restart(rq);
|
||||
rq->hrtick_csd_pending = 0;
|
||||
raw_spin_unlock(&rq->lock);
|
||||
}
|
||||
@@ -430,7 +432,7 @@ void hrtick_start(struct rq *rq, u64 delay)
|
||||
hrtimer_set_expires(timer, time);
|
||||
|
||||
if (rq == this_rq()) {
|
||||
hrtimer_restart(timer);
|
||||
__hrtick_restart(rq);
|
||||
} else if (!rq->hrtick_csd_pending) {
|
||||
__smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
|
||||
rq->hrtick_csd_pending = 1;
|
||||
@@ -4131,7 +4133,7 @@ void show_state_filter(unsigned long state_filter)
|
||||
debug_show_all_locks();
|
||||
}
|
||||
|
||||
void __cpuinit init_idle_bootup_task(struct task_struct *idle)
|
||||
void init_idle_bootup_task(struct task_struct *idle)
|
||||
{
|
||||
idle->sched_class = &idle_sched_class;
|
||||
}
|
||||
@@ -4144,7 +4146,7 @@ void __cpuinit init_idle_bootup_task(struct task_struct *idle)
|
||||
* NOTE: this function does not set the idle thread's NEED_RESCHED
|
||||
* flag, to make booting more robust.
|
||||
*/
|
||||
void __cpuinit init_idle(struct task_struct *idle, int cpu)
|
||||
void init_idle(struct task_struct *idle, int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
unsigned long flags;
|
||||
@@ -4628,7 +4630,7 @@ static void set_rq_offline(struct rq *rq)
|
||||
* migration_call - callback that gets triggered when a CPU is added.
|
||||
* Here we can start up the necessary migration thread for the new CPU.
|
||||
*/
|
||||
static int __cpuinit
|
||||
static int
|
||||
migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
{
|
||||
int cpu = (long)hcpu;
|
||||
@@ -4682,12 +4684,12 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
* happens before everything else. This has to be lower priority than
|
||||
* the notifier in the perf_event subsystem, though.
|
||||
*/
|
||||
static struct notifier_block __cpuinitdata migration_notifier = {
|
||||
static struct notifier_block migration_notifier = {
|
||||
.notifier_call = migration_call,
|
||||
.priority = CPU_PRI_MIGRATION,
|
||||
};
|
||||
|
||||
static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
|
||||
static int sched_cpu_active(struct notifier_block *nfb,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
@@ -4700,7 +4702,7 @@ static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
|
||||
}
|
||||
}
|
||||
|
||||
static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
|
||||
static int sched_cpu_inactive(struct notifier_block *nfb,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
|
@@ -5506,7 +5506,7 @@ void nohz_balance_enter_idle(int cpu)
|
||||
set_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu));
|
||||
}
|
||||
|
||||
static int __cpuinit sched_ilb_notifier(struct notifier_block *nfb,
|
||||
static int sched_ilb_notifier(struct notifier_block *nfb,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
|
@@ -161,6 +161,39 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
|
||||
* on CONFIG_SCHEDSTATS.
|
||||
*/
|
||||
|
||||
/**
|
||||
* cputimer_running - return true if cputimer is running
|
||||
*
|
||||
* @tsk: Pointer to target task.
|
||||
*/
|
||||
static inline bool cputimer_running(struct task_struct *tsk)
|
||||
|
||||
{
|
||||
struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
|
||||
|
||||
if (!cputimer->running)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* After we flush the task's sum_exec_runtime to sig->sum_sched_runtime
|
||||
* in __exit_signal(), we won't account to the signal struct further
|
||||
* cputime consumed by that task, even though the task can still be
|
||||
* ticking after __exit_signal().
|
||||
*
|
||||
* In order to keep a consistent behaviour between thread group cputime
|
||||
* and thread group cputimer accounting, lets also ignore the cputime
|
||||
* elapsing after __exit_signal() in any thread group timer running.
|
||||
*
|
||||
* This makes sure that POSIX CPU clocks and timers are synchronized, so
|
||||
* that a POSIX CPU timer won't expire while the corresponding POSIX CPU
|
||||
* clock delta is behind the expiring timer value.
|
||||
*/
|
||||
if (unlikely(!tsk->sighand))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* account_group_user_time - Maintain utime for a thread group.
|
||||
*
|
||||
@@ -176,7 +209,7 @@ static inline void account_group_user_time(struct task_struct *tsk,
|
||||
{
|
||||
struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
|
||||
|
||||
if (!cputimer->running)
|
||||
if (!cputimer_running(tsk))
|
||||
return;
|
||||
|
||||
raw_spin_lock(&cputimer->lock);
|
||||
@@ -199,7 +232,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
|
||||
{
|
||||
struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
|
||||
|
||||
if (!cputimer->running)
|
||||
if (!cputimer_running(tsk))
|
||||
return;
|
||||
|
||||
raw_spin_lock(&cputimer->lock);
|
||||
@@ -222,7 +255,7 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
|
||||
{
|
||||
struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
|
||||
|
||||
if (!cputimer->running)
|
||||
if (!cputimer_running(tsk))
|
||||
return;
|
||||
|
||||
raw_spin_lock(&cputimer->lock);
|
||||
|
@@ -73,7 +73,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
|
||||
static struct notifier_block hotplug_cfd_notifier = {
|
||||
.notifier_call = hotplug_cfd,
|
||||
};
|
||||
|
||||
|
@@ -24,7 +24,7 @@
|
||||
*/
|
||||
static DEFINE_PER_CPU(struct task_struct *, idle_threads);
|
||||
|
||||
struct task_struct * __cpuinit idle_thread_get(unsigned int cpu)
|
||||
struct task_struct *idle_thread_get(unsigned int cpu)
|
||||
{
|
||||
struct task_struct *tsk = per_cpu(idle_threads, cpu);
|
||||
|
||||
|
@@ -699,7 +699,7 @@ void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
|
||||
}
|
||||
EXPORT_SYMBOL(send_remote_softirq);
|
||||
|
||||
static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
|
||||
static int remote_softirq_cpu_notify(struct notifier_block *self,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
/*
|
||||
@@ -728,7 +728,7 @@ static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
|
||||
static struct notifier_block remote_softirq_cpu_notifier = {
|
||||
.notifier_call = remote_softirq_cpu_notify,
|
||||
};
|
||||
|
||||
@@ -830,7 +830,7 @@ static void takeover_tasklets(unsigned int cpu)
|
||||
}
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
||||
|
||||
static int __cpuinit cpu_callback(struct notifier_block *nfb,
|
||||
static int cpu_callback(struct notifier_block *nfb,
|
||||
unsigned long action,
|
||||
void *hcpu)
|
||||
{
|
||||
@@ -845,7 +845,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb,
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block __cpuinitdata cpu_nfb = {
|
||||
static struct notifier_block cpu_nfb = {
|
||||
.notifier_call = cpu_callback
|
||||
};
|
||||
|
||||
|
336
kernel/sys.c
@@ -115,20 +115,6 @@ int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
|
||||
EXPORT_SYMBOL(fs_overflowuid);
|
||||
EXPORT_SYMBOL(fs_overflowgid);
|
||||
|
||||
/*
|
||||
* this indicates whether you can reboot with ctrl-alt-del: the default is yes
|
||||
*/
|
||||
|
||||
int C_A_D = 1;
|
||||
struct pid *cad_pid;
|
||||
EXPORT_SYMBOL(cad_pid);
|
||||
|
||||
/*
|
||||
* If set, this is used for preparing the system to power off.
|
||||
*/
|
||||
|
||||
void (*pm_power_off_prepare)(void);
|
||||
|
||||
/*
|
||||
* Returns true if current's euid is same as p's uid or euid,
|
||||
* or has CAP_SYS_NICE to p's user_ns.
|
||||
@@ -308,266 +294,6 @@ out_unlock:
|
||||
return retval;
|
||||
}
|
||||
|
||||
/**
|
||||
* emergency_restart - reboot the system
|
||||
*
|
||||
* Without shutting down any hardware or taking any locks
|
||||
* reboot the system. This is called when we know we are in
|
||||
* trouble so this is our best effort to reboot. This is
|
||||
* safe to call in interrupt context.
|
||||
*/
|
||||
void emergency_restart(void)
|
||||
{
|
||||
kmsg_dump(KMSG_DUMP_EMERG);
|
||||
machine_emergency_restart();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(emergency_restart);
|
||||
|
||||
void kernel_restart_prepare(char *cmd)
|
||||
{
|
||||
blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
|
||||
system_state = SYSTEM_RESTART;
|
||||
usermodehelper_disable();
|
||||
device_shutdown();
|
||||
}
|
||||
|
||||
/**
|
||||
* register_reboot_notifier - Register function to be called at reboot time
|
||||
* @nb: Info about notifier function to be called
|
||||
*
|
||||
* Registers a function with the list of functions
|
||||
* to be called at reboot time.
|
||||
*
|
||||
* Currently always returns zero, as blocking_notifier_chain_register()
|
||||
* always returns zero.
|
||||
*/
|
||||
int register_reboot_notifier(struct notifier_block *nb)
|
||||
{
|
||||
return blocking_notifier_chain_register(&reboot_notifier_list, nb);
|
||||
}
|
||||
EXPORT_SYMBOL(register_reboot_notifier);
|
||||
|
||||
/**
|
||||
* unregister_reboot_notifier - Unregister previously registered reboot notifier
|
||||
* @nb: Hook to be unregistered
|
||||
*
|
||||
* Unregisters a previously registered reboot
|
||||
* notifier function.
|
||||
*
|
||||
* Returns zero on success, or %-ENOENT on failure.
|
||||
*/
|
||||
int unregister_reboot_notifier(struct notifier_block *nb)
|
||||
{
|
||||
return blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
|
||||
}
|
||||
EXPORT_SYMBOL(unregister_reboot_notifier);
|
||||
|
||||
/* Add backwards compatibility for stable trees. */
|
||||
#ifndef PF_NO_SETAFFINITY
|
||||
#define PF_NO_SETAFFINITY PF_THREAD_BOUND
|
||||
#endif
|
||||
|
||||
static void migrate_to_reboot_cpu(void)
|
||||
{
|
||||
/* The boot cpu is always logical cpu 0 */
|
||||
int cpu = 0;
|
||||
|
||||
cpu_hotplug_disable();
|
||||
|
||||
/* Make certain the cpu I'm about to reboot on is online */
|
||||
if (!cpu_online(cpu))
|
||||
cpu = cpumask_first(cpu_online_mask);
|
||||
|
||||
/* Prevent races with other tasks migrating this task */
|
||||
current->flags |= PF_NO_SETAFFINITY;
|
||||
|
||||
/* Make certain I only run on the appropriate processor */
|
||||
set_cpus_allowed_ptr(current, cpumask_of(cpu));
|
||||
}
|
||||
|
||||
/**
|
||||
* kernel_restart - reboot the system
|
||||
* @cmd: pointer to buffer containing command to execute for restart
|
||||
* or %NULL
|
||||
*
|
||||
* Shutdown everything and perform a clean reboot.
|
||||
* This is not safe to call in interrupt context.
|
||||
*/
|
||||
void kernel_restart(char *cmd)
|
||||
{
|
||||
kernel_restart_prepare(cmd);
|
||||
migrate_to_reboot_cpu();
|
||||
syscore_shutdown();
|
||||
if (!cmd)
|
||||
printk(KERN_EMERG "Restarting system.\n");
|
||||
else
|
||||
printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
|
||||
kmsg_dump(KMSG_DUMP_RESTART);
|
||||
machine_restart(cmd);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kernel_restart);
|
||||
|
||||
static void kernel_shutdown_prepare(enum system_states state)
|
||||
{
|
||||
blocking_notifier_call_chain(&reboot_notifier_list,
|
||||
(state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
|
||||
system_state = state;
|
||||
usermodehelper_disable();
|
||||
device_shutdown();
|
||||
}
|
||||
/**
|
||||
* kernel_halt - halt the system
|
||||
*
|
||||
* Shutdown everything and perform a clean system halt.
|
||||
*/
|
||||
void kernel_halt(void)
|
||||
{
|
||||
kernel_shutdown_prepare(SYSTEM_HALT);
|
||||
migrate_to_reboot_cpu();
|
||||
syscore_shutdown();
|
||||
printk(KERN_EMERG "System halted.\n");
|
||||
kmsg_dump(KMSG_DUMP_HALT);
|
||||
machine_halt();
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(kernel_halt);
|
||||
|
||||
/**
|
||||
* kernel_power_off - power_off the system
|
||||
*
|
||||
* Shutdown everything and perform a clean system power_off.
|
||||
*/
|
||||
void kernel_power_off(void)
|
||||
{
|
||||
kernel_shutdown_prepare(SYSTEM_POWER_OFF);
|
||||
if (pm_power_off_prepare)
|
||||
pm_power_off_prepare();
|
||||
migrate_to_reboot_cpu();
|
||||
syscore_shutdown();
|
||||
printk(KERN_EMERG "Power down.\n");
|
||||
kmsg_dump(KMSG_DUMP_POWEROFF);
|
||||
machine_power_off();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kernel_power_off);
|
||||
|
||||
static DEFINE_MUTEX(reboot_mutex);
|
||||
|
||||
/*
|
||||
* Reboot system call: for obvious reasons only root may call it,
|
||||
* and even root needs to set up some magic numbers in the registers
|
||||
* so that some mistake won't make this reboot the whole machine.
|
||||
* You can also set the meaning of the ctrl-alt-del-key here.
|
||||
*
|
||||
* reboot doesn't sync: do that yourself before calling this.
|
||||
*/
|
||||
SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
|
||||
void __user *, arg)
|
||||
{
|
||||
struct pid_namespace *pid_ns = task_active_pid_ns(current);
|
||||
char buffer[256];
|
||||
int ret = 0;
|
||||
|
||||
/* We only trust the superuser with rebooting the system. */
|
||||
if (!ns_capable(pid_ns->user_ns, CAP_SYS_BOOT))
|
||||
return -EPERM;
|
||||
|
||||
/* For safety, we require "magic" arguments. */
|
||||
if (magic1 != LINUX_REBOOT_MAGIC1 ||
|
||||
(magic2 != LINUX_REBOOT_MAGIC2 &&
|
||||
magic2 != LINUX_REBOOT_MAGIC2A &&
|
||||
magic2 != LINUX_REBOOT_MAGIC2B &&
|
||||
magic2 != LINUX_REBOOT_MAGIC2C))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* If pid namespaces are enabled and the current task is in a child
|
||||
* pid_namespace, the command is handled by reboot_pid_ns() which will
|
||||
* call do_exit().
|
||||
*/
|
||||
ret = reboot_pid_ns(pid_ns, cmd);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Instead of trying to make the power_off code look like
|
||||
* halt when pm_power_off is not set do it the easy way.
|
||||
*/
|
||||
if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
|
||||
cmd = LINUX_REBOOT_CMD_HALT;
|
||||
|
||||
mutex_lock(&reboot_mutex);
|
||||
switch (cmd) {
|
||||
case LINUX_REBOOT_CMD_RESTART:
|
||||
kernel_restart(NULL);
|
||||
break;
|
||||
|
||||
case LINUX_REBOOT_CMD_CAD_ON:
|
||||
C_A_D = 1;
|
||||
break;
|
||||
|
||||
case LINUX_REBOOT_CMD_CAD_OFF:
|
||||
C_A_D = 0;
|
||||
break;
|
||||
|
||||
case LINUX_REBOOT_CMD_HALT:
|
||||
kernel_halt();
|
||||
do_exit(0);
|
||||
panic("cannot halt.\n");
|
||||
|
||||
case LINUX_REBOOT_CMD_POWER_OFF:
|
||||
kernel_power_off();
|
||||
do_exit(0);
|
||||
break;
|
||||
|
||||
case LINUX_REBOOT_CMD_RESTART2:
|
||||
if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
}
|
||||
buffer[sizeof(buffer) - 1] = '\0';
|
||||
|
||||
kernel_restart(buffer);
|
||||
break;
|
||||
|
||||
#ifdef CONFIG_KEXEC
|
||||
case LINUX_REBOOT_CMD_KEXEC:
|
||||
ret = kernel_kexec();
|
||||
break;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HIBERNATION
|
||||
case LINUX_REBOOT_CMD_SW_SUSPEND:
|
||||
ret = hibernate();
|
||||
break;
|
||||
#endif
|
||||
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&reboot_mutex);
|
||||
return ret;
|
||||
}
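
For reference, a minimal userspace sketch (not part of this patch) of how the syscall above is normally driven. The magic constants come from <linux/reboot.h>, the caller needs CAP_SYS_BOOT, and syncing is the caller's job since reboot(2) does not sync:

#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/reboot.h>

static int request_restart(const char *cmd)
{
	sync();		/* reboot(2) does not sync for us */
	return syscall(SYS_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2,
		       cmd ? LINUX_REBOOT_CMD_RESTART2 : LINUX_REBOOT_CMD_RESTART,
		       (void *)cmd);
}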
|
||||
|
||||
static void deferred_cad(struct work_struct *dummy)
|
||||
{
|
||||
kernel_restart(NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function gets called by ctrl-alt-del - ie the keyboard interrupt.
|
||||
* As it's called within an interrupt, it may NOT sync: the only choice
|
||||
* is whether to reboot at once, or just ignore the ctrl-alt-del.
|
||||
*/
|
||||
void ctrl_alt_del(void)
|
||||
{
|
||||
static DECLARE_WORK(cad_work, deferred_cad);
|
||||
|
||||
if (C_A_D)
|
||||
schedule_work(&cad_work);
|
||||
else
|
||||
kill_cad_pid(SIGINT, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unprivileged users may change the real gid to the effective gid
|
||||
* or vice versa. (BSD-style)
|
||||
@@ -2292,68 +2018,6 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
|
||||
return err ? -EFAULT : 0;
|
||||
}
|
||||
|
||||
char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
|
||||
|
||||
static int __orderly_poweroff(bool force)
|
||||
{
|
||||
char **argv;
|
||||
static char *envp[] = {
|
||||
"HOME=/",
|
||||
"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
|
||||
NULL
|
||||
};
|
||||
int ret;
|
||||
|
||||
argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL);
|
||||
if (argv) {
|
||||
ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
|
||||
argv_free(argv);
|
||||
} else {
|
||||
printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
|
||||
__func__, poweroff_cmd);
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
|
||||
if (ret && force) {
|
||||
printk(KERN_WARNING "Failed to start orderly shutdown: "
|
||||
"forcing the issue\n");
|
||||
/*
|
||||
* I guess this should try to kick off some daemon to sync and
|
||||
* poweroff asap. Or not even bother syncing if we're doing an
|
||||
* emergency shutdown?
|
||||
*/
|
||||
emergency_sync();
|
||||
kernel_power_off();
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool poweroff_force;
|
||||
|
||||
static void poweroff_work_func(struct work_struct *work)
|
||||
{
|
||||
__orderly_poweroff(poweroff_force);
|
||||
}
|
||||
|
||||
static DECLARE_WORK(poweroff_work, poweroff_work_func);
|
||||
|
||||
/**
|
||||
* orderly_poweroff - Trigger an orderly system poweroff
|
||||
* @force: force poweroff if command execution fails
|
||||
*
|
||||
* This may be called from any context to trigger a system shutdown.
|
||||
* If the orderly shutdown fails, it will force an immediate shutdown.
|
||||
*/
|
||||
int orderly_poweroff(bool force)
|
||||
{
|
||||
if (force) /* do not override the pending "true" */
|
||||
poweroff_force = true;
|
||||
schedule_work(&poweroff_work);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(orderly_poweroff);
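
A hypothetical driver-side sketch (not from this patch) of how orderly_poweroff() is typically used, e.g. by a thermal driver that wants a clean shutdown but must not hang if the userspace helper fails:

#include <linux/kernel.h>
#include <linux/reboot.h>

static void example_critical_overtemp(void)
{
	pr_emerg("critical temperature reached, requesting shutdown\n");
	/* Queues poweroff_work; force==true falls back to an immediate power-off */
	orderly_poweroff(true);
}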
|
||||
|
||||
/**
|
||||
* do_sysinfo - fill in sysinfo struct
|
||||
* @info: pointer to buffer to fill
|
||||
|
@@ -599,6 +599,13 @@ static struct ctl_table kern_table[] = {
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
{
|
||||
.procname = "traceoff_on_warning",
|
||||
.data = &__disable_trace_on_warning,
|
||||
.maxlen = sizeof(__disable_trace_on_warning),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
},
|
||||
#endif
|
||||
#ifdef CONFIG_MODULES
|
||||
{
|
||||
@@ -800,7 +807,7 @@ static struct ctl_table kern_table[] = {
|
||||
#if defined(CONFIG_LOCKUP_DETECTOR)
|
||||
{
|
||||
.procname = "watchdog",
|
||||
.data = &watchdog_enabled,
|
||||
.data = &watchdog_user_enabled,
|
||||
.maxlen = sizeof (int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dowatchdog,
|
||||
@@ -827,7 +834,7 @@ static struct ctl_table kern_table[] = {
|
||||
},
|
||||
{
|
||||
.procname = "nmi_watchdog",
|
||||
.data = &watchdog_enabled,
|
||||
.data = &watchdog_user_enabled,
|
||||
.maxlen = sizeof (int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dowatchdog,
|
||||
|
@@ -3,7 +3,6 @@
|
||||
#include "../fs/xfs/xfs_sysctl.h"
|
||||
#include <linux/sunrpc/debug.h>
|
||||
#include <linux/string.h>
|
||||
#include <net/ip_vs.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/mount.h>
|
||||
|
@@ -4,6 +4,8 @@ obj-y += timeconv.o posix-clock.o alarmtimer.o
|
||||
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o
|
||||
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o
|
||||
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o
|
||||
obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
|
||||
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o
|
||||
obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o
|
||||
obj-$(CONFIG_TIMER_STATS) += timer_stats.o
|
||||
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
|
||||
|
@@ -199,6 +199,13 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
|
||||
|
||||
}
|
||||
|
||||
ktime_t alarm_expires_remaining(const struct alarm *alarm)
|
||||
{
|
||||
struct alarm_base *base = &alarm_bases[alarm->type];
|
||||
return ktime_sub(alarm->node.expires, base->gettime());
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(alarm_expires_remaining);
|
||||
|
||||
#ifdef CONFIG_RTC_CLASS
|
||||
/**
|
||||
* alarmtimer_suspend - Suspend time callback
|
||||
@@ -303,9 +310,10 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
|
||||
alarm->type = type;
|
||||
alarm->state = ALARMTIMER_STATE_INACTIVE;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(alarm_init);
|
||||
|
||||
/**
|
||||
* alarm_start - Sets an alarm to fire
|
||||
* alarm_start - Sets an absolute alarm to fire
|
||||
* @alarm: ptr to alarm to set
|
||||
* @start: time to run the alarm
|
||||
*/
|
||||
@@ -323,6 +331,34 @@ int alarm_start(struct alarm *alarm, ktime_t start)
|
||||
spin_unlock_irqrestore(&base->lock, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(alarm_start);
|
||||
|
||||
/**
|
||||
* alarm_start_relative - Sets a relative alarm to fire
|
||||
* @alarm: ptr to alarm to set
|
||||
* @start: time relative to now to run the alarm
|
||||
*/
|
||||
int alarm_start_relative(struct alarm *alarm, ktime_t start)
|
||||
{
|
||||
struct alarm_base *base = &alarm_bases[alarm->type];
|
||||
|
||||
start = ktime_add(start, base->gettime());
|
||||
return alarm_start(alarm, start);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(alarm_start_relative);
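
A hypothetical usage sketch (not part of this patch) for the new relative helper; the handler signature and ALARM_* types follow include/linux/alarmtimer.h:

#include <linux/alarmtimer.h>
#include <linux/kernel.h>
#include <linux/ktime.h>

static struct alarm example_alarm;

static enum alarmtimer_restart example_alarm_fn(struct alarm *a, ktime_t now)
{
	pr_info("alarm fired\n");
	return ALARMTIMER_NORESTART;	/* one-shot */
}

static void example_arm_alarm(void)
{
	alarm_init(&example_alarm, ALARM_BOOTTIME, example_alarm_fn);
	alarm_start_relative(&example_alarm, ktime_set(5, 0));	/* 5s from now */
}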
|
||||
|
||||
void alarm_restart(struct alarm *alarm)
|
||||
{
|
||||
struct alarm_base *base = &alarm_bases[alarm->type];
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&base->lock, flags);
|
||||
hrtimer_set_expires(&alarm->timer, alarm->node.expires);
|
||||
hrtimer_restart(&alarm->timer);
|
||||
alarmtimer_enqueue(base, alarm);
|
||||
spin_unlock_irqrestore(&base->lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(alarm_restart);
|
||||
|
||||
/**
|
||||
* alarm_try_to_cancel - Tries to cancel an alarm timer
|
||||
@@ -344,6 +380,7 @@ int alarm_try_to_cancel(struct alarm *alarm)
|
||||
spin_unlock_irqrestore(&base->lock, flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(alarm_try_to_cancel);
|
||||
|
||||
|
||||
/**
|
||||
@@ -361,6 +398,7 @@ int alarm_cancel(struct alarm *alarm)
|
||||
cpu_relax();
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(alarm_cancel);
|
||||
|
||||
|
||||
u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
|
||||
@@ -393,8 +431,15 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval)
|
||||
alarm->node.expires = ktime_add(alarm->node.expires, interval);
|
||||
return overrun;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(alarm_forward);
|
||||
|
||||
u64 alarm_forward_now(struct alarm *alarm, ktime_t interval)
|
||||
{
|
||||
struct alarm_base *base = &alarm_bases[alarm->type];
|
||||
|
||||
return alarm_forward(alarm, base->gettime(), interval);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(alarm_forward_now);
|
||||
|
||||
|
||||
/**
|
||||
|
@@ -15,20 +15,23 @@
|
||||
#include <linux/hrtimer.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/notifier.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/device.h>
|
||||
|
||||
#include "tick-internal.h"
|
||||
|
||||
/* The registered clock event devices */
|
||||
static LIST_HEAD(clockevent_devices);
|
||||
static LIST_HEAD(clockevents_released);
|
||||
|
||||
/* Notification for clock events */
|
||||
static RAW_NOTIFIER_HEAD(clockevents_chain);
|
||||
|
||||
/* Protection for the above */
|
||||
static DEFINE_RAW_SPINLOCK(clockevents_lock);
|
||||
/* Protection for unbind operations */
|
||||
static DEFINE_MUTEX(clockevents_mutex);
|
||||
|
||||
struct ce_unbind {
|
||||
struct clock_event_device *ce;
|
||||
int res;
|
||||
};
|
||||
|
||||
/**
|
||||
* clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds
|
||||
@@ -232,30 +235,6 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
|
||||
return (rc && force) ? clockevents_program_min_delta(dev) : rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* clockevents_register_notifier - register a clock events change listener
|
||||
*/
|
||||
int clockevents_register_notifier(struct notifier_block *nb)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
raw_spin_lock_irqsave(&clockevents_lock, flags);
|
||||
ret = raw_notifier_chain_register(&clockevents_chain, nb);
|
||||
raw_spin_unlock_irqrestore(&clockevents_lock, flags);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Notify about a clock event change. Called with clockevents_lock
|
||||
* held.
|
||||
*/
|
||||
static void clockevents_do_notify(unsigned long reason, void *dev)
|
||||
{
|
||||
raw_notifier_call_chain(&clockevents_chain, reason, dev);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called after a notify add to make devices available which were
|
||||
* released from the notifier call.
|
||||
@@ -269,10 +248,94 @@ static void clockevents_notify_released(void)
|
||||
struct clock_event_device, list);
|
||||
list_del(&dev->list);
|
||||
list_add(&dev->list, &clockevent_devices);
|
||||
clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
|
||||
tick_check_new_device(dev);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to install a replacement clock event device
|
||||
*/
|
||||
static int clockevents_replace(struct clock_event_device *ced)
|
||||
{
|
||||
struct clock_event_device *dev, *newdev = NULL;
|
||||
|
||||
list_for_each_entry(dev, &clockevent_devices, list) {
|
||||
if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED)
|
||||
continue;
|
||||
|
||||
if (!tick_check_replacement(newdev, dev))
|
||||
continue;
|
||||
|
||||
if (!try_module_get(dev->owner))
|
||||
continue;
|
||||
|
||||
if (newdev)
|
||||
module_put(newdev->owner);
|
||||
newdev = dev;
|
||||
}
|
||||
if (newdev) {
|
||||
tick_install_replacement(newdev);
|
||||
list_del_init(&ced->list);
|
||||
}
|
||||
return newdev ? 0 : -EBUSY;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called with clockevents_mutex and clockevents_lock held
|
||||
*/
|
||||
static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu)
|
||||
{
|
||||
/* Fast track. Device is unused */
|
||||
if (ced->mode == CLOCK_EVT_MODE_UNUSED) {
|
||||
list_del_init(&ced->list);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return ced == per_cpu(tick_cpu_device, cpu).evtdev ? -EAGAIN : -EBUSY;
|
||||
}
|
||||
|
||||
/*
|
||||
* SMP function call to unbind a device
|
||||
*/
|
||||
static void __clockevents_unbind(void *arg)
|
||||
{
|
||||
struct ce_unbind *cu = arg;
|
||||
int res;
|
||||
|
||||
raw_spin_lock(&clockevents_lock);
|
||||
res = __clockevents_try_unbind(cu->ce, smp_processor_id());
|
||||
if (res == -EAGAIN)
|
||||
res = clockevents_replace(cu->ce);
|
||||
cu->res = res;
|
||||
raw_spin_unlock(&clockevents_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Issues smp function call to unbind a per cpu device. Called with
|
||||
* clockevents_mutex held.
|
||||
*/
|
||||
static int clockevents_unbind(struct clock_event_device *ced, int cpu)
|
||||
{
|
||||
struct ce_unbind cu = { .ce = ced, .res = -ENODEV };
|
||||
|
||||
smp_call_function_single(cpu, __clockevents_unbind, &cu, 1);
|
||||
return cu.res;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unbind a clockevents device.
|
||||
*/
|
||||
int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&clockevents_mutex);
|
||||
ret = clockevents_unbind(ced, cpu);
|
||||
mutex_unlock(&clockevents_mutex);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(clockevents_unbind);
|
||||
|
||||
/**
|
||||
* clockevents_register_device - register a clock event device
|
||||
* @dev: device to register
|
||||
@@ -290,7 +353,7 @@ void clockevents_register_device(struct clock_event_device *dev)
|
||||
raw_spin_lock_irqsave(&clockevents_lock, flags);
|
||||
|
||||
list_add(&dev->list, &clockevent_devices);
|
||||
clockevents_do_notify(CLOCK_EVT_NOTIFY_ADD, dev);
|
||||
tick_check_new_device(dev);
|
||||
clockevents_notify_released();
|
||||
|
||||
raw_spin_unlock_irqrestore(&clockevents_lock, flags);
|
||||
@@ -386,6 +449,7 @@ void clockevents_exchange_device(struct clock_event_device *old,
|
||||
* released list and do a notify add later.
|
||||
*/
|
||||
if (old) {
|
||||
module_put(old->owner);
|
||||
clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
|
||||
list_del(&old->list);
|
||||
list_add(&old->list, &clockevents_released);
|
||||
@@ -433,10 +497,36 @@ void clockevents_notify(unsigned long reason, void *arg)
|
||||
int cpu;
|
||||
|
||||
raw_spin_lock_irqsave(&clockevents_lock, flags);
|
||||
clockevents_do_notify(reason, arg);
|
||||
|
||||
switch (reason) {
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_ON:
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
|
||||
tick_broadcast_on_off(reason, arg);
|
||||
break;
|
||||
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
|
||||
tick_broadcast_oneshot_control(reason);
|
||||
break;
|
||||
|
||||
case CLOCK_EVT_NOTIFY_CPU_DYING:
|
||||
tick_handover_do_timer(arg);
|
||||
break;
|
||||
|
||||
case CLOCK_EVT_NOTIFY_SUSPEND:
|
||||
tick_suspend();
|
||||
tick_suspend_broadcast();
|
||||
break;
|
||||
|
||||
case CLOCK_EVT_NOTIFY_RESUME:
|
||||
tick_resume();
|
||||
break;
|
||||
|
||||
case CLOCK_EVT_NOTIFY_CPU_DEAD:
|
||||
tick_shutdown_broadcast_oneshot(arg);
|
||||
tick_shutdown_broadcast(arg);
|
||||
tick_shutdown(arg);
|
||||
/*
|
||||
* Unregister the clock event devices which were
|
||||
* released from the users in the notify chain.
|
||||
@@ -462,4 +552,123 @@ void clockevents_notify(unsigned long reason, void *arg)
|
||||
raw_spin_unlock_irqrestore(&clockevents_lock, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(clockevents_notify);
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
struct bus_type clockevents_subsys = {
|
||||
.name = "clockevents",
|
||||
.dev_name = "clockevent",
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct device, tick_percpu_dev);
|
||||
static struct tick_device *tick_get_tick_dev(struct device *dev);
|
||||
|
||||
static ssize_t sysfs_show_current_tick_dev(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct tick_device *td;
|
||||
ssize_t count = 0;
|
||||
|
||||
raw_spin_lock_irq(&clockevents_lock);
|
||||
td = tick_get_tick_dev(dev);
|
||||
if (td && td->evtdev)
|
||||
count = snprintf(buf, PAGE_SIZE, "%s\n", td->evtdev->name);
|
||||
raw_spin_unlock_irq(&clockevents_lock);
|
||||
return count;
|
||||
}
|
||||
static DEVICE_ATTR(current_device, 0444, sysfs_show_current_tick_dev, NULL);
|
||||
|
||||
/* We don't support the abomination of removable broadcast devices */
|
||||
static ssize_t sysfs_unbind_tick_dev(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
char name[CS_NAME_LEN];
|
||||
size_t ret = sysfs_get_uname(buf, name, count);
|
||||
struct clock_event_device *ce;
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = -ENODEV;
|
||||
mutex_lock(&clockevents_mutex);
|
||||
raw_spin_lock_irq(&clockevents_lock);
|
||||
list_for_each_entry(ce, &clockevent_devices, list) {
|
||||
if (!strcmp(ce->name, name)) {
|
||||
ret = __clockevents_try_unbind(ce, dev->id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock_irq(&clockevents_lock);
|
||||
/*
|
||||
* We hold clockevents_mutex, so ce can't go away
|
||||
*/
|
||||
if (ret == -EAGAIN)
|
||||
ret = clockevents_unbind(ce, dev->id);
|
||||
mutex_unlock(&clockevents_mutex);
|
||||
return ret ? ret : count;
|
||||
}
|
||||
static DEVICE_ATTR(unbind_device, 0200, NULL, sysfs_unbind_tick_dev);
|
||||
|
||||
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
|
||||
static struct device tick_bc_dev = {
|
||||
.init_name = "broadcast",
|
||||
.id = 0,
|
||||
.bus = &clockevents_subsys,
|
||||
};
|
||||
|
||||
static struct tick_device *tick_get_tick_dev(struct device *dev)
|
||||
{
|
||||
return dev == &tick_bc_dev ? tick_get_broadcast_device() :
|
||||
&per_cpu(tick_cpu_device, dev->id);
|
||||
}
|
||||
|
||||
static __init int tick_broadcast_init_sysfs(void)
|
||||
{
|
||||
int err = device_register(&tick_bc_dev);
|
||||
|
||||
if (!err)
|
||||
err = device_create_file(&tick_bc_dev, &dev_attr_current_device);
|
||||
return err;
|
||||
}
|
||||
#else
|
||||
static struct tick_device *tick_get_tick_dev(struct device *dev)
|
||||
{
|
||||
return &per_cpu(tick_cpu_device, dev->id);
|
||||
}
|
||||
static inline int tick_broadcast_init_sysfs(void) { return 0; }
|
||||
#endif
|
||||
|
||||
static int __init tick_init_sysfs(void)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct device *dev = &per_cpu(tick_percpu_dev, cpu);
|
||||
int err;
|
||||
|
||||
dev->id = cpu;
|
||||
dev->bus = &clockevents_subsys;
|
||||
err = device_register(dev);
|
||||
if (!err)
|
||||
err = device_create_file(dev, &dev_attr_current_device);
|
||||
if (!err)
|
||||
err = device_create_file(dev, &dev_attr_unbind_device);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
return tick_broadcast_init_sysfs();
|
||||
}
|
||||
|
||||
static int __init clockevents_init_sysfs(void)
|
||||
{
|
||||
int err = subsys_system_register(&clockevents_subsys, NULL);
|
||||
|
||||
if (!err)
|
||||
err = tick_init_sysfs();
|
||||
return err;
|
||||
}
|
||||
device_initcall(clockevents_init_sysfs);
|
||||
#endif /* SYSFS */
|
||||
|
||||
#endif /* GENERIC_CLOCK_EVENTS */
|
||||
|
@@ -31,6 +31,8 @@
|
||||
#include <linux/tick.h>
|
||||
#include <linux/kthread.h>
|
||||
|
||||
#include "tick-internal.h"
|
||||
|
||||
void timecounter_init(struct timecounter *tc,
|
||||
const struct cyclecounter *cc,
|
||||
u64 start_tstamp)
|
||||
@@ -174,11 +176,12 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
|
||||
static struct clocksource *curr_clocksource;
|
||||
static LIST_HEAD(clocksource_list);
|
||||
static DEFINE_MUTEX(clocksource_mutex);
|
||||
static char override_name[32];
|
||||
static char override_name[CS_NAME_LEN];
|
||||
static int finished_booting;
|
||||
|
||||
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
|
||||
static void clocksource_watchdog_work(struct work_struct *work);
|
||||
static void clocksource_select(void);
|
||||
|
||||
static LIST_HEAD(watchdog_list);
|
||||
static struct clocksource *watchdog;
|
||||
@@ -299,13 +302,30 @@ static void clocksource_watchdog(unsigned long data)
|
||||
if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
|
||||
(cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
|
||||
(watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
|
||||
/* Mark it valid for high-res. */
|
||||
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
|
||||
|
||||
/*
|
||||
* We just marked the clocksource as highres-capable,
|
||||
* notify the rest of the system as well so that we
|
||||
* transition into high-res mode:
|
||||
* clocksource_done_booting() will sort it if
|
||||
* finished_booting is not set yet.
|
||||
*/
|
||||
tick_clock_notify();
|
||||
if (!finished_booting)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* If this is not the current clocksource let
|
||||
* the watchdog thread reselect it. Due to the
|
||||
* change to high res this clocksource might
|
||||
* be preferred now. If it is the current
|
||||
* clocksource let the tick code know about
|
||||
* that change.
|
||||
*/
|
||||
if (cs != curr_clocksource) {
|
||||
cs->flags |= CLOCK_SOURCE_RESELECT;
|
||||
schedule_work(&watchdog_work);
|
||||
} else {
|
||||
tick_clock_notify();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -388,44 +408,39 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
|
||||
|
||||
static void clocksource_dequeue_watchdog(struct clocksource *cs)
|
||||
{
|
||||
struct clocksource *tmp;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&watchdog_lock, flags);
|
||||
if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
|
||||
/* cs is a watched clocksource. */
|
||||
list_del_init(&cs->wd_list);
|
||||
} else if (cs == watchdog) {
|
||||
/* Reset watchdog cycles */
|
||||
clocksource_reset_watchdog();
|
||||
/* Current watchdog is removed. Find an alternative. */
|
||||
watchdog = NULL;
|
||||
list_for_each_entry(tmp, &clocksource_list, list) {
|
||||
if (tmp == cs || tmp->flags & CLOCK_SOURCE_MUST_VERIFY)
|
||||
continue;
|
||||
if (!watchdog || tmp->rating > watchdog->rating)
|
||||
watchdog = tmp;
|
||||
if (cs != watchdog) {
|
||||
if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
|
||||
/* cs is a watched clocksource. */
|
||||
list_del_init(&cs->wd_list);
|
||||
/* Check if the watchdog timer needs to be stopped. */
|
||||
clocksource_stop_watchdog();
|
||||
}
|
||||
}
|
||||
cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
|
||||
/* Check if the watchdog timer needs to be stopped. */
|
||||
clocksource_stop_watchdog();
|
||||
spin_unlock_irqrestore(&watchdog_lock, flags);
|
||||
}
|
||||
|
||||
static int clocksource_watchdog_kthread(void *data)
|
||||
static int __clocksource_watchdog_kthread(void)
|
||||
{
|
||||
struct clocksource *cs, *tmp;
|
||||
unsigned long flags;
|
||||
LIST_HEAD(unstable);
|
||||
int select = 0;
|
||||
|
||||
mutex_lock(&clocksource_mutex);
|
||||
spin_lock_irqsave(&watchdog_lock, flags);
|
||||
list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list)
|
||||
list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
|
||||
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
|
||||
list_del_init(&cs->wd_list);
|
||||
list_add(&cs->wd_list, &unstable);
|
||||
select = 1;
|
||||
}
|
||||
if (cs->flags & CLOCK_SOURCE_RESELECT) {
|
||||
cs->flags &= ~CLOCK_SOURCE_RESELECT;
|
||||
select = 1;
|
||||
}
|
||||
}
|
||||
/* Check if the watchdog timer needs to be stopped. */
|
||||
clocksource_stop_watchdog();
|
||||
spin_unlock_irqrestore(&watchdog_lock, flags);
|
||||
@@ -435,10 +450,23 @@ static int clocksource_watchdog_kthread(void *data)
|
||||
list_del_init(&cs->wd_list);
|
||||
__clocksource_change_rating(cs, 0);
|
||||
}
|
||||
return select;
|
||||
}
|
||||
|
||||
static int clocksource_watchdog_kthread(void *data)
|
||||
{
|
||||
mutex_lock(&clocksource_mutex);
|
||||
if (__clocksource_watchdog_kthread())
|
||||
clocksource_select();
|
||||
mutex_unlock(&clocksource_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool clocksource_is_watchdog(struct clocksource *cs)
|
||||
{
|
||||
return cs == watchdog;
|
||||
}
|
||||
|
||||
#else /* CONFIG_CLOCKSOURCE_WATCHDOG */
|
||||
|
||||
static void clocksource_enqueue_watchdog(struct clocksource *cs)
|
||||
@@ -449,7 +477,8 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
|
||||
|
||||
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
|
||||
static inline void clocksource_resume_watchdog(void) { }
|
||||
static inline int clocksource_watchdog_kthread(void *data) { return 0; }
|
||||
static inline int __clocksource_watchdog_kthread(void) { return 0; }
|
||||
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
|
||||
|
||||
#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
|
||||
|
||||
@@ -553,24 +582,42 @@ static u64 clocksource_max_deferment(struct clocksource *cs)
|
||||
|
||||
#ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
|
||||
|
||||
/**
|
||||
* clocksource_select - Select the best clocksource available
|
||||
*
|
||||
* Private function. Must hold clocksource_mutex when called.
|
||||
*
|
||||
* Select the clocksource with the best rating, or the clocksource,
|
||||
* which is selected by userspace override.
|
||||
*/
|
||||
static void clocksource_select(void)
|
||||
static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
|
||||
{
|
||||
struct clocksource *best, *cs;
|
||||
struct clocksource *cs;
|
||||
|
||||
if (!finished_booting || list_empty(&clocksource_list))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* We pick the clocksource with the highest rating. If oneshot
|
||||
* mode is active, we pick the highres valid clocksource with
|
||||
* the best rating.
|
||||
*/
|
||||
list_for_each_entry(cs, &clocksource_list, list) {
|
||||
if (skipcur && cs == curr_clocksource)
|
||||
continue;
|
||||
if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
|
||||
continue;
|
||||
return cs;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void __clocksource_select(bool skipcur)
|
||||
{
|
||||
bool oneshot = tick_oneshot_mode_active();
|
||||
struct clocksource *best, *cs;
|
||||
|
||||
/* Find the best suitable clocksource */
|
||||
best = clocksource_find_best(oneshot, skipcur);
|
||||
if (!best)
|
||||
return;
|
||||
/* First clocksource on the list has the best rating. */
|
||||
best = list_first_entry(&clocksource_list, struct clocksource, list);
|
||||
|
||||
/* Check for the override clocksource. */
|
||||
list_for_each_entry(cs, &clocksource_list, list) {
|
||||
if (skipcur && cs == curr_clocksource)
|
||||
continue;
|
||||
if (strcmp(cs->name, override_name) != 0)
|
||||
continue;
|
||||
/*
|
||||
@@ -578,8 +625,7 @@ static void clocksource_select(void)
|
||||
* capable clocksource if the tick code is in oneshot
|
||||
* mode (highres or nohz)
|
||||
*/
|
||||
if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
|
||||
tick_oneshot_mode_active()) {
|
||||
if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
|
||||
/* Override clocksource cannot be used. */
|
||||
printk(KERN_WARNING "Override clocksource %s is not "
|
||||
"HRT compatible. Cannot switch while in "
|
||||
@@ -590,16 +636,35 @@ static void clocksource_select(void)
|
||||
best = cs;
|
||||
break;
|
||||
}
|
||||
if (curr_clocksource != best) {
|
||||
printk(KERN_INFO "Switching to clocksource %s\n", best->name);
|
||||
|
||||
if (curr_clocksource != best && !timekeeping_notify(best)) {
|
||||
pr_info("Switched to clocksource %s\n", best->name);
|
||||
curr_clocksource = best;
|
||||
timekeeping_notify(curr_clocksource);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* clocksource_select - Select the best clocksource available
|
||||
*
|
||||
* Private function. Must hold clocksource_mutex when called.
|
||||
*
|
||||
* Select the clocksource with the best rating, or the clocksource,
|
||||
* which is selected by userspace override.
|
||||
*/
|
||||
static void clocksource_select(void)
|
||||
{
|
||||
return __clocksource_select(false);
|
||||
}
|
||||
|
||||
static void clocksource_select_fallback(void)
|
||||
{
|
||||
return __clocksource_select(true);
|
||||
}
|
||||
|
||||
#else /* !CONFIG_ARCH_USES_GETTIMEOFFSET */
|
||||
|
||||
static inline void clocksource_select(void) { }
|
||||
static inline void clocksource_select_fallback(void) { }
|
||||
|
||||
#endif
|
||||
|
||||
@@ -614,16 +679,11 @@ static int __init clocksource_done_booting(void)
|
||||
{
|
||||
mutex_lock(&clocksource_mutex);
|
||||
curr_clocksource = clocksource_default_clock();
|
||||
mutex_unlock(&clocksource_mutex);
|
||||
|
||||
finished_booting = 1;
|
||||
|
||||
/*
|
||||
* Run the watchdog first to eliminate unstable clock sources
|
||||
*/
|
||||
clocksource_watchdog_kthread(NULL);
|
||||
|
||||
mutex_lock(&clocksource_mutex);
|
||||
__clocksource_watchdog_kthread();
|
||||
clocksource_select();
|
||||
mutex_unlock(&clocksource_mutex);
|
||||
return 0;
|
||||
@@ -756,7 +816,6 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)
|
||||
list_del(&cs->list);
|
||||
cs->rating = rating;
|
||||
clocksource_enqueue(cs);
|
||||
clocksource_select();
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -768,21 +827,47 @@ void clocksource_change_rating(struct clocksource *cs, int rating)
|
||||
{
|
||||
mutex_lock(&clocksource_mutex);
|
||||
__clocksource_change_rating(cs, rating);
|
||||
clocksource_select();
|
||||
mutex_unlock(&clocksource_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL(clocksource_change_rating);
|
||||
|
||||
/*
|
||||
* Unbind clocksource @cs. Called with clocksource_mutex held
|
||||
*/
|
||||
static int clocksource_unbind(struct clocksource *cs)
|
||||
{
|
||||
/*
|
||||
* I really can't convince myself to support this on hardware
|
||||
* designed by lobotomized monkeys.
|
||||
*/
|
||||
if (clocksource_is_watchdog(cs))
|
||||
return -EBUSY;
|
||||
|
||||
if (cs == curr_clocksource) {
|
||||
/* Select and try to install a replacement clock source */
|
||||
clocksource_select_fallback();
|
||||
if (curr_clocksource == cs)
|
||||
return -EBUSY;
|
||||
}
|
||||
clocksource_dequeue_watchdog(cs);
|
||||
list_del_init(&cs->list);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* clocksource_unregister - remove a registered clocksource
|
||||
* @cs: clocksource to be unregistered
|
||||
*/
|
||||
void clocksource_unregister(struct clocksource *cs)
|
||||
int clocksource_unregister(struct clocksource *cs)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&clocksource_mutex);
|
||||
clocksource_dequeue_watchdog(cs);
|
||||
list_del(&cs->list);
|
||||
clocksource_select();
|
||||
if (!list_empty(&cs->list))
|
||||
ret = clocksource_unbind(cs);
|
||||
mutex_unlock(&clocksource_mutex);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(clocksource_unregister);
|
||||
|
||||
@@ -808,6 +893,23 @@ sysfs_show_current_clocksources(struct device *dev,
|
||||
return count;
|
||||
}
|
||||
|
||||
size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
|
||||
{
|
||||
size_t ret = cnt;
|
||||
|
||||
/* strings from sysfs write are not 0 terminated! */
|
||||
if (!cnt || cnt >= CS_NAME_LEN)
|
||||
return -EINVAL;
|
||||
|
||||
/* strip off \n: */
|
||||
if (buf[cnt-1] == '\n')
|
||||
cnt--;
|
||||
if (cnt > 0)
|
||||
memcpy(dst, buf, cnt);
|
||||
dst[cnt] = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* sysfs_override_clocksource - interface for manually overriding clocksource
|
||||
* @dev: unused
|
||||
@@ -822,28 +924,53 @@ static ssize_t sysfs_override_clocksource(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
size_t ret = count;
|
||||
|
||||
/* strings from sysfs write are not 0 terminated! */
|
||||
if (count >= sizeof(override_name))
|
||||
return -EINVAL;
|
||||
|
||||
/* strip off \n: */
|
||||
if (buf[count-1] == '\n')
|
||||
count--;
|
||||
size_t ret;
|
||||
|
||||
mutex_lock(&clocksource_mutex);
|
||||
|
||||
if (count > 0)
|
||||
memcpy(override_name, buf, count);
|
||||
override_name[count] = 0;
|
||||
clocksource_select();
|
||||
ret = sysfs_get_uname(buf, override_name, count);
|
||||
if (ret >= 0)
|
||||
clocksource_select();
|
||||
|
||||
mutex_unlock(&clocksource_mutex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* sysfs_unbind_current_clocksource - interface for manually unbinding clocksource
|
||||
* @dev: unused
|
||||
* @attr: unused
|
||||
* @buf: unused
|
||||
* @count: length of buffer
|
||||
*
|
||||
* Takes input from sysfs interface for manually unbinding a clocksource.
|
||||
*/
|
||||
static ssize_t sysfs_unbind_clocksource(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct clocksource *cs;
|
||||
char name[CS_NAME_LEN];
|
||||
size_t ret;
|
||||
|
||||
ret = sysfs_get_uname(buf, name, count);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = -ENODEV;
|
||||
mutex_lock(&clocksource_mutex);
|
||||
list_for_each_entry(cs, &clocksource_list, list) {
|
||||
if (strcmp(cs->name, name))
|
||||
continue;
|
||||
ret = clocksource_unbind(cs);
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&clocksource_mutex);
|
||||
|
||||
return ret ? ret : count;
|
||||
}
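
A hypothetical userspace sketch (not part of this patch) of driving the new attribute; the path assumes the usual clocksource0 sysfs node:

#include <stdio.h>

static int unbind_clocksource(const char *name)
{
	const char *path =
		"/sys/devices/system/clocksource/clocksource0/unbind_clocksource";
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%s\n", name);
	return fclose(f) ? -1 : 0;
}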
|
||||
|
||||
/**
|
||||
* sysfs_show_available_clocksources - sysfs interface for listing clocksource
|
||||
* @dev: unused
|
||||
@@ -886,6 +1013,8 @@ sysfs_show_available_clocksources(struct device *dev,
|
||||
static DEVICE_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
|
||||
sysfs_override_clocksource);
|
||||
|
||||
static DEVICE_ATTR(unbind_clocksource, 0200, NULL, sysfs_unbind_clocksource);
|
||||
|
||||
static DEVICE_ATTR(available_clocksource, 0444,
|
||||
sysfs_show_available_clocksources, NULL);
|
||||
|
||||
@@ -909,6 +1038,9 @@ static int __init init_clocksource_sysfs(void)
|
||||
error = device_create_file(
|
||||
&device_clocksource,
|
||||
&dev_attr_current_clocksource);
|
||||
if (!error)
|
||||
error = device_create_file(&device_clocksource,
|
||||
&dev_attr_unbind_clocksource);
|
||||
if (!error)
|
||||
error = device_create_file(
|
||||
&device_clocksource,
|
||||
|
kernel/time/sched_clock.c (new file, 212 lines)
@@ -0,0 +1,212 @@
|
||||
/*
|
||||
* sched_clock.c: support for extending counters to full 64-bit ns counter
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/syscore_ops.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/sched_clock.h>
|
||||
|
||||
struct clock_data {
|
||||
u64 epoch_ns;
|
||||
u32 epoch_cyc;
|
||||
u32 epoch_cyc_copy;
|
||||
unsigned long rate;
|
||||
u32 mult;
|
||||
u32 shift;
|
||||
bool suspended;
|
||||
};
|
||||
|
||||
static void sched_clock_poll(unsigned long wrap_ticks);
|
||||
static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0);
|
||||
static int irqtime = -1;
|
||||
|
||||
core_param(irqtime, irqtime, int, 0400);
|
||||
|
||||
static struct clock_data cd = {
|
||||
.mult = NSEC_PER_SEC / HZ,
|
||||
};
|
||||
|
||||
static u32 __read_mostly sched_clock_mask = 0xffffffff;
|
||||
|
||||
static u32 notrace jiffy_sched_clock_read(void)
|
||||
{
|
||||
return (u32)(jiffies - INITIAL_JIFFIES);
|
||||
}
|
||||
|
||||
static u32 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
|
||||
|
||||
static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
|
||||
{
|
||||
return (cyc * mult) >> shift;
|
||||
}
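
As a worked example of the mult/shift conversion above, assuming a hypothetical 24 MHz counter: one cycle is 1000000000 / 24000000 ≈ 41.67 ns, so clocks_calc_mult_shift() could pick shift = 24 and mult ≈ 10^9 * 2^24 / (24 * 10^6) ≈ 699050667, and cyc_to_ns(cyc, mult, shift) then yields roughly 41.67 * cyc nanoseconds while the intermediate product still fits 64-bit arithmetic.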
|
||||
|
||||
static unsigned long long notrace sched_clock_32(void)
|
||||
{
|
||||
u64 epoch_ns;
|
||||
u32 epoch_cyc;
|
||||
u32 cyc;
|
||||
|
||||
if (cd.suspended)
|
||||
return cd.epoch_ns;
|
||||
|
||||
/*
|
||||
* Load the epoch_cyc and epoch_ns atomically. We do this by
|
||||
* ensuring that we always write epoch_cyc, epoch_ns and
|
||||
* epoch_cyc_copy in strict order, and read them in strict order.
|
||||
* If epoch_cyc and epoch_cyc_copy are not equal, then we're in
|
||||
* the middle of an update, and we should repeat the load.
|
||||
*/
|
||||
do {
|
||||
epoch_cyc = cd.epoch_cyc;
|
||||
smp_rmb();
|
||||
epoch_ns = cd.epoch_ns;
|
||||
smp_rmb();
|
||||
} while (epoch_cyc != cd.epoch_cyc_copy);
|
||||
|
||||
cyc = read_sched_clock();
|
||||
cyc = (cyc - epoch_cyc) & sched_clock_mask;
|
||||
return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
|
||||
}
|
||||
|
||||
/*
|
||||
* Atomically update the sched_clock epoch.
|
||||
*/
|
||||
static void notrace update_sched_clock(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
u32 cyc;
|
||||
u64 ns;
|
||||
|
||||
cyc = read_sched_clock();
|
||||
ns = cd.epoch_ns +
|
||||
cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
|
||||
cd.mult, cd.shift);
|
||||
/*
|
||||
* Write epoch_cyc and epoch_ns in a way that the update is
|
||||
* detectable in sched_clock_32().
|
||||
*/
|
||||
raw_local_irq_save(flags);
|
||||
cd.epoch_cyc_copy = cyc;
|
||||
smp_wmb();
|
||||
cd.epoch_ns = ns;
|
||||
smp_wmb();
|
||||
cd.epoch_cyc = cyc;
|
||||
raw_local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static void sched_clock_poll(unsigned long wrap_ticks)
|
||||
{
|
||||
mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));
|
||||
update_sched_clock();
|
||||
}
|
||||
|
||||
void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
|
||||
{
|
||||
unsigned long r, w;
|
||||
u64 res, wrap;
|
||||
char r_unit;
|
||||
|
||||
if (cd.rate > rate)
|
||||
return;
|
||||
|
||||
BUG_ON(bits > 32);
|
||||
WARN_ON(!irqs_disabled());
|
||||
read_sched_clock = read;
|
||||
sched_clock_mask = (1 << bits) - 1;
|
||||
cd.rate = rate;
|
||||
|
||||
/* calculate the mult/shift to convert counter ticks to ns. */
|
||||
clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 0);
|
||||
|
||||
r = rate;
|
||||
if (r >= 4000000) {
|
||||
r /= 1000000;
|
||||
r_unit = 'M';
|
||||
} else if (r >= 1000) {
|
||||
r /= 1000;
|
||||
r_unit = 'k';
|
||||
} else
|
||||
r_unit = ' ';
|
||||
|
||||
/* calculate how many ns until we wrap */
|
||||
wrap = cyc_to_ns((1ULL << bits) - 1, cd.mult, cd.shift);
|
||||
do_div(wrap, NSEC_PER_MSEC);
|
||||
w = wrap;
|
||||
|
||||
/* calculate the ns resolution of this counter */
|
||||
res = cyc_to_ns(1ULL, cd.mult, cd.shift);
|
||||
pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n",
|
||||
bits, r, r_unit, res, w);
|
||||
|
||||
/*
|
||||
* Start the timer to keep sched_clock() properly updated and
|
||||
* sets the initial epoch.
|
||||
*/
|
||||
sched_clock_timer.data = msecs_to_jiffies(w - (w / 10));
|
||||
update_sched_clock();
|
||||
|
||||
/*
|
||||
* Ensure that sched_clock() starts off at 0ns
|
||||
*/
|
||||
cd.epoch_ns = 0;
|
||||
|
||||
/* Enable IRQ time accounting if we have a fast enough sched_clock */
|
||||
if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
|
||||
enable_sched_clock_irqtime();
|
||||
|
||||
pr_debug("Registered %pF as sched_clock source\n", read);
|
||||
}
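
To make the wrap/poll arithmetic above concrete with an assumed example: a 32-bit counter at 24 MHz wraps after 2^32 / (24 * 10^6) ≈ 179 s (about 178956 ms), so sched_clock_timer.data becomes w - w/10 ≈ 161061 ms and update_sched_clock() refreshes the epoch comfortably before the counter wraps.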
|
||||
|
||||
unsigned long long __read_mostly (*sched_clock_func)(void) = sched_clock_32;
|
||||
|
||||
unsigned long long notrace sched_clock(void)
|
||||
{
|
||||
return sched_clock_func();
|
||||
}
|
||||
|
||||
void __init sched_clock_postinit(void)
|
||||
{
|
||||
/*
|
||||
* If no sched_clock function has been provided at that point,
|
||||
* make it the final one.
|
||||
*/
|
||||
if (read_sched_clock == jiffy_sched_clock_read)
|
||||
setup_sched_clock(jiffy_sched_clock_read, 32, HZ);
|
||||
|
||||
sched_clock_poll(sched_clock_timer.data);
|
||||
}
|
||||
|
||||
static int sched_clock_suspend(void)
|
||||
{
|
||||
sched_clock_poll(sched_clock_timer.data);
|
||||
cd.suspended = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void sched_clock_resume(void)
|
||||
{
|
||||
cd.epoch_cyc = read_sched_clock();
|
||||
cd.epoch_cyc_copy = cd.epoch_cyc;
|
||||
cd.suspended = false;
|
||||
}
|
||||
|
||||
static struct syscore_ops sched_clock_ops = {
|
||||
.suspend = sched_clock_suspend,
|
||||
.resume = sched_clock_resume,
|
||||
};
|
||||
|
||||
static int __init sched_clock_syscore_init(void)
|
||||
{
|
||||
register_syscore_ops(&sched_clock_ops);
|
||||
return 0;
|
||||
}
|
||||
device_initcall(sched_clock_syscore_init);
|
@@ -19,6 +19,7 @@
|
||||
#include <linux/profile.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include "tick-internal.h"
|
||||
|
||||
@@ -29,6 +30,7 @@
|
||||
|
||||
static struct tick_device tick_broadcast_device;
|
||||
static cpumask_var_t tick_broadcast_mask;
|
||||
static cpumask_var_t tick_broadcast_on;
|
||||
static cpumask_var_t tmpmask;
|
||||
static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
|
||||
static int tick_broadcast_force;
|
||||
@@ -64,17 +66,34 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc)
|
||||
/*
|
||||
* Check, if the device can be utilized as broadcast device:
|
||||
*/
|
||||
int tick_check_broadcast_device(struct clock_event_device *dev)
|
||||
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
|
||||
struct clock_event_device *newdev)
|
||||
{
|
||||
if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
|
||||
(newdev->features & CLOCK_EVT_FEAT_C3STOP))
|
||||
return false;
|
||||
|
||||
if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
|
||||
!(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
|
||||
return false;
|
||||
|
||||
return !curdev || newdev->rating > curdev->rating;
|
||||
}
|
||||
|
||||
/*
|
||||
* Conditionally install/replace broadcast device
|
||||
*/
|
||||
void tick_install_broadcast_device(struct clock_event_device *dev)
|
||||
{
|
||||
struct clock_event_device *cur = tick_broadcast_device.evtdev;
|
||||
|
||||
if ((dev->features & CLOCK_EVT_FEAT_DUMMY) ||
|
||||
(tick_broadcast_device.evtdev &&
|
||||
tick_broadcast_device.evtdev->rating >= dev->rating) ||
|
||||
(dev->features & CLOCK_EVT_FEAT_C3STOP))
|
||||
return 0;
|
||||
if (!tick_check_broadcast_device(cur, dev))
|
||||
return;
|
||||
|
||||
clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
|
||||
if (!try_module_get(dev->owner))
|
||||
return;
|
||||
|
||||
clockevents_exchange_device(cur, dev);
|
||||
if (cur)
|
||||
cur->event_handler = clockevents_handle_noop;
|
||||
tick_broadcast_device.evtdev = dev;
|
||||
@@ -90,7 +109,6 @@ int tick_check_broadcast_device(struct clock_event_device *dev)
|
||||
*/
|
||||
if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
|
||||
tick_clock_notify();
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -123,8 +141,9 @@ static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
|
||||
*/
|
||||
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
|
||||
{
|
||||
struct clock_event_device *bc = tick_broadcast_device.evtdev;
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
|
||||
|
||||
@@ -138,20 +157,62 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
|
||||
dev->event_handler = tick_handle_periodic;
|
||||
tick_device_setup_broadcast_func(dev);
|
||||
cpumask_set_cpu(cpu, tick_broadcast_mask);
|
||||
tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
|
||||
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
|
||||
tick_broadcast_start_periodic(bc);
|
||||
else
|
||||
tick_broadcast_setup_oneshot(bc);
|
||||
ret = 1;
|
||||
} else {
|
||||
/*
|
||||
* When the new device is not affected by the stop
|
||||
* feature and the cpu is marked in the broadcast mask
|
||||
* then clear the broadcast bit.
|
||||
* Clear the broadcast bit for this cpu if the
|
||||
* device is not power state affected.
|
||||
*/
|
||||
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
|
||||
int cpu = smp_processor_id();
|
||||
if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
|
||||
cpumask_clear_cpu(cpu, tick_broadcast_mask);
|
||||
tick_broadcast_clear_oneshot(cpu);
|
||||
} else {
|
||||
else
|
||||
tick_device_setup_broadcast_func(dev);
|
||||
|
||||
/*
|
||||
* Clear the broadcast bit if the CPU is not in
|
||||
* periodic broadcast on state.
|
||||
*/
|
||||
if (!cpumask_test_cpu(cpu, tick_broadcast_on))
|
||||
cpumask_clear_cpu(cpu, tick_broadcast_mask);
|
||||
|
||||
switch (tick_broadcast_device.mode) {
|
||||
case TICKDEV_MODE_ONESHOT:
|
||||
/*
|
||||
* If the system is in oneshot mode we can
|
||||
* unconditionally clear the oneshot mask bit,
|
||||
* because the CPU is running and therefore
|
||||
* not in an idle state which causes the power
|
||||
* state affected device to stop. Let the
|
||||
* caller initialize the device.
|
||||
*/
|
||||
tick_broadcast_clear_oneshot(cpu);
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
case TICKDEV_MODE_PERIODIC:
|
||||
/*
|
||||
* If the system is in periodic mode, check
|
||||
* whether the broadcast device can be
|
||||
* switched off now.
|
||||
*/
|
||||
if (cpumask_empty(tick_broadcast_mask) && bc)
|
||||
clockevents_shutdown(bc);
|
||||
/*
|
||||
* If we kept the cpu in the broadcast mask,
|
||||
* tell the caller to leave the per cpu device
|
||||
* in shutdown state. The periodic interrupt
|
||||
* is delivered by the broadcast device.
|
||||
*/
|
||||
ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
|
||||
break;
|
||||
default:
|
||||
/* Nothing to do */
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
|
||||
@@ -281,6 +342,7 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
|
||||
switch (*reason) {
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_ON:
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
|
||||
cpumask_set_cpu(cpu, tick_broadcast_on);
|
||||
if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
|
||||
if (tick_broadcast_device.mode ==
|
||||
TICKDEV_MODE_PERIODIC)
|
||||
@@ -290,8 +352,12 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
|
||||
tick_broadcast_force = 1;
|
||||
break;
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
|
||||
if (!tick_broadcast_force &&
|
||||
cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
|
||||
if (tick_broadcast_force)
|
||||
break;
|
||||
cpumask_clear_cpu(cpu, tick_broadcast_on);
|
||||
if (!tick_device_is_functional(dev))
|
||||
break;
|
||||
if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
|
||||
if (tick_broadcast_device.mode ==
|
||||
TICKDEV_MODE_PERIODIC)
|
||||
tick_setup_periodic(dev, 0);
|
||||
@@ -349,6 +415,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
|
||||
|
||||
bc = tick_broadcast_device.evtdev;
|
||||
cpumask_clear_cpu(cpu, tick_broadcast_mask);
|
||||
cpumask_clear_cpu(cpu, tick_broadcast_on);
|
||||
|
||||
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
|
||||
if (bc && cpumask_empty(tick_broadcast_mask))
|
||||
@@ -475,7 +542,15 @@ void tick_check_oneshot_broadcast(int cpu)
|
||||
if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
|
||||
struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
|
||||
|
||||
clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
|
||||
/*
|
||||
* We might be in the middle of switching over from
|
||||
* periodic to oneshot. If the CPU has not yet
|
||||
* switched over, leave the device alone.
|
||||
*/
|
||||
if (td->mode == TICKDEV_MODE_ONESHOT) {
|
||||
clockevents_set_mode(td->evtdev,
|
||||
CLOCK_EVT_MODE_ONESHOT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -521,6 +596,13 @@ again:
|
||||
cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
|
||||
cpumask_clear(tick_broadcast_force_mask);
|
||||
|
||||
/*
|
||||
* Sanity check. Catch the case where we try to broadcast to
|
||||
* offline cpus.
|
||||
*/
|
||||
if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
|
||||
cpumask_and(tmpmask, tmpmask, cpu_online_mask);
|
||||
|
||||
/*
|
||||
* Wakeup the cpus which have an expired event.
|
||||
*/
|
||||
@@ -761,10 +843,12 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
|
||||
raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
|
||||
|
||||
/*
|
||||
* Clear the broadcast mask flag for the dead cpu, but do not
|
||||
* stop the broadcast device!
|
||||
* Clear the broadcast masks for the dead cpu, but do not stop
|
||||
* the broadcast device!
|
||||
*/
|
||||
cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
|
||||
cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
|
||||
cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
|
||||
|
||||
raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
|
||||
}
|
||||
@@ -792,6 +876,7 @@ bool tick_broadcast_oneshot_available(void)
|
||||
void __init tick_broadcast_init(void)
|
||||
{
|
||||
zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
|
||||
zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
|
||||
zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
|
||||
#ifdef CONFIG_TICK_ONESHOT
|
||||
zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
|
||||
|
@@ -18,6 +18,7 @@
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/profile.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include <asm/irq_regs.h>
|
||||
|
||||
@@ -33,7 +34,6 @@ DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
|
||||
ktime_t tick_next_period;
|
||||
ktime_t tick_period;
|
||||
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
|
||||
static DEFINE_RAW_SPINLOCK(tick_device_lock);
|
||||
|
||||
/*
|
||||
* Debugging: see timer_list.c
|
||||
@@ -194,7 +194,8 @@ static void tick_setup_device(struct tick_device *td,
|
||||
* When global broadcasting is active, check if the current
|
||||
* device is registered as a placeholder for broadcast mode.
|
||||
* This allows us to handle this x86 misfeature in a generic
|
||||
* way.
|
||||
* way. This function also returns !=0 when we keep the
|
||||
* current active broadcast state for this CPU.
|
||||
*/
|
||||
if (tick_device_uses_broadcast(newdev, cpu))
|
||||
return;
|
||||
@@ -205,17 +206,75 @@ static void tick_setup_device(struct tick_device *td,
|
||||
tick_setup_oneshot(newdev, handler, next_event);
|
||||
}
|
||||
|
||||
void tick_install_replacement(struct clock_event_device *newdev)
|
||||
{
|
||||
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
clockevents_exchange_device(td->evtdev, newdev);
|
||||
tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
|
||||
if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
|
||||
tick_oneshot_notify();
|
||||
}
|
||||
|
||||
static bool tick_check_percpu(struct clock_event_device *curdev,
|
||||
struct clock_event_device *newdev, int cpu)
|
||||
{
|
||||
if (!cpumask_test_cpu(cpu, newdev->cpumask))
|
||||
return false;
|
||||
if (cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
|
||||
return true;
|
||||
/* Check if irq affinity can be set */
|
||||
if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq))
|
||||
return false;
|
||||
/* Prefer an existing cpu local device */
|
||||
if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool tick_check_preferred(struct clock_event_device *curdev,
|
||||
struct clock_event_device *newdev)
|
||||
{
|
||||
/* Prefer oneshot capable device */
|
||||
if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) {
|
||||
if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT))
|
||||
return false;
|
||||
if (tick_oneshot_mode_active())
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use the higher rated one, but prefer a CPU local device with a lower
|
||||
* rating than a non-CPU local device
|
||||
*/
|
||||
return !curdev ||
|
||||
newdev->rating > curdev->rating ||
|
||||
!cpumask_equal(curdev->cpumask, newdev->cpumask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check, if the new registered device should be used.
|
||||
* Check whether the new device is a better fit than curdev. curdev
|
||||
* can be NULL !
|
||||
*/
|
||||
static int tick_check_new_device(struct clock_event_device *newdev)
|
||||
bool tick_check_replacement(struct clock_event_device *curdev,
|
||||
struct clock_event_device *newdev)
|
||||
{
|
||||
if (tick_check_percpu(curdev, newdev, smp_processor_id()))
|
||||
return false;
|
||||
|
||||
return tick_check_preferred(curdev, newdev);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check, if the new registered device should be used. Called with
|
||||
* clockevents_lock held and interrupts disabled.
|
||||
*/
|
||||
void tick_check_new_device(struct clock_event_device *newdev)
|
||||
{
|
||||
struct clock_event_device *curdev;
|
||||
struct tick_device *td;
|
||||
int cpu, ret = NOTIFY_OK;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&tick_device_lock, flags);
|
||||
int cpu;
|
||||
|
||||
cpu = smp_processor_id();
|
||||
if (!cpumask_test_cpu(cpu, newdev->cpumask))
|
||||
@@ -225,40 +284,15 @@ static int tick_check_new_device(struct clock_event_device *newdev)
|
||||
curdev = td->evtdev;
|
||||
|
||||
/* cpu local device ? */
|
||||
if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu))) {
|
||||
if (!tick_check_percpu(curdev, newdev, cpu))
|
||||
goto out_bc;
|
||||
|
||||
/*
|
||||
* If the cpu affinity of the device interrupt can not
|
||||
* be set, ignore it.
|
||||
*/
|
||||
if (!irq_can_set_affinity(newdev->irq))
|
||||
goto out_bc;
|
||||
/* Preference decision */
|
||||
if (!tick_check_preferred(curdev, newdev))
|
||||
goto out_bc;
|
||||
|
||||
/*
|
||||
* If we have a cpu local device already, do not replace it
|
||||
* by a non cpu local device
|
||||
*/
|
||||
if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
|
||||
goto out_bc;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have an active device, then check the rating and the oneshot
|
||||
* feature.
|
||||
*/
|
||||
if (curdev) {
|
||||
/*
|
||||
* Prefer one shot capable devices !
|
||||
*/
|
||||
if ((curdev->features & CLOCK_EVT_FEAT_ONESHOT) &&
|
||||
!(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
|
||||
goto out_bc;
|
||||
/*
|
||||
* Check the rating
|
||||
*/
|
||||
if (curdev->rating >= newdev->rating)
|
||||
goto out_bc;
|
||||
}
|
||||
if (!try_module_get(newdev->owner))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Replace the eventually existing device by the new
|
||||
@@ -273,20 +307,13 @@ static int tick_check_new_device(struct clock_event_device *newdev)
|
||||
tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
|
||||
if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
|
||||
tick_oneshot_notify();
|
||||
|
||||
raw_spin_unlock_irqrestore(&tick_device_lock, flags);
|
||||
return NOTIFY_STOP;
|
||||
return;
|
||||
|
||||
out_bc:
|
||||
/*
|
||||
* Can the new device be used as a broadcast device ?
|
||||
*/
|
||||
if (tick_check_broadcast_device(newdev))
|
||||
ret = NOTIFY_STOP;
|
||||
|
||||
raw_spin_unlock_irqrestore(&tick_device_lock, flags);
|
||||
|
||||
return ret;
|
||||
tick_install_broadcast_device(newdev);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -294,7 +321,7 @@ out_bc:
|
||||
*
|
||||
* Called with interrupts disabled.
|
||||
*/
|
||||
static void tick_handover_do_timer(int *cpup)
|
||||
void tick_handover_do_timer(int *cpup)
|
||||
{
|
||||
if (*cpup == tick_do_timer_cpu) {
|
||||
int cpu = cpumask_first(cpu_online_mask);
|
||||
@@ -311,13 +338,11 @@ static void tick_handover_do_timer(int *cpup)
|
||||
* access the hardware device itself.
|
||||
* We just set the mode and remove it from the lists.
|
||||
*/
|
||||
static void tick_shutdown(unsigned int *cpup)
|
||||
void tick_shutdown(unsigned int *cpup)
|
||||
{
|
||||
struct tick_device *td = &per_cpu(tick_cpu_device, *cpup);
|
||||
struct clock_event_device *dev = td->evtdev;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&tick_device_lock, flags);
|
||||
td->mode = TICKDEV_MODE_PERIODIC;
|
||||
if (dev) {
|
||||
/*
|
||||
@@ -329,26 +354,20 @@ static void tick_shutdown(unsigned int *cpup)
|
||||
dev->event_handler = clockevents_handle_noop;
|
||||
td->evtdev = NULL;
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&tick_device_lock, flags);
|
||||
}
|
||||
|
||||
static void tick_suspend(void)
|
||||
void tick_suspend(void)
|
||||
{
|
||||
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&tick_device_lock, flags);
|
||||
clockevents_shutdown(td->evtdev);
|
||||
raw_spin_unlock_irqrestore(&tick_device_lock, flags);
|
||||
}
|
||||
|
||||
static void tick_resume(void)
|
||||
void tick_resume(void)
|
||||
{
|
||||
struct tick_device *td = &__get_cpu_var(tick_cpu_device);
|
||||
unsigned long flags;
|
||||
int broadcast = tick_resume_broadcast();
|
||||
|
||||
raw_spin_lock_irqsave(&tick_device_lock, flags);
|
||||
clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
|
||||
|
||||
if (!broadcast) {
|
||||
@@ -357,68 +376,12 @@ static void tick_resume(void)
|
||||
else
|
||||
tick_resume_oneshot();
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&tick_device_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Notification about clock event devices
|
||||
*/
|
||||
static int tick_notify(struct notifier_block *nb, unsigned long reason,
|
||||
void *dev)
|
||||
{
|
||||
switch (reason) {
|
||||
|
||||
case CLOCK_EVT_NOTIFY_ADD:
|
||||
return tick_check_new_device(dev);
|
||||
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_ON:
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
|
||||
tick_broadcast_on_off(reason, dev);
|
||||
break;
|
||||
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
|
||||
case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
|
||||
tick_broadcast_oneshot_control(reason);
|
||||
break;
|
||||
|
||||
case CLOCK_EVT_NOTIFY_CPU_DYING:
|
||||
tick_handover_do_timer(dev);
|
||||
break;
|
||||
|
||||
case CLOCK_EVT_NOTIFY_CPU_DEAD:
|
||||
tick_shutdown_broadcast_oneshot(dev);
|
||||
tick_shutdown_broadcast(dev);
|
||||
tick_shutdown(dev);
|
||||
break;
|
||||
|
||||
case CLOCK_EVT_NOTIFY_SUSPEND:
|
||||
tick_suspend();
|
||||
tick_suspend_broadcast();
|
||||
break;
|
||||
|
||||
case CLOCK_EVT_NOTIFY_RESUME:
|
||||
tick_resume();
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block tick_notifier = {
|
||||
.notifier_call = tick_notify,
|
||||
};
|
||||
|
||||
/**
|
||||
* tick_init - initialize the tick control
|
||||
*
|
||||
* Register the notifier with the clockevents framework
|
||||
*/
|
||||
void __init tick_init(void)
|
||||
{
|
||||
clockevents_register_notifier(&tick_notifier);
|
||||
tick_broadcast_init();
|
||||
}
|
||||
|
@@ -6,6 +6,8 @@
|
||||
|
||||
extern seqlock_t jiffies_lock;
|
||||
|
||||
#define CS_NAME_LEN 32
|
||||
|
||||
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
|
||||
|
||||
#define TICK_DO_TIMER_NONE -1
|
||||
@@ -18,9 +20,19 @@ extern int tick_do_timer_cpu __read_mostly;
|
||||
|
||||
extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
|
||||
extern void tick_handle_periodic(struct clock_event_device *dev);
|
||||
extern void tick_check_new_device(struct clock_event_device *dev);
|
||||
extern void tick_handover_do_timer(int *cpup);
|
||||
extern void tick_shutdown(unsigned int *cpup);
|
||||
extern void tick_suspend(void);
|
||||
extern void tick_resume(void);
|
||||
extern bool tick_check_replacement(struct clock_event_device *curdev,
|
||||
struct clock_event_device *newdev);
|
||||
extern void tick_install_replacement(struct clock_event_device *dev);
|
||||
|
||||
extern void clockevents_shutdown(struct clock_event_device *dev);
|
||||
|
||||
extern size_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
|
||||
|
||||
/*
|
||||
* NO_HZ / high resolution timer shared code
|
||||
*/
|
||||
@@ -90,7 +102,7 @@ static inline bool tick_broadcast_oneshot_available(void) { return false; }
|
||||
*/
|
||||
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
|
||||
extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
|
||||
extern int tick_check_broadcast_device(struct clock_event_device *dev);
|
||||
extern void tick_install_broadcast_device(struct clock_event_device *dev);
|
||||
extern int tick_is_broadcast_device(struct clock_event_device *dev);
|
||||
extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
|
||||
extern void tick_shutdown_broadcast(unsigned int *cpup);
|
||||
@@ -102,9 +114,8 @@ tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
|
||||
|
||||
#else /* !BROADCAST */
|
||||
|
||||
static inline int tick_check_broadcast_device(struct clock_event_device *dev)
|
||||
static inline void tick_install_broadcast_device(struct clock_event_device *dev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int tick_is_broadcast_device(struct clock_event_device *dev)
|
||||
|
@@ -178,6 +178,11 @@ static bool can_stop_full_tick(void)
|
||||
*/
|
||||
if (!sched_clock_stable) {
|
||||
trace_tick_stop(0, "unstable sched clock\n");
|
||||
/*
|
||||
* Don't allow the user to think they can get
|
||||
* full NO_HZ with this machine.
|
||||
*/
|
||||
WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock");
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
@@ -293,7 +298,7 @@ static int __init tick_nohz_full_setup(char *str)
|
||||
}
|
||||
__setup("nohz_full=", tick_nohz_full_setup);
|
||||
|
||||
static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb,
|
||||
static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
|
||||
unsigned long action,
|
||||
void *hcpu)
|
||||
{
|
||||
@@ -346,16 +351,6 @@ void __init tick_nohz_init(void)
|
||||
}
|
||||
|
||||
cpu_notifier(tick_nohz_cpu_down_callback, 0);
|
||||
|
||||
/* Make sure full dynticks CPU are also RCU nocbs */
|
||||
for_each_cpu(cpu, nohz_full_mask) {
|
||||
if (!rcu_is_nocb_cpu(cpu)) {
|
||||
pr_warning("NO_HZ: CPU %d is not RCU nocb: "
|
||||
"cleared from nohz_full range", cpu);
|
||||
cpumask_clear_cpu(cpu, nohz_full_mask);
|
||||
}
|
||||
}
|
||||
|
||||
cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask);
|
||||
pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
|
||||
}
|
||||
|
@@ -25,6 +25,11 @@
|
||||
|
||||
#include "tick-internal.h"
|
||||
#include "ntp_internal.h"
|
||||
#include "timekeeping_internal.h"
|
||||
|
||||
#define TK_CLEAR_NTP (1 << 0)
|
||||
#define TK_MIRROR (1 << 1)
|
||||
#define TK_CLOCK_WAS_SET (1 << 2)
|
||||
|
||||
static struct timekeeper timekeeper;
|
||||
static DEFINE_RAW_SPINLOCK(timekeeper_lock);
|
||||
@@ -200,9 +205,9 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
|
||||
|
||||
static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
|
||||
|
||||
static void update_pvclock_gtod(struct timekeeper *tk)
|
||||
static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
|
||||
{
|
||||
raw_notifier_call_chain(&pvclock_gtod_chain, 0, tk);
|
||||
raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -216,7 +221,7 @@ int pvclock_gtod_register_notifier(struct notifier_block *nb)
|
||||
|
||||
raw_spin_lock_irqsave(&timekeeper_lock, flags);
|
||||
ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
|
||||
update_pvclock_gtod(tk);
|
||||
update_pvclock_gtod(tk, true);
|
||||
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
|
||||
|
||||
return ret;
|
||||
@@ -241,16 +246,16 @@ int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
|
||||
EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
|
||||
|
||||
/* must hold timekeeper_lock */
|
||||
static void timekeeping_update(struct timekeeper *tk, bool clearntp, bool mirror)
|
||||
static void timekeeping_update(struct timekeeper *tk, unsigned int action)
|
||||
{
|
||||
if (clearntp) {
|
||||
if (action & TK_CLEAR_NTP) {
|
||||
tk->ntp_error = 0;
|
||||
ntp_clear();
|
||||
}
|
||||
update_vsyscall(tk);
|
||||
update_pvclock_gtod(tk);
|
||||
update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
|
||||
|
||||
if (mirror)
|
||||
if (action & TK_MIRROR)
|
||||
memcpy(&shadow_timekeeper, &timekeeper, sizeof(timekeeper));
|
||||
}
|
||||
|
||||
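The rewrite above folds timekeeping_update()'s two bool arguments into a single action bitmask, so each caller states exactly which side effects it wants. A hedged user-space sketch of that flag pattern (the struct and printouts are stand-ins; only the TK_* values follow the patch):

#include <stdio.h>
#include <string.h>

#define TK_CLEAR_NTP            (1 << 0)
#define TK_MIRROR               (1 << 1)
#define TK_CLOCK_WAS_SET        (1 << 2)

struct toy_tk { int ntp_error; char shadow[16]; };

/* Callers OR together only the side effects they need. */
static void toy_timekeeping_update(struct toy_tk *tk, unsigned int action)
{
        if (action & TK_CLEAR_NTP)
                tk->ntp_error = 0;                  /* stands in for ntp_clear() */
        if (action & TK_CLOCK_WAS_SET)
                printf("notify: clock was set\n");  /* stands in for the gtod chain */
        if (action & TK_MIRROR)
                strcpy(tk->shadow, "mirrored");     /* stands in for the shadow copy */
}

int main(void)
{
        struct toy_tk tk = { .ntp_error = 42 };

        /* a settimeofday-style caller wants all three effects */
        toy_timekeeping_update(&tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
        /* the periodic update path only mirrors */
        toy_timekeeping_update(&tk, TK_MIRROR);
        printf("ntp_error=%d shadow=%s\n", tk.ntp_error, tk.shadow);
        return 0;
}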
@@ -508,7 +513,7 @@ int do_settimeofday(const struct timespec *tv)
|
||||
|
||||
tk_set_xtime(tk, tv);
|
||||
|
||||
timekeeping_update(tk, true, true);
|
||||
timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
|
||||
|
||||
write_seqcount_end(&timekeeper_seq);
|
||||
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
|
||||
@@ -552,7 +557,7 @@ int timekeeping_inject_offset(struct timespec *ts)
|
||||
tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
|
||||
|
||||
error: /* even if we error out, we forwarded the time, so call update */
|
||||
timekeeping_update(tk, true, true);
|
||||
timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
|
||||
|
||||
write_seqcount_end(&timekeeper_seq);
|
||||
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
|
||||
@@ -627,13 +632,22 @@ static int change_clocksource(void *data)
|
||||
write_seqcount_begin(&timekeeper_seq);
|
||||
|
||||
timekeeping_forward_now(tk);
|
||||
if (!new->enable || new->enable(new) == 0) {
|
||||
old = tk->clock;
|
||||
tk_setup_internals(tk, new);
|
||||
if (old->disable)
|
||||
old->disable(old);
|
||||
/*
|
||||
* If the cs is in module, get a module reference. Succeeds
|
||||
* for built-in code (owner == NULL) as well.
|
||||
*/
|
||||
if (try_module_get(new->owner)) {
|
||||
if (!new->enable || new->enable(new) == 0) {
|
||||
old = tk->clock;
|
||||
tk_setup_internals(tk, new);
|
||||
if (old->disable)
|
||||
old->disable(old);
|
||||
module_put(old->owner);
|
||||
} else {
|
||||
module_put(new->owner);
|
||||
}
|
||||
}
|
||||
timekeeping_update(tk, true, true);
|
||||
timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
|
||||
|
||||
write_seqcount_end(&timekeeper_seq);
|
||||
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
|
||||
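The new change_clocksource() only switches after pinning the candidate's module and drops the old clock's reference once the swap is done, so a clocksource driver cannot be unloaded while it is in use. A rough user-space model of that get/swap/put ordering (provider type and names are invented):

#include <stdbool.h>
#include <stdio.h>

struct provider { const char *name; int refs; };

/* Pin the candidate so it cannot go away; a real try_module_get() can
 * refuse while the module is unloading (and succeeds for built-in code). */
static bool provider_try_get(struct provider *p)
{
        p->refs++;
        return true;
}

static void provider_put(struct provider *p)
{
        p->refs--;
}

static struct provider *current_clock;

static void change_clock(struct provider *cand)
{
        struct provider *old = current_clock;

        if (!provider_try_get(cand))
                return;                 /* cannot pin it: keep the old clock */
        current_clock = cand;
        provider_put(old);              /* the old provider may now unload */
}

int main(void)
{
        struct provider tsc = { "tsc", 1 };     /* in use, one reference held */
        struct provider hpet = { "hpet", 0 };

        current_clock = &tsc;
        change_clock(&hpet);
        printf("now %s (refs: tsc=%d hpet=%d)\n",
               current_clock->name, tsc.refs, hpet.refs);
        return 0;
}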
@@ -648,14 +662,15 @@ static int change_clocksource(void *data)
|
||||
* This function is called from clocksource.c after a new, better clock
|
||||
* source has been registered. The caller holds the clocksource_mutex.
|
||||
*/
|
||||
void timekeeping_notify(struct clocksource *clock)
|
||||
int timekeeping_notify(struct clocksource *clock)
|
||||
{
|
||||
struct timekeeper *tk = &timekeeper;
|
||||
|
||||
if (tk->clock == clock)
|
||||
return;
|
||||
return 0;
|
||||
stop_machine(change_clocksource, clock, NULL);
|
||||
tick_clock_notify();
|
||||
return tk->clock == clock ? 0 : -1;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -841,6 +856,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
|
||||
tk_xtime_add(tk, delta);
|
||||
tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *delta));
|
||||
tk_set_sleep_time(tk, timespec_add(tk->total_sleep_time, *delta));
|
||||
tk_debug_account_sleep_time(delta);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -872,7 +888,7 @@ void timekeeping_inject_sleeptime(struct timespec *delta)
|
||||
|
||||
__timekeeping_inject_sleeptime(tk, delta);
|
||||
|
||||
timekeeping_update(tk, true, true);
|
||||
timekeeping_update(tk, TK_CLEAR_NTP | TK_MIRROR | TK_CLOCK_WAS_SET);
|
||||
|
||||
write_seqcount_end(&timekeeper_seq);
|
||||
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
|
||||
@@ -954,7 +970,7 @@ static void timekeeping_resume(void)
|
||||
tk->cycle_last = clock->cycle_last = cycle_now;
|
||||
tk->ntp_error = 0;
|
||||
timekeeping_suspended = 0;
|
||||
timekeeping_update(tk, false, true);
|
||||
timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
|
||||
write_seqcount_end(&timekeeper_seq);
|
||||
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
|
||||
|
||||
@@ -1236,9 +1252,10 @@ out_adjust:
|
||||
* It also calls into the NTP code to handle leapsecond processing.
|
||||
*
|
||||
*/
|
||||
static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
|
||||
static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
|
||||
{
|
||||
u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;
|
||||
unsigned int action = 0;
|
||||
|
||||
while (tk->xtime_nsec >= nsecps) {
|
||||
int leap;
|
||||
@@ -1261,8 +1278,10 @@ static inline void accumulate_nsecs_to_secs(struct timekeeper *tk)
|
||||
__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
|
||||
|
||||
clock_was_set_delayed();
|
||||
action = TK_CLOCK_WAS_SET;
|
||||
}
|
||||
}
|
||||
return action;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1347,6 +1366,7 @@ static void update_wall_time(void)
|
||||
struct timekeeper *tk = &shadow_timekeeper;
|
||||
cycle_t offset;
|
||||
int shift = 0, maxshift;
|
||||
unsigned int action;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&timekeeper_lock, flags);
|
||||
@@ -1399,7 +1419,7 @@ static void update_wall_time(void)
|
||||
* Finally, make sure that after the rounding
|
||||
* xtime_nsec isn't larger than NSEC_PER_SEC
|
||||
*/
|
||||
accumulate_nsecs_to_secs(tk);
|
||||
action = accumulate_nsecs_to_secs(tk);
|
||||
|
||||
write_seqcount_begin(&timekeeper_seq);
|
||||
/* Update clock->cycle_last with the new value */
|
||||
@@ -1415,7 +1435,7 @@ static void update_wall_time(void)
|
||||
* updating.
|
||||
*/
|
||||
memcpy(real_tk, tk, sizeof(*tk));
|
||||
timekeeping_update(real_tk, false, false);
|
||||
timekeeping_update(real_tk, action);
|
||||
write_seqcount_end(&timekeeper_seq);
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
|
||||
@@ -1677,6 +1697,7 @@ int do_adjtimex(struct timex *txc)
|
||||
|
||||
if (tai != orig_tai) {
|
||||
__timekeeping_set_tai_offset(tk, tai);
|
||||
update_pvclock_gtod(tk, true);
|
||||
clock_was_set_delayed();
|
||||
}
|
||||
write_seqcount_end(&timekeeper_seq);
|
||||
|
 72	kernel/time/timekeeping_debug.c	(new file)
@@ -0,0 +1,72 @@
/*
 * debugfs file to track time spent in suspend
 *
 * Copyright (c) 2011, Google, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/seq_file.h>
#include <linux/time.h>

static unsigned int sleep_time_bin[32] = {0};

static int tk_debug_show_sleep_time(struct seq_file *s, void *data)
{
        unsigned int bin;
        seq_puts(s, "      time (secs)        count\n");
        seq_puts(s, "------------------------------\n");
        for (bin = 0; bin < 32; bin++) {
                if (sleep_time_bin[bin] == 0)
                        continue;
                seq_printf(s, "%10u - %-10u %4u\n",
                        bin ? 1 << (bin - 1) : 0, 1 << bin,
                                sleep_time_bin[bin]);
        }
        return 0;
}

static int tk_debug_sleep_time_open(struct inode *inode, struct file *file)
{
        return single_open(file, tk_debug_show_sleep_time, NULL);
}

static const struct file_operations tk_debug_sleep_time_fops = {
        .open           = tk_debug_sleep_time_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

static int __init tk_debug_sleep_time_init(void)
{
        struct dentry *d;

        d = debugfs_create_file("sleep_time", 0444, NULL, NULL,
                &tk_debug_sleep_time_fops);
        if (!d) {
                pr_err("Failed to create sleep_time debug file\n");
                return -ENOMEM;
        }

        return 0;
}
late_initcall(tk_debug_sleep_time_init);

void tk_debug_account_sleep_time(struct timespec *t)
{
        sleep_time_bin[fls(t->tv_sec)]++;
}
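tk_debug_account_sleep_time() above buckets each suspend into power-of-two bins using fls(), so bin N counts sleeps in the range [2^(N-1), 2^N) seconds. A standalone sketch of the same binning (fls_long() below is a plain-C stand-in for the kernel's fls()):

#include <stdio.h>

static unsigned int sleep_time_bin[32];

/* Same semantics as the kernel's fls(): position of the highest set bit,
 * counting from 1; fls(0) == 0. */
static int fls_long(unsigned long x)
{
        int r = 0;

        while (x) {
                x >>= 1;
                r++;
        }
        return r;
}

/* Bin N counts sleeps of [2^(N-1), 2^N) seconds; bin 0 counts "0 s". */
static void account_sleep_time(long secs)
{
        sleep_time_bin[fls_long(secs)]++;
}

int main(void)
{
        long samples[] = { 0, 1, 3, 7, 8, 130, 4000 };
        unsigned int bin, i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
                account_sleep_time(samples[i]);

        for (bin = 0; bin < 32; bin++)
                if (sleep_time_bin[bin])
                        printf("%10u - %-10u %4u\n",
                               bin ? 1u << (bin - 1) : 0, 1u << bin,
                               sleep_time_bin[bin]);
        return 0;
}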
 14	kernel/time/timekeeping_internal.h	(new file)
@@ -0,0 +1,14 @@
#ifndef _TIMEKEEPING_INTERNAL_H
#define _TIMEKEEPING_INTERNAL_H
/*
 * timekeeping debug functions
 */
#include <linux/time.h>

#ifdef CONFIG_DEBUG_FS
extern void tk_debug_account_sleep_time(struct timespec *t);
#else
#define tk_debug_account_sleep_time(x)
#endif

#endif /* _TIMEKEEPING_INTERNAL_H */
@@ -149,9 +149,11 @@ static unsigned long round_jiffies_common(unsigned long j, int cpu,
|
||||
/* now that we have rounded, subtract the extra skew again */
|
||||
j -= cpu * 3;
|
||||
|
||||
if (j <= jiffies) /* rounding ate our timeout entirely; */
|
||||
return original;
|
||||
return j;
|
||||
/*
|
||||
* Make sure j is still in the future. Otherwise return the
|
||||
* unmodified value.
|
||||
*/
|
||||
return time_is_after_jiffies(j) ? j : original;
|
||||
}
|
||||
|
||||
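time_is_after_jiffies() used above relies on the usual wrap-safe jiffies comparison: subtract in unsigned arithmetic and test the sign of the result, so the check keeps working across a counter wrap. A small sketch of that trick (simplified; the kernel's macros add type checking):

#include <stdio.h>

/* Wrap-safe "a is after b": do the subtraction in unsigned arithmetic and
 * look at the sign of the result, so a counter wrap does not break it.
 * time_is_after_jiffies(j) is essentially time_after(j, jiffies). */
static int time_after(unsigned long a, unsigned long b)
{
        return (long)(b - a) < 0;
}

int main(void)
{
        unsigned long jiffies = (unsigned long)-10;     /* about to wrap */
        unsigned long timeout = jiffies + 20;           /* wraps past zero */

        printf("naive compare says expired:   %d\n", timeout <= jiffies);
        printf("wrap-safe compare says later: %d\n", time_after(timeout, jiffies));
        return 0;
}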
/**
|
||||
@@ -1503,11 +1505,11 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
|
||||
}
|
||||
EXPORT_SYMBOL(schedule_timeout_uninterruptible);
|
||||
|
||||
static int __cpuinit init_timers_cpu(int cpu)
|
||||
static int init_timers_cpu(int cpu)
|
||||
{
|
||||
int j;
|
||||
struct tvec_base *base;
|
||||
static char __cpuinitdata tvec_base_done[NR_CPUS];
|
||||
static char tvec_base_done[NR_CPUS];
|
||||
|
||||
if (!tvec_base_done[cpu]) {
|
||||
static char boot_done;
|
||||
@@ -1575,7 +1577,7 @@ static void migrate_timer_list(struct tvec_base *new_base, struct list_head *hea
|
||||
}
|
||||
}
|
||||
|
||||
static void __cpuinit migrate_timers(int cpu)
|
||||
static void migrate_timers(int cpu)
|
||||
{
|
||||
struct tvec_base *old_base;
|
||||
struct tvec_base *new_base;
|
||||
@@ -1608,7 +1610,7 @@ static void __cpuinit migrate_timers(int cpu)
|
||||
}
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
||||
|
||||
static int __cpuinit timer_cpu_notify(struct notifier_block *self,
|
||||
static int timer_cpu_notify(struct notifier_block *self,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
long cpu = (long)hcpu;
|
||||
@@ -1633,7 +1635,7 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self,
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
static struct notifier_block __cpuinitdata timers_nb = {
|
||||
static struct notifier_block timers_nb = {
|
||||
.notifier_call = timer_cpu_notify,
|
||||
};
|
||||
|
||||
|
@@ -413,6 +413,17 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ftrace_sync(struct work_struct *work)
|
||||
{
|
||||
/*
|
||||
* This function is just a stub to implement a hard force
|
||||
* of synchronize_sched(). This requires synchronizing
|
||||
* tasks even in userspace and idle.
|
||||
*
|
||||
* Yes, function tracing is rude.
|
||||
*/
|
||||
}
|
||||
|
||||
static int __unregister_ftrace_function(struct ftrace_ops *ops)
|
||||
{
|
||||
int ret;
|
||||
@@ -440,8 +451,12 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
|
||||
* so there'll be no new users. We must ensure
|
||||
* all current users are done before we free
|
||||
* the control data.
|
||||
* Note synchronize_sched() is not enough, as we
|
||||
* use preempt_disable() to do RCU, but the function
|
||||
* tracer can be called where RCU is not active
|
||||
* (before user_exit()).
|
||||
*/
|
||||
synchronize_sched();
|
||||
schedule_on_each_cpu(ftrace_sync);
|
||||
control_ops_free(ops);
|
||||
}
|
||||
} else
|
||||
@@ -456,9 +471,13 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
|
||||
/*
|
||||
* Dynamic ops may be freed, we must make sure that all
|
||||
* callers are done before leaving this function.
|
||||
*
|
||||
* Again, normal synchronize_sched() is not good enough.
|
||||
* We need to do a hard force of sched synchronization.
|
||||
*/
|
||||
if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
|
||||
synchronize_sched();
|
||||
schedule_on_each_cpu(ftrace_sync);
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -622,12 +641,18 @@ static int function_stat_show(struct seq_file *m, void *v)
|
||||
if (rec->counter <= 1)
|
||||
stddev = 0;
|
||||
else {
|
||||
stddev = rec->time_squared - rec->counter * avg * avg;
|
||||
/*
|
||||
* Apply Welford's method:
|
||||
* s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2)
|
||||
*/
|
||||
stddev = rec->counter * rec->time_squared -
|
||||
rec->time * rec->time;
|
||||
|
||||
/*
|
||||
* Divide only 1000 for ns^2 -> us^2 conversion.
|
||||
* trace_print_graph_duration will divide 1000 again.
|
||||
*/
|
||||
do_div(stddev, (rec->counter - 1) * 1000);
|
||||
do_div(stddev, rec->counter * (rec->counter - 1) * 1000);
|
||||
}
|
||||
|
||||
trace_seq_init(&s);
|
||||
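The corrected standard-deviation code uses the sum-of-squares identity s^2 = (n * sum(x_i^2) - (sum x_i)^2) / (n * (n - 1)); the bug was dividing by (n - 1) alone, which overstates the variance by a factor of n. A quick numeric check of the identity (plain C with doubles instead of the kernel's do_div()):

#include <stdio.h>

/* Sample variance from running sums:
 *   s^2 = (n * sum(x_i^2) - (sum x_i)^2) / (n * (n - 1))
 * Dividing by (n - 1) alone, as the old code did, overstates it n-fold. */
static double variance_from_sums(double n, double sum, double sum_sq)
{
        if (n <= 1)
                return 0.0;
        return (n * sum_sq - sum * sum) / (n * (n - 1));
}

int main(void)
{
        /* samples 2, 4, 6: n = 3, sum = 12, sum of squares = 56 */
        printf("variance = %.1f (expected 4.0)\n",
               variance_from_sums(3, 12, 56));
        return 0;
}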
@@ -3512,8 +3537,12 @@ EXPORT_SYMBOL_GPL(ftrace_set_global_notrace);
|
||||
static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
|
||||
static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
|
||||
|
||||
/* Used by function selftest to not test if filter is set */
|
||||
bool ftrace_filter_param __initdata;
|
||||
|
||||
static int __init set_ftrace_notrace(char *str)
|
||||
{
|
||||
ftrace_filter_param = true;
|
||||
strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
|
||||
return 1;
|
||||
}
|
||||
@@ -3521,6 +3550,7 @@ __setup("ftrace_notrace=", set_ftrace_notrace);
|
||||
|
||||
static int __init set_ftrace_filter(char *str)
|
||||
{
|
||||
ftrace_filter_param = true;
|
||||
strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
|
||||
return 1;
|
||||
}
|
||||
|
@@ -36,11 +36,11 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = trace_seq_printf(s, "# compressed entry header\n");
|
||||
ret = trace_seq_printf(s, "\ttype_len : 5 bits\n");
|
||||
ret = trace_seq_printf(s, "\ttime_delta : 27 bits\n");
|
||||
ret = trace_seq_printf(s, "\tarray : 32 bits\n");
|
||||
ret = trace_seq_printf(s, "\n");
|
||||
ret = trace_seq_puts(s, "# compressed entry header\n");
|
||||
ret = trace_seq_puts(s, "\ttype_len : 5 bits\n");
|
||||
ret = trace_seq_puts(s, "\ttime_delta : 27 bits\n");
|
||||
ret = trace_seq_puts(s, "\tarray : 32 bits\n");
|
||||
ret = trace_seq_putc(s, '\n');
|
||||
ret = trace_seq_printf(s, "\tpadding : type == %d\n",
|
||||
RINGBUF_TYPE_PADDING);
|
||||
ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
|
||||
@@ -1066,7 +1066,7 @@ static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer,
|
||||
}
|
||||
|
||||
/**
|
||||
* check_pages - integrity check of buffer pages
|
||||
* rb_check_pages - integrity check of buffer pages
|
||||
* @cpu_buffer: CPU buffer with pages to test
|
||||
*
|
||||
* As a safety measure we check to make sure the data pages have not
|
||||
@@ -1258,7 +1258,7 @@ static int rb_cpu_notify(struct notifier_block *self,
|
||||
#endif
|
||||
|
||||
/**
|
||||
* ring_buffer_alloc - allocate a new ring_buffer
|
||||
* __ring_buffer_alloc - allocate a new ring_buffer
|
||||
* @size: the size in bytes per cpu that is needed.
|
||||
* @flags: attributes to set for the ring buffer.
|
||||
*
|
||||
@@ -1607,6 +1607,7 @@ static void update_pages_handler(struct work_struct *work)
|
||||
* ring_buffer_resize - resize the ring buffer
|
||||
* @buffer: the buffer to resize.
|
||||
* @size: the new size.
|
||||
* @cpu_id: the cpu buffer to resize
|
||||
*
|
||||
* Minimum size is 2 * BUF_PAGE_SIZE.
|
||||
*
|
||||
@@ -3956,11 +3957,11 @@ EXPORT_SYMBOL_GPL(ring_buffer_consume);
|
||||
* expected.
|
||||
*
|
||||
* After a sequence of ring_buffer_read_prepare calls, the user is
|
||||
* expected to make at least one call to ring_buffer_prepare_sync.
|
||||
* expected to make at least one call to ring_buffer_read_prepare_sync.
|
||||
* Afterwards, ring_buffer_read_start is invoked to get things going
|
||||
* for real.
|
||||
*
|
||||
* This overall must be paired with ring_buffer_finish.
|
||||
* This overall must be paired with ring_buffer_read_finish.
|
||||
*/
|
||||
struct ring_buffer_iter *
|
||||
ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
|
||||
@@ -4009,7 +4010,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
|
||||
* an intervening ring_buffer_read_prepare_sync must have been
|
||||
* performed.
|
||||
*
|
||||
* Must be paired with ring_buffer_finish.
|
||||
* Must be paired with ring_buffer_read_finish.
|
||||
*/
|
||||
void
|
||||
ring_buffer_read_start(struct ring_buffer_iter *iter)
|
||||
@@ -4031,7 +4032,7 @@ ring_buffer_read_start(struct ring_buffer_iter *iter)
|
||||
EXPORT_SYMBOL_GPL(ring_buffer_read_start);
|
||||
|
||||
/**
|
||||
* ring_buffer_finish - finish reading the iterator of the buffer
|
||||
* ring_buffer_read_finish - finish reading the iterator of the buffer
|
||||
* @iter: The iterator retrieved by ring_buffer_start
|
||||
*
|
||||
* This re-enables the recording to the buffer, and frees the
|
||||
@@ -4346,6 +4347,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
|
||||
/**
|
||||
* ring_buffer_alloc_read_page - allocate a page to read from buffer
|
||||
* @buffer: the buffer to allocate for.
|
||||
* @cpu: the cpu buffer to allocate.
|
||||
*
|
||||
* This function is used in conjunction with ring_buffer_read_page.
|
||||
* When reading a full page from the ring buffer, these functions
|
||||
@@ -4403,7 +4405,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
|
||||
* to swap with a page in the ring buffer.
|
||||
*
|
||||
* for example:
|
||||
* rpage = ring_buffer_alloc_read_page(buffer);
|
||||
* rpage = ring_buffer_alloc_read_page(buffer, cpu);
|
||||
* if (!rpage)
|
||||
* return error;
|
||||
* ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
|
||||
|
@@ -115,6 +115,9 @@ cpumask_var_t __read_mostly tracing_buffer_mask;
|
||||
|
||||
enum ftrace_dump_mode ftrace_dump_on_oops;
|
||||
|
||||
/* When set, tracing will stop when a WARN*() is hit */
|
||||
int __disable_trace_on_warning;
|
||||
|
||||
static int tracing_set_tracer(const char *buf);
|
||||
|
||||
#define MAX_TRACER_SIZE 100
|
||||
@@ -149,6 +152,13 @@ static int __init set_ftrace_dump_on_oops(char *str)
|
||||
}
|
||||
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
|
||||
|
||||
static int __init stop_trace_on_warning(char *str)
|
||||
{
|
||||
__disable_trace_on_warning = 1;
|
||||
return 1;
|
||||
}
|
||||
__setup("traceoff_on_warning=", stop_trace_on_warning);
|
||||
|
||||
static int __init boot_alloc_snapshot(char *str)
|
||||
{
|
||||
allocate_snapshot = true;
|
||||
@@ -170,6 +180,7 @@ static int __init set_trace_boot_options(char *str)
|
||||
}
|
||||
__setup("trace_options=", set_trace_boot_options);
|
||||
|
||||
|
||||
unsigned long long ns2usecs(cycle_t nsec)
|
||||
{
|
||||
nsec += 500;
|
||||
@@ -193,6 +204,37 @@ static struct trace_array global_trace;
|
||||
|
||||
LIST_HEAD(ftrace_trace_arrays);
|
||||
|
||||
int trace_array_get(struct trace_array *this_tr)
|
||||
{
|
||||
struct trace_array *tr;
|
||||
int ret = -ENODEV;
|
||||
|
||||
mutex_lock(&trace_types_lock);
|
||||
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
|
||||
if (tr == this_tr) {
|
||||
tr->ref++;
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&trace_types_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __trace_array_put(struct trace_array *this_tr)
|
||||
{
|
||||
WARN_ON(!this_tr->ref);
|
||||
this_tr->ref--;
|
||||
}
|
||||
|
||||
void trace_array_put(struct trace_array *this_tr)
|
||||
{
|
||||
mutex_lock(&trace_types_lock);
|
||||
__trace_array_put(this_tr);
|
||||
mutex_unlock(&trace_types_lock);
|
||||
}
|
||||
|
||||
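trace_array_get() above only bumps the reference if the trace_array is still on the global list, and it does the lookup and the bump under the same mutex so the array cannot disappear in between. A toy model of that lookup-then-get pattern (a pthread mutex stands in for trace_types_lock; names are invented):

#include <pthread.h>
#include <stdio.h>

struct obj { struct obj *next; int ref; };

static struct obj obj_b = { NULL, 0 };
static struct obj obj_a = { &obj_b, 0 };
static struct obj *obj_list = &obj_a;
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Take a reference only if the object is still on the list; the walk and
 * the ref bump happen under one lock, so the object cannot be freed in
 * between. */
static int obj_get(struct obj *wanted)
{
        struct obj *o;
        int ret = -1;

        pthread_mutex_lock(&list_lock);
        for (o = obj_list; o; o = o->next) {
                if (o == wanted) {
                        o->ref++;
                        ret = 0;
                        break;
                }
        }
        pthread_mutex_unlock(&list_lock);
        return ret;
}

static void obj_put(struct obj *o)
{
        pthread_mutex_lock(&list_lock);
        o->ref--;
        pthread_mutex_unlock(&list_lock);
}

int main(void)
{
        if (obj_get(&obj_b) == 0) {
                printf("got a reference, ref=%d\n", obj_b.ref);
                obj_put(&obj_b);
        }
        return 0;
}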
int filter_current_check_discard(struct ring_buffer *buffer,
|
||||
struct ftrace_event_call *call, void *rec,
|
||||
struct ring_buffer_event *event)
|
||||
@@ -215,9 +257,24 @@ cycle_t ftrace_now(int cpu)
|
||||
return ts;
|
||||
}
|
||||
|
||||
/**
|
||||
* tracing_is_enabled - Show if global_trace has been disabled
|
||||
*
|
||||
* Shows if the global trace has been enabled or not. It uses the
|
||||
* mirror flag "buffer_disabled" to be used in fast paths such as for
|
||||
* the irqsoff tracer. But it may be inaccurate due to races. If you
|
||||
* need to know the accurate state, use tracing_is_on() which is a little
|
||||
* slower, but accurate.
|
||||
*/
|
||||
int tracing_is_enabled(void)
|
||||
{
|
||||
return tracing_is_on();
|
||||
/*
|
||||
* For quick access (irqsoff uses this in fast path), just
|
||||
* return the mirror variable of the state of the ring buffer.
|
||||
* It's a little racy, but we don't really care.
|
||||
*/
|
||||
smp_rmb();
|
||||
return !global_trace.buffer_disabled;
|
||||
}
|
||||
|
||||
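tracing_is_enabled() now reads the buffer_disabled mirror instead of querying the ring buffer: a deliberately racy fast path paired with memory barriers, while tracing_is_on() stays as the slower accurate check. A sketch of that mirror-flag idea with C11 atomics (the release/acquire pair plays the role of smp_wmb()/smp_rmb(); names are made up):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* A cheap, slightly stale mirror of an expensive-to-query state: writers
 * update the real state first, then publish the mirror with a release
 * store; readers use an acquire load and tolerate the small race. */
static atomic_bool buffer_disabled;

static void ring_buffer_disable(void)
{
        /* ... really stop recording here ... */
        atomic_store_explicit(&buffer_disabled, true, memory_order_release);
}

static bool tracing_enabled_fast(void)
{
        return !atomic_load_explicit(&buffer_disabled, memory_order_acquire);
}

int main(void)
{
        printf("enabled: %d\n", tracing_enabled_fast());
        ring_buffer_disable();
        printf("enabled: %d\n", tracing_enabled_fast());
        return 0;
}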
/*
|
||||
@@ -240,7 +297,7 @@ static struct tracer *trace_types __read_mostly;
|
||||
/*
|
||||
* trace_types_lock is used to protect the trace_types list.
|
||||
*/
|
||||
static DEFINE_MUTEX(trace_types_lock);
|
||||
DEFINE_MUTEX(trace_types_lock);
|
||||
|
||||
/*
|
||||
* serialize the access of the ring buffer
|
||||
@@ -330,6 +387,23 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
|
||||
TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
|
||||
TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
|
||||
|
||||
static void tracer_tracing_on(struct trace_array *tr)
|
||||
{
|
||||
if (tr->trace_buffer.buffer)
|
||||
ring_buffer_record_on(tr->trace_buffer.buffer);
|
||||
/*
|
||||
* This flag is looked at when buffers haven't been allocated
|
||||
* yet, or by some tracers (like irqsoff), that just want to
|
||||
* know if the ring buffer has been disabled, but it can handle
|
||||
* races of where it gets disabled but we still do a record.
|
||||
* As the check is in the fast path of the tracers, it is more
|
||||
* important to be fast than accurate.
|
||||
*/
|
||||
tr->buffer_disabled = 0;
|
||||
/* Make the flag seen by readers */
|
||||
smp_wmb();
|
||||
}
|
||||
|
||||
/**
|
||||
* tracing_on - enable tracing buffers
|
||||
*
|
||||
@@ -338,15 +412,7 @@ unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
|
||||
*/
|
||||
void tracing_on(void)
|
||||
{
|
||||
if (global_trace.trace_buffer.buffer)
|
||||
ring_buffer_record_on(global_trace.trace_buffer.buffer);
|
||||
/*
|
||||
* This flag is only looked at when buffers haven't been
|
||||
* allocated yet. We don't really care about the race
|
||||
* between setting this flag and actually turning
|
||||
* on the buffer.
|
||||
*/
|
||||
global_trace.buffer_disabled = 0;
|
||||
tracer_tracing_on(&global_trace);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tracing_on);
|
||||
|
||||
@@ -540,6 +606,23 @@ void tracing_snapshot_alloc(void)
|
||||
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
|
||||
#endif /* CONFIG_TRACER_SNAPSHOT */
|
||||
|
||||
static void tracer_tracing_off(struct trace_array *tr)
|
||||
{
|
||||
if (tr->trace_buffer.buffer)
|
||||
ring_buffer_record_off(tr->trace_buffer.buffer);
|
||||
/*
|
||||
* This flag is looked at when buffers haven't been allocated
|
||||
* yet, or by some tracers (like irqsoff), that just want to
|
||||
* know if the ring buffer has been disabled, but it can handle
|
||||
* races of where it gets disabled but we still do a record.
|
||||
* As the check is in the fast path of the tracers, it is more
|
||||
* important to be fast than accurate.
|
||||
*/
|
||||
tr->buffer_disabled = 1;
|
||||
/* Make the flag seen by readers */
|
||||
smp_wmb();
|
||||
}
|
||||
|
||||
/**
|
||||
* tracing_off - turn off tracing buffers
|
||||
*
|
||||
@@ -550,26 +633,35 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
|
||||
*/
|
||||
void tracing_off(void)
|
||||
{
|
||||
if (global_trace.trace_buffer.buffer)
|
||||
ring_buffer_record_off(global_trace.trace_buffer.buffer);
|
||||
/*
|
||||
* This flag is only looked at when buffers haven't been
|
||||
* allocated yet. We don't really care about the race
|
||||
* between setting this flag and actually turning
|
||||
* on the buffer.
|
||||
*/
|
||||
global_trace.buffer_disabled = 1;
|
||||
tracer_tracing_off(&global_trace);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tracing_off);
|
||||
|
||||
void disable_trace_on_warning(void)
|
||||
{
|
||||
if (__disable_trace_on_warning)
|
||||
tracing_off();
|
||||
}
|
||||
|
||||
/**
|
||||
* tracer_tracing_is_on - show real state of ring buffer enabled
|
||||
* @tr : the trace array to know if ring buffer is enabled
|
||||
*
|
||||
* Shows real state of the ring buffer if it is enabled or not.
|
||||
*/
|
||||
static int tracer_tracing_is_on(struct trace_array *tr)
|
||||
{
|
||||
if (tr->trace_buffer.buffer)
|
||||
return ring_buffer_record_is_on(tr->trace_buffer.buffer);
|
||||
return !tr->buffer_disabled;
|
||||
}
|
||||
|
||||
/**
|
||||
* tracing_is_on - show state of ring buffers enabled
|
||||
*/
|
||||
int tracing_is_on(void)
|
||||
{
|
||||
if (global_trace.trace_buffer.buffer)
|
||||
return ring_buffer_record_is_on(global_trace.trace_buffer.buffer);
|
||||
return !global_trace.buffer_disabled;
|
||||
return tracer_tracing_is_on(&global_trace);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tracing_is_on);
|
||||
|
||||
@@ -1543,15 +1635,6 @@ trace_function(struct trace_array *tr,
|
||||
__buffer_unlock_commit(buffer, event);
|
||||
}
|
||||
|
||||
void
|
||||
ftrace(struct trace_array *tr, struct trace_array_cpu *data,
|
||||
unsigned long ip, unsigned long parent_ip, unsigned long flags,
|
||||
int pc)
|
||||
{
|
||||
if (likely(!atomic_read(&data->disabled)))
|
||||
trace_function(tr, ip, parent_ip, flags, pc);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_STACKTRACE
|
||||
|
||||
#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
|
||||
@@ -2768,10 +2851,9 @@ static const struct seq_operations tracer_seq_ops = {
|
||||
};
|
||||
|
||||
static struct trace_iterator *
|
||||
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
|
||||
__tracing_open(struct trace_array *tr, struct trace_cpu *tc,
|
||||
struct inode *inode, struct file *file, bool snapshot)
|
||||
{
|
||||
struct trace_cpu *tc = inode->i_private;
|
||||
struct trace_array *tr = tc->tr;
|
||||
struct trace_iterator *iter;
|
||||
int cpu;
|
||||
|
||||
@@ -2850,8 +2932,6 @@ __tracing_open(struct inode *inode, struct file *file, bool snapshot)
|
||||
tracing_iter_reset(iter, cpu);
|
||||
}
|
||||
|
||||
tr->ref++;
|
||||
|
||||
mutex_unlock(&trace_types_lock);
|
||||
|
||||
return iter;
|
||||
@@ -2874,6 +2954,43 @@ int tracing_open_generic(struct inode *inode, struct file *filp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Open and update trace_array ref count.
|
||||
* Must have the current trace_array passed to it.
|
||||
*/
|
||||
static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct trace_array *tr = inode->i_private;
|
||||
|
||||
if (tracing_disabled)
|
||||
return -ENODEV;
|
||||
|
||||
if (trace_array_get(tr) < 0)
|
||||
return -ENODEV;
|
||||
|
||||
filp->private_data = inode->i_private;
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
static int tracing_open_generic_tc(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct trace_cpu *tc = inode->i_private;
|
||||
struct trace_array *tr = tc->tr;
|
||||
|
||||
if (tracing_disabled)
|
||||
return -ENODEV;
|
||||
|
||||
if (trace_array_get(tr) < 0)
|
||||
return -ENODEV;
|
||||
|
||||
filp->private_data = inode->i_private;
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
static int tracing_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct seq_file *m = file->private_data;
|
||||
@@ -2881,17 +2998,19 @@ static int tracing_release(struct inode *inode, struct file *file)
|
||||
struct trace_array *tr;
|
||||
int cpu;
|
||||
|
||||
if (!(file->f_mode & FMODE_READ))
|
||||
/* Writes do not use seq_file, need to grab tr from inode */
|
||||
if (!(file->f_mode & FMODE_READ)) {
|
||||
struct trace_cpu *tc = inode->i_private;
|
||||
|
||||
trace_array_put(tc->tr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
iter = m->private;
|
||||
tr = iter->tr;
|
||||
|
||||
mutex_lock(&trace_types_lock);
|
||||
|
||||
WARN_ON(!tr->ref);
|
||||
tr->ref--;
|
||||
|
||||
for_each_tracing_cpu(cpu) {
|
||||
if (iter->buffer_iter[cpu])
|
||||
ring_buffer_read_finish(iter->buffer_iter[cpu]);
|
||||
@@ -2903,6 +3022,9 @@ static int tracing_release(struct inode *inode, struct file *file)
|
||||
if (!iter->snapshot)
|
||||
/* reenable tracing if it was previously enabled */
|
||||
tracing_start_tr(tr);
|
||||
|
||||
__trace_array_put(tr);
|
||||
|
||||
mutex_unlock(&trace_types_lock);
|
||||
|
||||
mutex_destroy(&iter->mutex);
|
||||
@@ -2910,20 +3032,49 @@ static int tracing_release(struct inode *inode, struct file *file)
|
||||
kfree(iter->trace);
|
||||
kfree(iter->buffer_iter);
|
||||
seq_release_private(inode, file);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tracing_release_generic_tr(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct trace_array *tr = inode->i_private;
|
||||
|
||||
trace_array_put(tr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tracing_release_generic_tc(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct trace_cpu *tc = inode->i_private;
|
||||
struct trace_array *tr = tc->tr;
|
||||
|
||||
trace_array_put(tr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tracing_single_release_tr(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct trace_array *tr = inode->i_private;
|
||||
|
||||
trace_array_put(tr);
|
||||
|
||||
return single_release(inode, file);
|
||||
}
|
||||
|
||||
static int tracing_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct trace_cpu *tc = inode->i_private;
|
||||
struct trace_array *tr = tc->tr;
|
||||
struct trace_iterator *iter;
|
||||
int ret = 0;
|
||||
|
||||
if (trace_array_get(tr) < 0)
|
||||
return -ENODEV;
|
||||
|
||||
/* If this file was open for write, then erase contents */
|
||||
if ((file->f_mode & FMODE_WRITE) &&
|
||||
(file->f_flags & O_TRUNC)) {
|
||||
struct trace_cpu *tc = inode->i_private;
|
||||
struct trace_array *tr = tc->tr;
|
||||
|
||||
if (tc->cpu == RING_BUFFER_ALL_CPUS)
|
||||
tracing_reset_online_cpus(&tr->trace_buffer);
|
||||
else
|
||||
@@ -2931,12 +3082,16 @@ static int tracing_open(struct inode *inode, struct file *file)
|
||||
}
|
||||
|
||||
if (file->f_mode & FMODE_READ) {
|
||||
iter = __tracing_open(inode, file, false);
|
||||
iter = __tracing_open(tr, tc, inode, file, false);
|
||||
if (IS_ERR(iter))
|
||||
ret = PTR_ERR(iter);
|
||||
else if (trace_flags & TRACE_ITER_LATENCY_FMT)
|
||||
iter->iter_flags |= TRACE_FILE_LAT_FMT;
|
||||
}
|
||||
|
||||
if (ret < 0)
|
||||
trace_array_put(tr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -3293,17 +3448,27 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
|
||||
|
||||
static int tracing_trace_options_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct trace_array *tr = inode->i_private;
|
||||
int ret;
|
||||
|
||||
if (tracing_disabled)
|
||||
return -ENODEV;
|
||||
|
||||
return single_open(file, tracing_trace_options_show, inode->i_private);
|
||||
if (trace_array_get(tr) < 0)
|
||||
return -ENODEV;
|
||||
|
||||
ret = single_open(file, tracing_trace_options_show, inode->i_private);
|
||||
if (ret < 0)
|
||||
trace_array_put(tr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct file_operations tracing_iter_fops = {
|
||||
.open = tracing_trace_options_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
.release = tracing_single_release_tr,
|
||||
.write = tracing_trace_options_write,
|
||||
};
|
||||
|
||||
@@ -3379,14 +3544,14 @@ static const char readme_msg[] =
|
||||
"\n snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n"
|
||||
"\t\t\t Read the contents for more information\n"
|
||||
#endif
|
||||
#ifdef CONFIG_STACKTRACE
|
||||
#ifdef CONFIG_STACK_TRACER
|
||||
" stack_trace\t\t- Shows the max stack trace when active\n"
|
||||
" stack_max_size\t- Shows current max stack size that was traced\n"
|
||||
"\t\t\t Write into this file to reset the max size (trigger a new trace)\n"
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
" stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n"
|
||||
#endif
|
||||
#endif /* CONFIG_STACKTRACE */
|
||||
#endif /* CONFIG_STACK_TRACER */
|
||||
;
|
||||
|
||||
static ssize_t
|
||||
@@ -3791,12 +3956,16 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
|
||||
if (tracing_disabled)
|
||||
return -ENODEV;
|
||||
|
||||
if (trace_array_get(tr) < 0)
|
||||
return -ENODEV;
|
||||
|
||||
mutex_lock(&trace_types_lock);
|
||||
|
||||
/* create a buffer to store the information to pass to userspace */
|
||||
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
|
||||
if (!iter) {
|
||||
ret = -ENOMEM;
|
||||
__trace_array_put(tr);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -3843,6 +4012,7 @@ out:
|
||||
fail:
|
||||
kfree(iter->trace);
|
||||
kfree(iter);
|
||||
__trace_array_put(tr);
|
||||
mutex_unlock(&trace_types_lock);
|
||||
return ret;
|
||||
}
|
||||
@@ -3850,6 +4020,8 @@ fail:
|
||||
static int tracing_release_pipe(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct trace_iterator *iter = file->private_data;
|
||||
struct trace_cpu *tc = inode->i_private;
|
||||
struct trace_array *tr = tc->tr;
|
||||
|
||||
mutex_lock(&trace_types_lock);
|
||||
|
||||
@@ -3863,6 +4035,8 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
|
||||
kfree(iter->trace);
|
||||
kfree(iter);
|
||||
|
||||
trace_array_put(tr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3939,7 +4113,7 @@ static int tracing_wait_pipe(struct file *filp)
|
||||
*
|
||||
* iter->pos will be 0 if we haven't read anything.
|
||||
*/
|
||||
if (!tracing_is_enabled() && iter->pos)
|
||||
if (!tracing_is_on() && iter->pos)
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -4320,6 +4494,8 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
|
||||
/* resize the ring buffer to 0 */
|
||||
tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
|
||||
|
||||
trace_array_put(tr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -4328,6 +4504,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
|
||||
size_t cnt, loff_t *fpos)
|
||||
{
|
||||
unsigned long addr = (unsigned long)ubuf;
|
||||
struct trace_array *tr = filp->private_data;
|
||||
struct ring_buffer_event *event;
|
||||
struct ring_buffer *buffer;
|
||||
struct print_entry *entry;
|
||||
@@ -4387,7 +4564,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
|
||||
|
||||
local_save_flags(irq_flags);
|
||||
size = sizeof(*entry) + cnt + 2; /* possible \n added */
|
||||
buffer = global_trace.trace_buffer.buffer;
|
||||
buffer = tr->trace_buffer.buffer;
|
||||
event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
|
||||
irq_flags, preempt_count());
|
||||
if (!event) {
|
||||
@@ -4495,10 +4672,20 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
|
||||
|
||||
static int tracing_clock_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct trace_array *tr = inode->i_private;
|
||||
int ret;
|
||||
|
||||
if (tracing_disabled)
|
||||
return -ENODEV;
|
||||
|
||||
return single_open(file, tracing_clock_show, inode->i_private);
|
||||
if (trace_array_get(tr))
|
||||
return -ENODEV;
|
||||
|
||||
ret = single_open(file, tracing_clock_show, inode->i_private);
|
||||
if (ret < 0)
|
||||
trace_array_put(tr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct ftrace_buffer_info {
|
||||
@@ -4511,30 +4698,40 @@ struct ftrace_buffer_info {
|
||||
static int tracing_snapshot_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct trace_cpu *tc = inode->i_private;
|
||||
struct trace_array *tr = tc->tr;
|
||||
struct trace_iterator *iter;
|
||||
struct seq_file *m;
|
||||
int ret = 0;
|
||||
|
||||
if (trace_array_get(tr) < 0)
|
||||
return -ENODEV;
|
||||
|
||||
if (file->f_mode & FMODE_READ) {
|
||||
iter = __tracing_open(inode, file, true);
|
||||
iter = __tracing_open(tr, tc, inode, file, true);
|
||||
if (IS_ERR(iter))
|
||||
ret = PTR_ERR(iter);
|
||||
} else {
|
||||
/* Writes still need the seq_file to hold the private data */
|
||||
ret = -ENOMEM;
|
||||
m = kzalloc(sizeof(*m), GFP_KERNEL);
|
||||
if (!m)
|
||||
return -ENOMEM;
|
||||
goto out;
|
||||
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
|
||||
if (!iter) {
|
||||
kfree(m);
|
||||
return -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
iter->tr = tc->tr;
|
||||
ret = 0;
|
||||
|
||||
iter->tr = tr;
|
||||
iter->trace_buffer = &tc->tr->max_buffer;
|
||||
iter->cpu_file = tc->cpu;
|
||||
m->private = iter;
|
||||
file->private_data = m;
|
||||
}
|
||||
out:
|
||||
if (ret < 0)
|
||||
trace_array_put(tr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -4616,9 +4813,12 @@ out:
|
||||
static int tracing_snapshot_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct seq_file *m = file->private_data;
|
||||
int ret;
|
||||
|
||||
ret = tracing_release(inode, file);
|
||||
|
||||
if (file->f_mode & FMODE_READ)
|
||||
return tracing_release(inode, file);
|
||||
return ret;
|
||||
|
||||
/* If write only, the seq_file is just a stub */
|
||||
if (m)
|
||||
@@ -4684,34 +4884,38 @@ static const struct file_operations tracing_pipe_fops = {
|
||||
};
|
||||
|
||||
static const struct file_operations tracing_entries_fops = {
|
||||
.open = tracing_open_generic,
|
||||
.open = tracing_open_generic_tc,
|
||||
.read = tracing_entries_read,
|
||||
.write = tracing_entries_write,
|
||||
.llseek = generic_file_llseek,
|
||||
.release = tracing_release_generic_tc,
|
||||
};
|
||||
|
||||
static const struct file_operations tracing_total_entries_fops = {
|
||||
.open = tracing_open_generic,
|
||||
.open = tracing_open_generic_tr,
|
||||
.read = tracing_total_entries_read,
|
||||
.llseek = generic_file_llseek,
|
||||
.release = tracing_release_generic_tr,
|
||||
};
|
||||
|
||||
static const struct file_operations tracing_free_buffer_fops = {
|
||||
.open = tracing_open_generic_tr,
|
||||
.write = tracing_free_buffer_write,
|
||||
.release = tracing_free_buffer_release,
|
||||
};
|
||||
|
||||
static const struct file_operations tracing_mark_fops = {
|
||||
.open = tracing_open_generic,
|
||||
.open = tracing_open_generic_tr,
|
||||
.write = tracing_mark_write,
|
||||
.llseek = generic_file_llseek,
|
||||
.release = tracing_release_generic_tr,
|
||||
};
|
||||
|
||||
static const struct file_operations trace_clock_fops = {
|
||||
.open = tracing_clock_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = single_release,
|
||||
.release = tracing_single_release_tr,
|
||||
.write = tracing_clock_write,
|
||||
};
|
||||
|
||||
@@ -4739,18 +4943,22 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
|
||||
struct trace_cpu *tc = inode->i_private;
|
||||
struct trace_array *tr = tc->tr;
|
||||
struct ftrace_buffer_info *info;
|
||||
int ret;
|
||||
|
||||
if (tracing_disabled)
|
||||
return -ENODEV;
|
||||
|
||||
if (trace_array_get(tr) < 0)
|
||||
return -ENODEV;
|
||||
|
||||
info = kzalloc(sizeof(*info), GFP_KERNEL);
|
||||
if (!info)
|
||||
if (!info) {
|
||||
trace_array_put(tr);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
mutex_lock(&trace_types_lock);
|
||||
|
||||
tr->ref++;
|
||||
|
||||
info->iter.tr = tr;
|
||||
info->iter.cpu_file = tc->cpu;
|
||||
info->iter.trace = tr->current_trace;
|
||||
@@ -4763,7 +4971,11 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
|
||||
|
||||
mutex_unlock(&trace_types_lock);
|
||||
|
||||
return nonseekable_open(inode, filp);
|
||||
ret = nonseekable_open(inode, filp);
|
||||
if (ret < 0)
|
||||
trace_array_put(tr);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
@@ -4863,8 +5075,7 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
|
||||
|
||||
mutex_lock(&trace_types_lock);
|
||||
|
||||
WARN_ON(!iter->tr->ref);
|
||||
iter->tr->ref--;
|
||||
__trace_array_put(iter->tr);
|
||||
|
||||
if (info->spare)
|
||||
ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
|
||||
@@ -5126,9 +5337,10 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
|
||||
}
|
||||
|
||||
static const struct file_operations tracing_stats_fops = {
|
||||
.open = tracing_open_generic,
|
||||
.open = tracing_open_generic_tc,
|
||||
.read = tracing_stats_read,
|
||||
.llseek = generic_file_llseek,
|
||||
.release = tracing_release_generic_tc,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
@@ -5612,15 +5824,10 @@ rb_simple_read(struct file *filp, char __user *ubuf,
|
||||
size_t cnt, loff_t *ppos)
|
||||
{
|
||||
struct trace_array *tr = filp->private_data;
|
||||
struct ring_buffer *buffer = tr->trace_buffer.buffer;
|
||||
char buf[64];
|
||||
int r;
|
||||
|
||||
if (buffer)
|
||||
r = ring_buffer_record_is_on(buffer);
|
||||
else
|
||||
r = 0;
|
||||
|
||||
r = tracer_tracing_is_on(tr);
|
||||
r = sprintf(buf, "%d\n", r);
|
||||
|
||||
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
|
||||
@@ -5642,11 +5849,11 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
|
||||
if (buffer) {
|
||||
mutex_lock(&trace_types_lock);
|
||||
if (val) {
|
||||
ring_buffer_record_on(buffer);
|
||||
tracer_tracing_on(tr);
|
||||
if (tr->current_trace->start)
|
||||
tr->current_trace->start(tr);
|
||||
} else {
|
||||
ring_buffer_record_off(buffer);
|
||||
tracer_tracing_off(tr);
|
||||
if (tr->current_trace->stop)
|
||||
tr->current_trace->stop(tr);
|
||||
}
|
||||
@@ -5659,9 +5866,10 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
|
||||
}
|
||||
|
||||
static const struct file_operations rb_simple_fops = {
|
||||
.open = tracing_open_generic,
|
||||
.open = tracing_open_generic_tr,
|
||||
.read = rb_simple_read,
|
||||
.write = rb_simple_write,
|
||||
.release = tracing_release_generic_tr,
|
||||
.llseek = default_llseek,
|
||||
};
|
||||
|
||||
@@ -5775,8 +5983,10 @@ static int new_instance_create(const char *name)
|
||||
goto out_free_tr;
|
||||
|
||||
ret = event_trace_add_tracer(tr->dir, tr);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
debugfs_remove_recursive(tr->dir);
|
||||
goto out_free_tr;
|
||||
}
|
||||
|
||||
init_tracer_debugfs(tr, tr->dir);
|
||||
|
||||
@@ -5933,7 +6143,7 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
|
||||
trace_create_file("buffer_total_size_kb", 0444, d_tracer,
|
||||
tr, &tracing_total_entries_fops);
|
||||
|
||||
trace_create_file("free_buffer", 0644, d_tracer,
|
||||
trace_create_file("free_buffer", 0200, d_tracer,
|
||||
tr, &tracing_free_buffer_fops);
|
||||
|
||||
trace_create_file("trace_marker", 0220, d_tracer,
|
||||
|
@@ -214,7 +214,6 @@ struct trace_array {
|
||||
struct dentry *event_dir;
|
||||
struct list_head systems;
|
||||
struct list_head events;
|
||||
struct task_struct *waiter;
|
||||
int ref;
|
||||
};
|
||||
|
||||
@@ -224,6 +223,11 @@ enum {
|
||||
|
||||
extern struct list_head ftrace_trace_arrays;
|
||||
|
||||
extern struct mutex trace_types_lock;
|
||||
|
||||
extern int trace_array_get(struct trace_array *tr);
|
||||
extern void trace_array_put(struct trace_array *tr);
|
||||
|
||||
/*
|
||||
* The global tracer (top) should be the first trace array added,
|
||||
* but we check the flag anyway.
|
||||
@@ -554,11 +558,6 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu);
|
||||
|
||||
void poll_wait_pipe(struct trace_iterator *iter);
|
||||
|
||||
void ftrace(struct trace_array *tr,
|
||||
struct trace_array_cpu *data,
|
||||
unsigned long ip,
|
||||
unsigned long parent_ip,
|
||||
unsigned long flags, int pc);
|
||||
void tracing_sched_switch_trace(struct trace_array *tr,
|
||||
struct task_struct *prev,
|
||||
struct task_struct *next,
|
||||
@@ -680,6 +679,15 @@ extern int trace_selftest_startup_sched_switch(struct tracer *trace,
|
||||
struct trace_array *tr);
|
||||
extern int trace_selftest_startup_branch(struct tracer *trace,
|
||||
struct trace_array *tr);
|
||||
/*
|
||||
* Tracer data references selftest functions that only occur
|
||||
* on boot up. These can be __init functions. Thus, when selftests
|
||||
* are enabled, then the tracers need to reference __init functions.
|
||||
*/
|
||||
#define __tracer_data __refdata
|
||||
#else
|
||||
/* Tracers are seldom changed. Optimize when selftests are disabled. */
|
||||
#define __tracer_data __read_mostly
|
||||
#endif /* CONFIG_FTRACE_STARTUP_TEST */
|
||||
|
||||
extern void *head_page(struct trace_array_cpu *data);
|
||||
@@ -774,6 +782,7 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
|
||||
extern struct list_head ftrace_pids;
|
||||
|
||||
#ifdef CONFIG_FUNCTION_TRACER
|
||||
extern bool ftrace_filter_param __initdata;
|
||||
static inline int ftrace_trace_task(struct task_struct *task)
|
||||
{
|
||||
if (list_empty(&ftrace_pids))
|
||||
@@ -899,12 +908,6 @@ static inline void trace_branch_disable(void)
|
||||
/* set ring buffers to default size if not already done so */
|
||||
int tracing_update_buffers(void);
|
||||
|
||||
/* trace event type bit fields, not numeric */
|
||||
enum {
|
||||
TRACE_EVENT_TYPE_PRINTF = 1,
|
||||
TRACE_EVENT_TYPE_RAW = 2,
|
||||
};
|
||||
|
||||
struct ftrace_event_field {
|
||||
struct list_head link;
|
||||
const char *name;
|
||||
|
@@ -236,6 +236,10 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
|
||||
|
||||
BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
|
||||
|
||||
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
|
||||
"perf buffer not large enough"))
|
||||
return NULL;
|
||||
|
||||
pc = preempt_count();
|
||||
|
||||
*rctxp = perf_swevent_get_recursion_context();
|
||||
@@ -266,6 +270,10 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
|
||||
struct pt_regs regs;
|
||||
int rctx;
|
||||
|
||||
head = this_cpu_ptr(event_function.perf_events);
|
||||
if (hlist_empty(head))
|
||||
return;
|
||||
|
||||
#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
|
||||
sizeof(u64)) - sizeof(u32))
|
||||
|
||||
@@ -279,8 +287,6 @@ perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
|
||||
|
||||
entry->ip = ip;
|
||||
entry->parent_ip = parent_ip;
|
||||
|
||||
head = this_cpu_ptr(event_function.perf_events);
|
||||
perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
|
||||
1, &regs, head, NULL);
|
||||
|
||||
|
@@ -41,6 +41,23 @@ static LIST_HEAD(ftrace_common_fields);
|
||||
static struct kmem_cache *field_cachep;
|
||||
static struct kmem_cache *file_cachep;
|
||||
|
||||
#define SYSTEM_FL_FREE_NAME (1 << 31)
|
||||
|
||||
static inline int system_refcount(struct event_subsystem *system)
|
||||
{
|
||||
return system->ref_count & ~SYSTEM_FL_FREE_NAME;
|
||||
}
|
||||
|
||||
static int system_refcount_inc(struct event_subsystem *system)
|
||||
{
|
||||
return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME;
|
||||
}
|
||||
|
||||
static int system_refcount_dec(struct event_subsystem *system)
|
||||
{
|
||||
return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME;
|
||||
}
|
||||
|
||||
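The helpers above pack a bookkeeping flag (SYSTEM_FL_FREE_NAME) into the top bit of ref_count, so every refcount read or decrement masks the flag off first and the flag survives until the final free. A compact illustration of that packing (struct and main() are made up for the example):

#include <stdio.h>

#define FL_FREE_NAME    (1u << 31)      /* top bit: the name was allocated */

struct subsystem { unsigned int ref_count; };

static unsigned int sys_refcount(struct subsystem *s)
{
        return s->ref_count & ~FL_FREE_NAME;
}

static unsigned int sys_refcount_dec(struct subsystem *s)
{
        return (--s->ref_count) & ~FL_FREE_NAME;
}

int main(void)
{
        /* two references plus the "free the name" flag in one word */
        struct subsystem s = { FL_FREE_NAME | 2 };

        printf("refs=%u\n", sys_refcount(&s));
        sys_refcount_dec(&s);
        if (sys_refcount_dec(&s) == 0)
                printf("last ref gone, flag still set: %d\n",
                       !!(s.ref_count & FL_FREE_NAME));
        return 0;
}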
/* Double loops, do not use break, only goto's work */
|
||||
#define do_for_each_event_file(tr, file) \
|
||||
list_for_each_entry(tr, &ftrace_trace_arrays, list) { \
|
||||
@@ -97,7 +114,7 @@ static int __trace_define_field(struct list_head *head, const char *type,
|
||||
|
||||
field = kmem_cache_alloc(field_cachep, GFP_TRACE);
|
||||
if (!field)
|
||||
goto err;
|
||||
return -ENOMEM;
|
||||
|
||||
field->name = name;
|
||||
field->type = type;
|
||||
@@ -114,11 +131,6 @@ static int __trace_define_field(struct list_head *head, const char *type,
|
||||
list_add(&field->link, head);
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
kmem_cache_free(field_cachep, field);
|
||||
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
int trace_define_field(struct ftrace_event_call *call, const char *type,
|
||||
@@ -279,9 +291,11 @@ static int __ftrace_event_enable_disable(struct ftrace_event_file *file,
|
||||
}
|
||||
call->class->reg(call, TRACE_REG_UNREGISTER, file);
|
||||
}
|
||||
/* If in SOFT_MODE, just set the SOFT_DISABLE_BIT */
|
||||
/* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
|
||||
if (file->flags & FTRACE_EVENT_FL_SOFT_MODE)
|
||||
set_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
|
||||
else
|
||||
clear_bit(FTRACE_EVENT_FL_SOFT_DISABLED_BIT, &file->flags);
|
||||
break;
|
||||
case 1:
|
||||
/*
|
||||
@@ -349,8 +363,8 @@ static void __put_system(struct event_subsystem *system)
|
||||
{
|
||||
struct event_filter *filter = system->filter;
|
||||
|
||||
WARN_ON_ONCE(system->ref_count == 0);
|
||||
if (--system->ref_count)
|
||||
WARN_ON_ONCE(system_refcount(system) == 0);
|
||||
if (system_refcount_dec(system))
|
||||
return;
|
||||
|
||||
list_del(&system->list);
|
||||
@@ -359,13 +373,15 @@ static void __put_system(struct event_subsystem *system)
|
||||
kfree(filter->filter_string);
|
||||
kfree(filter);
|
||||
}
|
||||
if (system->ref_count & SYSTEM_FL_FREE_NAME)
|
||||
kfree(system->name);
|
||||
kfree(system);
|
||||
}
|
||||
|
||||
static void __get_system(struct event_subsystem *system)
|
||||
{
|
||||
WARN_ON_ONCE(system->ref_count == 0);
|
||||
system->ref_count++;
|
||||
WARN_ON_ONCE(system_refcount(system) == 0);
|
||||
system_refcount_inc(system);
|
||||
}
|
||||
|
||||
static void __get_system_dir(struct ftrace_subsystem_dir *dir)
|
||||
@@ -379,7 +395,7 @@ static void __put_system_dir(struct ftrace_subsystem_dir *dir)
|
||||
{
|
||||
WARN_ON_ONCE(dir->ref_count == 0);
|
||||
/* If the subsystem is about to be freed, the dir must be too */
|
||||
WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1);
|
||||
WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);
|
||||
|
||||
__put_system(dir->subsystem);
|
||||
if (!--dir->ref_count)
|
||||
@@ -393,17 +409,46 @@ static void put_system(struct ftrace_subsystem_dir *dir)
|
||||
mutex_unlock(&event_mutex);
|
||||
}
|
||||
|
||||
/*
|
||||
* Open and update trace_array ref count.
|
||||
* Must have the current trace_array passed to it.
|
||||
*/
|
||||
static int tracing_open_generic_file(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct ftrace_event_file *file = inode->i_private;
|
||||
struct trace_array *tr = file->tr;
|
||||
int ret;
|
||||
|
||||
if (trace_array_get(tr) < 0)
|
||||
return -ENODEV;
|
||||
|
||||
ret = tracing_open_generic(inode, filp);
|
||||
if (ret < 0)
|
||||
trace_array_put(tr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int tracing_release_generic_file(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct ftrace_event_file *file = inode->i_private;
|
||||
struct trace_array *tr = file->tr;
|
||||
|
||||
trace_array_put(tr);
|
||||
|
||||
return 0;
|
||||
}
|
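tracing_open_generic_file()/tracing_release_generic_file() above, and the later subsystem_open()/ftrace_event_set_open() hunks, all follow the same shape: take a reference on the owning trace_array before the open, drop it if the open fails, and drop it again on release. A hedged userspace sketch of that shape, with trace_array_get()/trace_array_put() replaced by a plain counter:

#include <stdio.h>

struct array { int refs; int alive; };

/* Stand-ins for trace_array_get()/trace_array_put() */
static int array_get(struct array *a) { return a->alive ? (a->refs++, 0) : -1; }
static void array_put(struct array *a) { a->refs--; }

static int generic_open(int should_fail) { return should_fail ? -1 : 0; }

static int file_open(struct array *a, int should_fail)
{
    int ret;

    if (array_get(a) < 0)       /* pin the owner for the lifetime of the file */
        return -1;
    ret = generic_open(should_fail);
    if (ret < 0)
        array_put(a);           /* open failed: drop the reference again */
    return ret;
}

static void file_release(struct array *a)
{
    array_put(a);               /* matches the get taken in file_open() */
}

int main(void)
{
    struct array a = { .refs = 0, .alive = 1 };
    if (file_open(&a, 0) == 0)
        file_release(&a);
    file_open(&a, 1);           /* a failing open leaves the count balanced */
    printf("refs=%d\n", a.refs); /* prints 0 */
    return 0;
}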
||||
|
||||
/*
|
||||
* __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
|
||||
*/
|
||||
static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
|
||||
const char *sub, const char *event, int set)
|
||||
static int
|
||||
__ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
|
||||
const char *sub, const char *event, int set)
|
||||
{
|
||||
struct ftrace_event_file *file;
|
||||
struct ftrace_event_call *call;
|
||||
int ret = -EINVAL;
|
||||
|
||||
mutex_lock(&event_mutex);
|
||||
list_for_each_entry(file, &tr->events, list) {
|
||||
|
||||
call = file->event_call;
|
||||
@@ -429,6 +474,17 @@ static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
|
||||
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
|
||||
const char *sub, const char *event, int set)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&event_mutex);
|
||||
ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
|
||||
mutex_unlock(&event_mutex);
|
||||
|
||||
return ret;
|
||||
@@ -624,17 +680,17 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
|
||||
loff_t *ppos)
|
||||
{
|
||||
struct ftrace_event_file *file = filp->private_data;
|
||||
char *buf;
|
||||
char buf[4] = "0";
|
||||
|
||||
if (file->flags & FTRACE_EVENT_FL_ENABLED) {
|
||||
if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED)
|
||||
buf = "0*\n";
|
||||
else if (file->flags & FTRACE_EVENT_FL_SOFT_MODE)
|
||||
buf = "1*\n";
|
||||
else
|
||||
buf = "1\n";
|
||||
} else
|
||||
buf = "0\n";
|
||||
if (file->flags & FTRACE_EVENT_FL_ENABLED &&
|
||||
!(file->flags & FTRACE_EVENT_FL_SOFT_DISABLED))
|
||||
strcpy(buf, "1");
|
||||
|
||||
if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED ||
|
||||
file->flags & FTRACE_EVENT_FL_SOFT_MODE)
|
||||
strcat(buf, "*");
|
||||
|
||||
strcat(buf, "\n");
|
||||
|
||||
return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
|
||||
}
|
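The rewritten event_enable_read() builds the reply in a small stack buffer instead of choosing between four string literals, so the "*" soft-mode marker composes with either "0" or "1". A quick sketch of the mapping (the flag bits are illustrative, not the real FTRACE_EVENT_FL_* values):

#include <stdio.h>
#include <string.h>

#define FL_ENABLED        (1 << 0)
#define FL_SOFT_DISABLED  (1 << 1)
#define FL_SOFT_MODE      (1 << 2)

static void format_enable(unsigned long flags, char *buf /* at least 4 bytes */)
{
    strcpy(buf, "0");
    if ((flags & FL_ENABLED) && !(flags & FL_SOFT_DISABLED))
        strcpy(buf, "1");
    if (flags & (FL_SOFT_DISABLED | FL_SOFT_MODE))
        strcat(buf, "*");
    strcat(buf, "\n");
}

int main(void)
{
    char buf[4];
    unsigned long cases[] = { 0, FL_ENABLED, FL_ENABLED | FL_SOFT_MODE,
                              FL_ENABLED | FL_SOFT_DISABLED | FL_SOFT_MODE };
    for (unsigned i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
        format_enable(cases[i], buf);
        printf("%lx -> %s", cases[i], buf);  /* "0\n", "1\n", "1*\n", "0*\n" */
    }
    return 0;
}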
||||
@@ -770,59 +826,33 @@ enum {
|
||||
static void *f_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
{
|
||||
struct ftrace_event_call *call = m->private;
|
||||
struct ftrace_event_field *field;
|
||||
struct list_head *common_head = &ftrace_common_fields;
|
||||
struct list_head *head = trace_get_fields(call);
|
||||
struct list_head *node = v;
|
||||
|
||||
(*pos)++;
|
||||
|
||||
switch ((unsigned long)v) {
|
||||
case FORMAT_HEADER:
|
||||
if (unlikely(list_empty(common_head)))
|
||||
return NULL;
|
||||
|
||||
field = list_entry(common_head->prev,
|
||||
struct ftrace_event_field, link);
|
||||
return field;
|
||||
node = common_head;
|
||||
break;
|
||||
|
||||
case FORMAT_FIELD_SEPERATOR:
|
||||
if (unlikely(list_empty(head)))
|
||||
return NULL;
|
||||
|
||||
field = list_entry(head->prev, struct ftrace_event_field, link);
|
||||
return field;
|
||||
node = head;
|
||||
break;
|
||||
|
||||
case FORMAT_PRINTFMT:
|
||||
/* all done */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
field = v;
|
||||
if (field->link.prev == common_head)
|
||||
node = node->prev;
|
||||
if (node == common_head)
|
||||
return (void *)FORMAT_FIELD_SEPERATOR;
|
||||
else if (field->link.prev == head)
|
||||
else if (node == head)
|
||||
return (void *)FORMAT_PRINTFMT;
|
||||
|
||||
field = list_entry(field->link.prev, struct ftrace_event_field, link);
|
||||
|
||||
return field;
|
||||
}
|
||||
|
||||
static void *f_start(struct seq_file *m, loff_t *pos)
|
||||
{
|
||||
loff_t l = 0;
|
||||
void *p;
|
||||
|
||||
/* Start by showing the header */
|
||||
if (!*pos)
|
||||
return (void *)FORMAT_HEADER;
|
||||
|
||||
p = (void *)FORMAT_HEADER;
|
||||
do {
|
||||
p = f_next(m, p, &l);
|
||||
} while (p && l < *pos);
|
||||
|
||||
return p;
|
||||
else
|
||||
return node;
|
||||
}
|
||||
|
||||
static int f_show(struct seq_file *m, void *v)
|
||||
@@ -848,8 +878,7 @@ static int f_show(struct seq_file *m, void *v)
|
||||
return 0;
|
||||
}
|
||||
|
||||
field = v;
|
||||
|
||||
field = list_entry(v, struct ftrace_event_field, link);
|
||||
/*
|
||||
* Smartly shows the array type(except dynamic array).
|
||||
* Normal:
|
||||
@@ -876,6 +905,17 @@ static int f_show(struct seq_file *m, void *v)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void *f_start(struct seq_file *m, loff_t *pos)
|
||||
{
|
||||
void *p = (void *)FORMAT_HEADER;
|
||||
loff_t l = 0;
|
||||
|
||||
while (l < *pos && p)
|
||||
p = f_next(m, p, &l);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
static void f_stop(struct seq_file *m, void *p)
|
||||
{
|
||||
}
|
||||
@@ -907,23 +947,14 @@ static ssize_t
|
||||
event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
|
||||
{
|
||||
struct ftrace_event_call *call = filp->private_data;
|
||||
struct trace_seq *s;
|
||||
int r;
|
||||
char buf[32];
|
||||
int len;
|
||||
|
||||
if (*ppos)
|
||||
return 0;
|
||||
|
||||
s = kmalloc(sizeof(*s), GFP_KERNEL);
|
||||
if (!s)
|
||||
return -ENOMEM;
|
||||
|
||||
trace_seq_init(s);
|
||||
trace_seq_printf(s, "%d\n", call->event.type);
|
||||
|
||||
r = simple_read_from_buffer(ubuf, cnt, ppos,
|
||||
s->buffer, s->len);
|
||||
kfree(s);
|
||||
return r;
|
||||
len = sprintf(buf, "%d\n", call->event.type);
|
||||
return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
|
||||
}
|
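event_id_read() now formats the event id into a 32-byte stack buffer with sprintf() rather than allocating a struct trace_seq just to print one integer. The same simplification in standalone form, with simple_read_from_buffer() replaced by a plain offset-aware copy:

#include <stdio.h>
#include <string.h>

/* Rough stand-in for simple_read_from_buffer(): copy from an in-memory
 * buffer honouring the caller's offset, return bytes copied. */
static long read_from_buffer(char *dst, size_t cnt, long *ppos,
                             const char *src, size_t len)
{
    size_t pos = (size_t)*ppos;

    if (pos >= len)
        return 0;
    if (cnt > len - pos)
        cnt = len - pos;
    memcpy(dst, src + pos, cnt);
    *ppos += cnt;
    return (long)cnt;
}

static long event_id_read(int event_type, char *ubuf, size_t cnt, long *ppos)
{
    char buf[32];               /* plenty for "%d\n" */
    int len;

    if (*ppos)
        return 0;
    len = sprintf(buf, "%d\n", event_type);
    return read_from_buffer(ubuf, cnt, ppos, buf, (size_t)len);
}

int main(void)
{
    char out[32];
    long pos = 0;
    long n = event_id_read(314, out, sizeof(out), &pos);
    printf("%.*s", (int)n, out);    /* prints "314\n" */
    return 0;
}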
||||
|
||||
static ssize_t
|
||||
@@ -992,6 +1023,7 @@ static int subsystem_open(struct inode *inode, struct file *filp)
|
||||
int ret;
|
||||
|
||||
/* Make sure the system still exists */
|
||||
mutex_lock(&trace_types_lock);
|
||||
mutex_lock(&event_mutex);
|
||||
list_for_each_entry(tr, &ftrace_trace_arrays, list) {
|
||||
list_for_each_entry(dir, &tr->systems, list) {
|
||||
@@ -1007,6 +1039,7 @@ static int subsystem_open(struct inode *inode, struct file *filp)
|
||||
}
|
||||
exit_loop:
|
||||
mutex_unlock(&event_mutex);
|
||||
mutex_unlock(&trace_types_lock);
|
||||
|
||||
if (!system)
|
||||
return -ENODEV;
|
||||
@@ -1014,9 +1047,17 @@ static int subsystem_open(struct inode *inode, struct file *filp)
|
||||
/* Some versions of gcc think dir can be uninitialized here */
|
||||
WARN_ON(!dir);
|
||||
|
||||
ret = tracing_open_generic(inode, filp);
|
||||
if (ret < 0)
|
||||
/* Still need to increment the ref count of the system */
|
||||
if (trace_array_get(tr) < 0) {
|
||||
put_system(dir);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
ret = tracing_open_generic(inode, filp);
|
||||
if (ret < 0) {
|
||||
trace_array_put(tr);
|
||||
put_system(dir);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1027,16 +1068,23 @@ static int system_tr_open(struct inode *inode, struct file *filp)
|
||||
struct trace_array *tr = inode->i_private;
|
||||
int ret;
|
||||
|
||||
if (trace_array_get(tr) < 0)
|
||||
return -ENODEV;
|
||||
|
||||
/* Make a temporary dir that has no system but points to tr */
|
||||
dir = kzalloc(sizeof(*dir), GFP_KERNEL);
|
||||
if (!dir)
|
||||
if (!dir) {
|
||||
trace_array_put(tr);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
dir->tr = tr;
|
||||
|
||||
ret = tracing_open_generic(inode, filp);
|
||||
if (ret < 0)
|
||||
if (ret < 0) {
|
||||
trace_array_put(tr);
|
||||
kfree(dir);
|
||||
}
|
||||
|
||||
filp->private_data = dir;
|
||||
|
||||
@@ -1047,6 +1095,8 @@ static int subsystem_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ftrace_subsystem_dir *dir = file->private_data;
|
||||
|
||||
trace_array_put(dir->tr);
|
||||
|
||||
/*
|
||||
* If dir->subsystem is NULL, then this is a temporary
|
||||
* descriptor that was made for a trace_array to enable
|
||||
@@ -1143,6 +1193,7 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
|
||||
|
||||
static int ftrace_event_avail_open(struct inode *inode, struct file *file);
|
||||
static int ftrace_event_set_open(struct inode *inode, struct file *file);
|
||||
static int ftrace_event_release(struct inode *inode, struct file *file);
|
||||
|
||||
static const struct seq_operations show_event_seq_ops = {
|
||||
.start = t_start,
|
||||
@@ -1170,13 +1221,14 @@ static const struct file_operations ftrace_set_event_fops = {
|
||||
.read = seq_read,
|
||||
.write = ftrace_event_write,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
.release = ftrace_event_release,
|
||||
};
|
||||
|
||||
static const struct file_operations ftrace_enable_fops = {
|
||||
.open = tracing_open_generic,
|
||||
.open = tracing_open_generic_file,
|
||||
.read = event_enable_read,
|
||||
.write = event_enable_write,
|
||||
.release = tracing_release_generic_file,
|
||||
.llseek = default_llseek,
|
||||
};
|
||||
|
||||
@@ -1247,6 +1299,15 @@ ftrace_event_open(struct inode *inode, struct file *file,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ftrace_event_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct trace_array *tr = inode->i_private;
|
||||
|
||||
trace_array_put(tr);
|
||||
|
||||
return seq_release(inode, file);
|
||||
}
|
||||
|
||||
static int
|
||||
ftrace_event_avail_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
@@ -1260,12 +1321,19 @@ ftrace_event_set_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
const struct seq_operations *seq_ops = &show_set_event_seq_ops;
|
||||
struct trace_array *tr = inode->i_private;
|
||||
int ret;
|
||||
|
||||
if (trace_array_get(tr) < 0)
|
||||
return -ENODEV;
|
||||
|
||||
if ((file->f_mode & FMODE_WRITE) &&
|
||||
(file->f_flags & O_TRUNC))
|
||||
ftrace_clear_events(tr);
|
||||
|
||||
return ftrace_event_open(inode, file, seq_ops);
|
||||
ret = ftrace_event_open(inode, file, seq_ops);
|
||||
if (ret < 0)
|
||||
trace_array_put(tr);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct event_subsystem *
|
||||
@@ -1279,7 +1347,15 @@ create_new_subsystem(const char *name)
|
||||
return NULL;
|
||||
|
||||
system->ref_count = 1;
|
||||
system->name = name;
|
||||
|
||||
/* Only allocate if dynamic (kprobes and modules) */
|
||||
if (!core_kernel_data((unsigned long)name)) {
|
||||
system->ref_count |= SYSTEM_FL_FREE_NAME;
|
||||
system->name = kstrdup(name, GFP_KERNEL);
|
||||
if (!system->name)
|
||||
goto out_free;
|
||||
} else
|
||||
system->name = name;
|
||||
|
||||
system->filter = NULL;
|
||||
|
||||
@@ -1292,6 +1368,8 @@ create_new_subsystem(const char *name)
|
||||
return system;
|
||||
|
||||
out_free:
|
||||
if (system->ref_count & SYSTEM_FL_FREE_NAME)
|
||||
kfree(system->name);
|
||||
kfree(system);
|
||||
return NULL;
|
||||
}
|
||||
@@ -1591,6 +1669,7 @@ static void __add_event_to_tracers(struct ftrace_event_call *call,
|
||||
int trace_add_event_call(struct ftrace_event_call *call)
|
||||
{
|
||||
int ret;
|
||||
mutex_lock(&trace_types_lock);
|
||||
mutex_lock(&event_mutex);
|
||||
|
||||
ret = __register_event(call, NULL);
|
||||
@@ -1598,11 +1677,13 @@ int trace_add_event_call(struct ftrace_event_call *call)
|
||||
__add_event_to_tracers(call, NULL);
|
||||
|
||||
mutex_unlock(&event_mutex);
|
||||
mutex_unlock(&trace_types_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Must be called under locking both of event_mutex and trace_event_sem.
|
||||
* Must be called under locking of trace_types_lock, event_mutex and
|
||||
* trace_event_sem.
|
||||
*/
|
||||
static void __trace_remove_event_call(struct ftrace_event_call *call)
|
||||
{
|
||||
@@ -1614,11 +1695,13 @@ static void __trace_remove_event_call(struct ftrace_event_call *call)
|
||||
/* Remove an event_call */
|
||||
void trace_remove_event_call(struct ftrace_event_call *call)
|
||||
{
|
||||
mutex_lock(&trace_types_lock);
|
||||
mutex_lock(&event_mutex);
|
||||
down_write(&trace_event_sem);
|
||||
__trace_remove_event_call(call);
|
||||
up_write(&trace_event_sem);
|
||||
mutex_unlock(&event_mutex);
|
||||
mutex_unlock(&trace_types_lock);
|
||||
}
|
||||
|
||||
#define for_each_event(event, start, end) \
|
||||
@@ -1762,6 +1845,7 @@ static int trace_module_notify(struct notifier_block *self,
|
||||
{
|
||||
struct module *mod = data;
|
||||
|
||||
mutex_lock(&trace_types_lock);
|
||||
mutex_lock(&event_mutex);
|
||||
switch (val) {
|
||||
case MODULE_STATE_COMING:
|
||||
@@ -1772,6 +1856,7 @@ static int trace_module_notify(struct notifier_block *self,
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&event_mutex);
|
||||
mutex_unlock(&trace_types_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2011,10 +2096,7 @@ event_enable_func(struct ftrace_hash *hash,
|
||||
int ret;
|
||||
|
||||
/* hash funcs only work with set_ftrace_filter */
|
||||
if (!enabled)
|
||||
return -EINVAL;
|
||||
|
||||
if (!param)
|
||||
if (!enabled || !param)
|
||||
return -EINVAL;
|
||||
|
||||
system = strsep(&param, ":");
|
||||
@@ -2329,11 +2411,11 @@ early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
|
||||
|
||||
int event_trace_del_tracer(struct trace_array *tr)
|
||||
{
|
||||
/* Disable any running events */
|
||||
__ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
|
||||
|
||||
mutex_lock(&event_mutex);
|
||||
|
||||
/* Disable any running events */
|
||||
__ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
|
||||
|
||||
down_write(&trace_event_sem);
|
||||
__trace_remove_event_dirs(tr);
|
||||
debugfs_remove_recursive(tr->event_dir);
|
||||
|
@@ -44,6 +44,7 @@ enum filter_op_ids
|
||||
OP_LE,
|
||||
OP_GT,
|
||||
OP_GE,
|
||||
OP_BAND,
|
||||
OP_NONE,
|
||||
OP_OPEN_PAREN,
|
||||
};
|
||||
@@ -54,6 +55,7 @@ struct filter_op {
|
||||
int precedence;
|
||||
};
|
||||
|
||||
/* Order must be the same as enum filter_op_ids above */
|
||||
static struct filter_op filter_ops[] = {
|
||||
{ OP_OR, "||", 1 },
|
||||
{ OP_AND, "&&", 2 },
|
||||
@@ -64,6 +66,7 @@ static struct filter_op filter_ops[] = {
|
||||
{ OP_LE, "<=", 5 },
|
||||
{ OP_GT, ">", 5 },
|
||||
{ OP_GE, ">=", 5 },
|
||||
{ OP_BAND, "&", 6 },
|
||||
{ OP_NONE, "OP_NONE", 0 },
|
||||
{ OP_OPEN_PAREN, "(", 0 },
|
||||
};
|
||||
@@ -156,6 +159,9 @@ static int filter_pred_##type(struct filter_pred *pred, void *event) \
|
||||
case OP_GE: \
|
||||
match = (*addr >= val); \
|
||||
break; \
|
||||
case OP_BAND: \
|
||||
match = (*addr & val); \
|
||||
break; \
|
||||
default: \
|
||||
break; \
|
||||
} \
|
||||
@@ -640,7 +646,7 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
|
||||
if (filter && filter->filter_string)
|
||||
trace_seq_printf(s, "%s\n", filter->filter_string);
|
||||
else
|
||||
trace_seq_printf(s, "none\n");
|
||||
trace_seq_puts(s, "none\n");
|
||||
mutex_unlock(&event_mutex);
|
||||
}
|
||||
|
||||
@@ -654,7 +660,7 @@ void print_subsystem_event_filter(struct event_subsystem *system,
|
||||
if (filter && filter->filter_string)
|
||||
trace_seq_printf(s, "%s\n", filter->filter_string);
|
||||
else
|
||||
trace_seq_printf(s, DEFAULT_SYS_FILTER_MESSAGE "\n");
|
||||
trace_seq_puts(s, DEFAULT_SYS_FILTER_MESSAGE "\n");
|
||||
mutex_unlock(&event_mutex);
|
||||
}
|
||||
|
||||
|
@@ -199,7 +199,7 @@ static int func_set_flag(u32 old_flags, u32 bit, int set)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct tracer function_trace __read_mostly =
|
||||
static struct tracer function_trace __tracer_data =
|
||||
{
|
||||
.name = "function",
|
||||
.init = function_trace_init,
|
||||
@@ -290,6 +290,21 @@ ftrace_stacktrace_count(unsigned long ip, unsigned long parent_ip, void **data)
|
||||
trace_dump_stack(STACK_SKIP);
|
||||
}
|
||||
|
||||
static void
|
||||
ftrace_dump_probe(unsigned long ip, unsigned long parent_ip, void **data)
|
||||
{
|
||||
if (update_count(data))
|
||||
ftrace_dump(DUMP_ALL);
|
||||
}
|
||||
|
||||
/* Only dump the current CPU buffer. */
|
||||
static void
|
||||
ftrace_cpudump_probe(unsigned long ip, unsigned long parent_ip, void **data)
|
||||
{
|
||||
if (update_count(data))
|
||||
ftrace_dump(DUMP_ORIG);
|
||||
}
|
||||
|
||||
static int
|
||||
ftrace_probe_print(const char *name, struct seq_file *m,
|
||||
unsigned long ip, void *data)
|
||||
@@ -327,6 +342,20 @@ ftrace_stacktrace_print(struct seq_file *m, unsigned long ip,
|
||||
return ftrace_probe_print("stacktrace", m, ip, data);
|
||||
}
|
||||
|
||||
static int
|
||||
ftrace_dump_print(struct seq_file *m, unsigned long ip,
|
||||
struct ftrace_probe_ops *ops, void *data)
|
||||
{
|
||||
return ftrace_probe_print("dump", m, ip, data);
|
||||
}
|
||||
|
||||
static int
|
||||
ftrace_cpudump_print(struct seq_file *m, unsigned long ip,
|
||||
struct ftrace_probe_ops *ops, void *data)
|
||||
{
|
||||
return ftrace_probe_print("cpudump", m, ip, data);
|
||||
}
|
||||
|
||||
static struct ftrace_probe_ops traceon_count_probe_ops = {
|
||||
.func = ftrace_traceon_count,
|
||||
.print = ftrace_traceon_print,
|
||||
@@ -342,6 +371,16 @@ static struct ftrace_probe_ops stacktrace_count_probe_ops = {
|
||||
.print = ftrace_stacktrace_print,
|
||||
};
|
||||
|
||||
static struct ftrace_probe_ops dump_probe_ops = {
|
||||
.func = ftrace_dump_probe,
|
||||
.print = ftrace_dump_print,
|
||||
};
|
||||
|
||||
static struct ftrace_probe_ops cpudump_probe_ops = {
|
||||
.func = ftrace_cpudump_probe,
|
||||
.print = ftrace_cpudump_print,
|
||||
};
|
||||
|
||||
static struct ftrace_probe_ops traceon_probe_ops = {
|
||||
.func = ftrace_traceon,
|
||||
.print = ftrace_traceon_print,
|
||||
@@ -425,6 +464,32 @@ ftrace_stacktrace_callback(struct ftrace_hash *hash,
|
||||
param, enable);
|
||||
}
|
||||
|
||||
static int
|
||||
ftrace_dump_callback(struct ftrace_hash *hash,
|
||||
char *glob, char *cmd, char *param, int enable)
|
||||
{
|
||||
struct ftrace_probe_ops *ops;
|
||||
|
||||
ops = &dump_probe_ops;
|
||||
|
||||
/* Only dump once. */
|
||||
return ftrace_trace_probe_callback(ops, hash, glob, cmd,
|
||||
"1", enable);
|
||||
}
|
||||
|
||||
static int
|
||||
ftrace_cpudump_callback(struct ftrace_hash *hash,
|
||||
char *glob, char *cmd, char *param, int enable)
|
||||
{
|
||||
struct ftrace_probe_ops *ops;
|
||||
|
||||
ops = &cpudump_probe_ops;
|
||||
|
||||
/* Only dump once. */
|
||||
return ftrace_trace_probe_callback(ops, hash, glob, cmd,
|
||||
"1", enable);
|
||||
}
|
||||
|
||||
static struct ftrace_func_command ftrace_traceon_cmd = {
|
||||
.name = "traceon",
|
||||
.func = ftrace_trace_onoff_callback,
|
||||
@@ -440,6 +505,16 @@ static struct ftrace_func_command ftrace_stacktrace_cmd = {
|
||||
.func = ftrace_stacktrace_callback,
|
||||
};
|
||||
|
||||
static struct ftrace_func_command ftrace_dump_cmd = {
|
||||
.name = "dump",
|
||||
.func = ftrace_dump_callback,
|
||||
};
|
||||
|
||||
static struct ftrace_func_command ftrace_cpudump_cmd = {
|
||||
.name = "cpudump",
|
||||
.func = ftrace_cpudump_callback,
|
||||
};
|
||||
|
||||
static int __init init_func_cmd_traceon(void)
|
||||
{
|
||||
int ret;
|
||||
@@ -450,13 +525,31 @@ static int __init init_func_cmd_traceon(void)
|
||||
|
||||
ret = register_ftrace_command(&ftrace_traceon_cmd);
|
||||
if (ret)
|
||||
unregister_ftrace_command(&ftrace_traceoff_cmd);
|
||||
goto out_free_traceoff;
|
||||
|
||||
ret = register_ftrace_command(&ftrace_stacktrace_cmd);
|
||||
if (ret) {
|
||||
unregister_ftrace_command(&ftrace_traceoff_cmd);
|
||||
unregister_ftrace_command(&ftrace_traceon_cmd);
|
||||
}
|
||||
if (ret)
|
||||
goto out_free_traceon;
|
||||
|
||||
ret = register_ftrace_command(&ftrace_dump_cmd);
|
||||
if (ret)
|
||||
goto out_free_stacktrace;
|
||||
|
||||
ret = register_ftrace_command(&ftrace_cpudump_cmd);
|
||||
if (ret)
|
||||
goto out_free_dump;
|
||||
|
||||
return 0;
|
||||
|
||||
out_free_dump:
|
||||
unregister_ftrace_command(&ftrace_dump_cmd);
|
||||
out_free_stacktrace:
|
||||
unregister_ftrace_command(&ftrace_stacktrace_cmd);
|
||||
out_free_traceon:
|
||||
unregister_ftrace_command(&ftrace_traceon_cmd);
|
||||
out_free_traceoff:
|
||||
unregister_ftrace_command(&ftrace_traceoff_cmd);
|
||||
|
||||
return ret;
|
||||
}
|
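init_func_cmd_traceon() now registers the extra "dump"/"cpudump" commands and unwinds already-registered ones through a chain of goto labels instead of ad-hoc unregister calls. The unwind shape, reduced to a standalone example with stub register/unregister functions (names are illustrative only):

#include <stdio.h>

static int fail_at;    /* 0 = all succeed; N = Nth registration fails */

static int reg(const char *name, int idx)
{
    if (idx == fail_at) { printf("register %s: fail\n", name); return -1; }
    printf("register %s\n", name);
    return 0;
}
static void unreg(const char *name) { printf("unregister %s\n", name); }

static int init_cmds(void)
{
    int ret;

    ret = reg("traceoff", 1);
    if (ret)
        return ret;
    ret = reg("traceon", 2);
    if (ret)
        goto out_free_traceoff;
    ret = reg("stacktrace", 3);
    if (ret)
        goto out_free_traceon;
    ret = reg("dump", 4);
    if (ret)
        goto out_free_stacktrace;
    return 0;

out_free_stacktrace:    /* each label undoes everything registered before it */
    unreg("stacktrace");
out_free_traceon:
    unreg("traceon");
out_free_traceoff:
    unreg("traceoff");
    return ret;
}

int main(void)
{
    fail_at = 4;        /* make the "dump" registration fail and watch the unwind */
    init_cmds();
    return 0;
}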
||||
#else
|
||||
|
@@ -446,7 +446,7 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
|
||||
|
||||
/* First spaces to align center */
|
||||
for (i = 0; i < spaces / 2; i++) {
|
||||
ret = trace_seq_printf(s, " ");
|
||||
ret = trace_seq_putc(s, ' ');
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
}
|
||||
@@ -457,7 +457,7 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
|
||||
|
||||
/* Last spaces to align center */
|
||||
for (i = 0; i < spaces - (spaces / 2); i++) {
|
||||
ret = trace_seq_printf(s, " ");
|
||||
ret = trace_seq_putc(s, ' ');
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
}
|
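This hunk and the many similar ones below (graph, mmiotrace, output and syscall printers) replace trace_seq_printf() with trace_seq_puts() or trace_seq_putc() whenever the output is a constant string or a single character, skipping format-string parsing. A toy seq buffer showing the three calls side by side (not the kernel's struct trace_seq):

#include <stdarg.h>
#include <stdio.h>
#include <string.h>

struct seq { char buf[256]; size_t len; };

static int seq_putc(struct seq *s, char c)
{
    if (s->len + 1 >= sizeof(s->buf))
        return 0;
    s->buf[s->len++] = c;
    return 1;
}

static int seq_puts(struct seq *s, const char *str)    /* no format parsing */
{
    size_t l = strlen(str);
    if (s->len + l >= sizeof(s->buf))
        return 0;
    memcpy(s->buf + s->len, str, l);
    s->len += l;
    return 1;
}

static int seq_printf(struct seq *s, const char *fmt, ...)
{
    va_list ap;
    int n;

    va_start(ap, fmt);
    n = vsnprintf(s->buf + s->len, sizeof(s->buf) - s->len, fmt, ap);
    va_end(ap);
    if (n < 0 || (size_t)n >= sizeof(s->buf) - s->len)
        return 0;
    s->len += (size_t)n;
    return 1;
}

int main(void)
{
    struct seq s = { .len = 0 };

    seq_printf(&s, "pid=%d", 42);   /* needs printf: has a conversion */
    seq_puts(&s, " | ");            /* constant string: puts is enough */
    seq_putc(&s, '\n');             /* single char: putc is enough */
    fwrite(s.buf, 1, s.len, stdout);
    return 0;
}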
||||
@@ -503,7 +503,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
|
||||
------------------------------------------
|
||||
|
||||
*/
|
||||
ret = trace_seq_printf(s,
|
||||
ret = trace_seq_puts(s,
|
||||
" ------------------------------------------\n");
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
@@ -516,7 +516,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
|
||||
if (ret == TRACE_TYPE_PARTIAL_LINE)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
|
||||
ret = trace_seq_printf(s, " => ");
|
||||
ret = trace_seq_puts(s, " => ");
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
|
||||
@@ -524,7 +524,7 @@ verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data)
|
||||
if (ret == TRACE_TYPE_PARTIAL_LINE)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
|
||||
ret = trace_seq_printf(s,
|
||||
ret = trace_seq_puts(s,
|
||||
"\n ------------------------------------------\n\n");
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
@@ -645,7 +645,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
|
||||
ret = print_graph_proc(s, pid);
|
||||
if (ret == TRACE_TYPE_PARTIAL_LINE)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
ret = trace_seq_printf(s, " | ");
|
||||
ret = trace_seq_puts(s, " | ");
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
}
|
||||
@@ -657,9 +657,9 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
|
||||
return ret;
|
||||
|
||||
if (type == TRACE_GRAPH_ENT)
|
||||
ret = trace_seq_printf(s, "==========>");
|
||||
ret = trace_seq_puts(s, "==========>");
|
||||
else
|
||||
ret = trace_seq_printf(s, "<==========");
|
||||
ret = trace_seq_puts(s, "<==========");
|
||||
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
@@ -668,7 +668,7 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
|
||||
if (ret != TRACE_TYPE_HANDLED)
|
||||
return ret;
|
||||
|
||||
ret = trace_seq_printf(s, "\n");
|
||||
ret = trace_seq_putc(s, '\n');
|
||||
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
@@ -705,13 +705,13 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
|
||||
len += strlen(nsecs_str);
|
||||
}
|
||||
|
||||
ret = trace_seq_printf(s, " us ");
|
||||
ret = trace_seq_puts(s, " us ");
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
|
||||
/* Print remaining spaces to fit the row's width */
|
||||
for (i = len; i < 7; i++) {
|
||||
ret = trace_seq_printf(s, " ");
|
||||
ret = trace_seq_putc(s, ' ');
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
}
|
||||
@@ -731,13 +731,13 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
|
||||
/* No real adata, just filling the column with spaces */
|
||||
switch (duration) {
|
||||
case DURATION_FILL_FULL:
|
||||
ret = trace_seq_printf(s, " | ");
|
||||
ret = trace_seq_puts(s, " | ");
|
||||
return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
|
||||
case DURATION_FILL_START:
|
||||
ret = trace_seq_printf(s, " ");
|
||||
ret = trace_seq_puts(s, " ");
|
||||
return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
|
||||
case DURATION_FILL_END:
|
||||
ret = trace_seq_printf(s, " |");
|
||||
ret = trace_seq_puts(s, " |");
|
||||
return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
|
||||
}
|
||||
|
||||
@@ -745,10 +745,10 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
|
||||
if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
|
||||
/* Duration exceeded 100 msecs */
|
||||
if (duration > 100000ULL)
|
||||
ret = trace_seq_printf(s, "! ");
|
||||
ret = trace_seq_puts(s, "! ");
|
||||
/* Duration exceeded 10 msecs */
|
||||
else if (duration > 10000ULL)
|
||||
ret = trace_seq_printf(s, "+ ");
|
||||
ret = trace_seq_puts(s, "+ ");
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -757,7 +757,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
|
||||
* to fill out the space.
|
||||
*/
|
||||
if (ret == -1)
|
||||
ret = trace_seq_printf(s, " ");
|
||||
ret = trace_seq_puts(s, " ");
|
||||
|
||||
/* Catching here any failure happenned above */
|
||||
if (!ret)
|
||||
@@ -767,7 +767,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s,
|
||||
if (ret != TRACE_TYPE_HANDLED)
|
||||
return ret;
|
||||
|
||||
ret = trace_seq_printf(s, "| ");
|
||||
ret = trace_seq_puts(s, "| ");
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
|
||||
@@ -817,7 +817,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
|
||||
|
||||
/* Function */
|
||||
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
|
||||
ret = trace_seq_printf(s, " ");
|
||||
ret = trace_seq_putc(s, ' ');
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
}
|
||||
@@ -858,7 +858,7 @@ print_graph_entry_nested(struct trace_iterator *iter,
|
||||
|
||||
/* Function */
|
||||
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
|
||||
ret = trace_seq_printf(s, " ");
|
||||
ret = trace_seq_putc(s, ' ');
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
}
|
||||
@@ -917,7 +917,7 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s,
|
||||
if (ret == TRACE_TYPE_PARTIAL_LINE)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
|
||||
ret = trace_seq_printf(s, " | ");
|
||||
ret = trace_seq_puts(s, " | ");
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
}
|
||||
@@ -1117,7 +1117,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
|
||||
|
||||
/* Closing brace */
|
||||
for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
|
||||
ret = trace_seq_printf(s, " ");
|
||||
ret = trace_seq_putc(s, ' ');
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
}
|
||||
@@ -1129,7 +1129,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
|
||||
* belongs to, write out the function name.
|
||||
*/
|
||||
if (func_match) {
|
||||
ret = trace_seq_printf(s, "}\n");
|
||||
ret = trace_seq_puts(s, "}\n");
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
} else {
|
||||
@@ -1179,13 +1179,13 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
|
||||
/* Indentation */
|
||||
if (depth > 0)
|
||||
for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) {
|
||||
ret = trace_seq_printf(s, " ");
|
||||
ret = trace_seq_putc(s, ' ');
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
}
|
||||
|
||||
/* The comment */
|
||||
ret = trace_seq_printf(s, "/* ");
|
||||
ret = trace_seq_puts(s, "/* ");
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
|
||||
@@ -1216,7 +1216,7 @@ print_graph_comment(struct trace_seq *s, struct trace_entry *ent,
|
||||
s->len--;
|
||||
}
|
||||
|
||||
ret = trace_seq_printf(s, " */\n");
|
||||
ret = trace_seq_puts(s, " */\n");
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
|
||||
@@ -1448,7 +1448,7 @@ static struct trace_event graph_trace_ret_event = {
|
||||
.funcs = &graph_functions
|
||||
};
|
||||
|
||||
static struct tracer graph_trace __read_mostly = {
|
||||
static struct tracer graph_trace __tracer_data = {
|
||||
.name = "function_graph",
|
||||
.open = graph_trace_open,
|
||||
.pipe_open = graph_trace_open,
|
||||
|
@@ -373,7 +373,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
|
||||
struct trace_array_cpu *data;
|
||||
unsigned long flags;
|
||||
|
||||
if (likely(!tracer_enabled))
|
||||
if (!tracer_enabled || !tracing_is_enabled())
|
||||
return;
|
||||
|
||||
cpu = raw_smp_processor_id();
|
||||
@@ -416,7 +416,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
|
||||
else
|
||||
return;
|
||||
|
||||
if (!tracer_enabled)
|
||||
if (!tracer_enabled || !tracing_is_enabled())
|
||||
return;
|
||||
|
||||
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
|
||||
|
@@ -35,12 +35,17 @@ struct trace_probe {
|
||||
const char *symbol; /* symbol name */
|
||||
struct ftrace_event_class class;
|
||||
struct ftrace_event_call call;
|
||||
struct ftrace_event_file * __rcu *files;
|
||||
struct list_head files;
|
||||
ssize_t size; /* trace entry size */
|
||||
unsigned int nr_args;
|
||||
struct probe_arg args[];
|
||||
};
|
||||
|
||||
struct event_file_link {
|
||||
struct ftrace_event_file *file;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
#define SIZEOF_TRACE_PROBE(n) \
|
||||
(offsetof(struct trace_probe, args) + \
|
||||
(sizeof(struct probe_arg) * (n)))
|
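The trace_probe change replaces the NULL-terminated ftrace_event_file pointer array with a linked list of struct event_file_link nodes, so enabling or disabling a file becomes a list add/remove instead of reallocating and copying an array. A simplified, non-RCU userspace sketch of the link list (the kernel version uses list_add_tail_rcu()/list_del_rcu() and synchronize_sched() before the kfree):

#include <stdio.h>
#include <stdlib.h>

struct event_file { const char *name; };

struct event_file_link {
    struct event_file *file;
    struct event_file_link *next;
};

struct probe { struct event_file_link *files; };

static int probe_link_file(struct probe *p, struct event_file *f)
{
    struct event_file_link *link = malloc(sizeof(*link));

    if (!link)
        return -1;
    link->file = f;
    link->next = p->files;      /* the kernel code appends with list_add_tail_rcu() */
    p->files = link;
    return 0;
}

static int probe_unlink_file(struct probe *p, struct event_file *f)
{
    struct event_file_link **pp;

    for (pp = &p->files; *pp; pp = &(*pp)->next) {
        if ((*pp)->file == f) {
            struct event_file_link *link = *pp;
            *pp = link->next;   /* kernel: list_del_rcu() + synchronize_sched() */
            free(link);
            return 0;
        }
    }
    return -1;                  /* mirrors the -EINVAL "not linked" case */
}

int main(void)
{
    struct event_file a = { "sched_switch" }, b = { "sched_wakeup" };
    struct probe p = { NULL };

    probe_link_file(&p, &a);
    probe_link_file(&p, &b);
    probe_unlink_file(&p, &a);
    printf("remaining: %s\n", p.files ? p.files->file->name : "(none)");
    probe_unlink_file(&p, &b);
    return 0;
}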
||||
@@ -150,6 +155,7 @@ static struct trace_probe *alloc_trace_probe(const char *group,
|
||||
goto error;
|
||||
|
||||
INIT_LIST_HEAD(&tp->list);
|
||||
INIT_LIST_HEAD(&tp->files);
|
||||
return tp;
|
||||
error:
|
||||
kfree(tp->call.name);
|
||||
@@ -183,25 +189,6 @@ static struct trace_probe *find_trace_probe(const char *event,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int trace_probe_nr_files(struct trace_probe *tp)
|
||||
{
|
||||
struct ftrace_event_file **file;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* Since all tp->files updater is protected by probe_enable_lock,
|
||||
* we don't need to lock an rcu_read_lock.
|
||||
*/
|
||||
file = rcu_dereference_raw(tp->files);
|
||||
if (file)
|
||||
while (*(file++))
|
||||
ret++;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static DEFINE_MUTEX(probe_enable_lock);
|
||||
|
||||
/*
|
||||
* Enable trace_probe
|
||||
* if the file is NULL, enable "perf" handler, or enable "trace" handler.
|
||||
@@ -211,67 +198,42 @@ enable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&probe_enable_lock);
|
||||
|
||||
if (file) {
|
||||
struct ftrace_event_file **new, **old;
|
||||
int n = trace_probe_nr_files(tp);
|
||||
struct event_file_link *link;
|
||||
|
||||
old = rcu_dereference_raw(tp->files);
|
||||
/* 1 is for new one and 1 is for stopper */
|
||||
new = kzalloc((n + 2) * sizeof(struct ftrace_event_file *),
|
||||
GFP_KERNEL);
|
||||
if (!new) {
|
||||
link = kmalloc(sizeof(*link), GFP_KERNEL);
|
||||
if (!link) {
|
||||
ret = -ENOMEM;
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
}
|
||||
memcpy(new, old, n * sizeof(struct ftrace_event_file *));
|
||||
new[n] = file;
|
||||
/* The last one keeps a NULL */
|
||||
|
||||
rcu_assign_pointer(tp->files, new);
|
||||
link->file = file;
|
||||
list_add_tail_rcu(&link->list, &tp->files);
|
||||
|
||||
tp->flags |= TP_FLAG_TRACE;
|
||||
|
||||
if (old) {
|
||||
/* Make sure the probe is done with old files */
|
||||
synchronize_sched();
|
||||
kfree(old);
|
||||
}
|
||||
} else
|
||||
tp->flags |= TP_FLAG_PROFILE;
|
||||
|
||||
if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) &&
|
||||
!trace_probe_has_gone(tp)) {
|
||||
if (trace_probe_is_registered(tp) && !trace_probe_has_gone(tp)) {
|
||||
if (trace_probe_is_return(tp))
|
||||
ret = enable_kretprobe(&tp->rp);
|
||||
else
|
||||
ret = enable_kprobe(&tp->rp.kp);
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&probe_enable_lock);
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file)
|
||||
static struct event_file_link *
|
||||
find_event_file_link(struct trace_probe *tp, struct ftrace_event_file *file)
|
||||
{
|
||||
struct ftrace_event_file **files;
|
||||
int i;
|
||||
struct event_file_link *link;
|
||||
|
||||
/*
|
||||
* Since all tp->files updater is protected by probe_enable_lock,
|
||||
* we don't need to lock an rcu_read_lock.
|
||||
*/
|
||||
files = rcu_dereference_raw(tp->files);
|
||||
if (files) {
|
||||
for (i = 0; files[i]; i++)
|
||||
if (files[i] == file)
|
||||
return i;
|
||||
}
|
||||
list_for_each_entry(link, &tp->files, list)
|
||||
if (link->file == file)
|
||||
return link;
|
||||
|
||||
return -1;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -281,43 +243,23 @@ trace_probe_file_index(struct trace_probe *tp, struct ftrace_event_file *file)
|
||||
static int
|
||||
disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
|
||||
{
|
||||
struct event_file_link *link = NULL;
|
||||
int wait = 0;
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&probe_enable_lock);
|
||||
|
||||
if (file) {
|
||||
struct ftrace_event_file **new, **old;
|
||||
int n = trace_probe_nr_files(tp);
|
||||
int i, j;
|
||||
|
||||
old = rcu_dereference_raw(tp->files);
|
||||
if (n == 0 || trace_probe_file_index(tp, file) < 0) {
|
||||
link = find_event_file_link(tp, file);
|
||||
if (!link) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (n == 1) { /* Remove the last file */
|
||||
tp->flags &= ~TP_FLAG_TRACE;
|
||||
new = NULL;
|
||||
} else {
|
||||
new = kzalloc(n * sizeof(struct ftrace_event_file *),
|
||||
GFP_KERNEL);
|
||||
if (!new) {
|
||||
ret = -ENOMEM;
|
||||
goto out_unlock;
|
||||
}
|
||||
list_del_rcu(&link->list);
|
||||
wait = 1;
|
||||
if (!list_empty(&tp->files))
|
||||
goto out;
|
||||
|
||||
/* This copy & check loop copies the NULL stopper too */
|
||||
for (i = 0, j = 0; j < n && i < n + 1; i++)
|
||||
if (old[i] != file)
|
||||
new[j++] = old[i];
|
||||
}
|
||||
|
||||
rcu_assign_pointer(tp->files, new);
|
||||
|
||||
/* Make sure the probe is done with old files */
|
||||
synchronize_sched();
|
||||
kfree(old);
|
||||
tp->flags &= ~TP_FLAG_TRACE;
|
||||
} else
|
||||
tp->flags &= ~TP_FLAG_PROFILE;
|
||||
|
||||
@@ -326,10 +268,21 @@ disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
|
||||
disable_kretprobe(&tp->rp);
|
||||
else
|
||||
disable_kprobe(&tp->rp.kp);
|
||||
wait = 1;
|
||||
}
|
||||
out:
|
||||
if (wait) {
|
||||
/*
|
||||
* Synchronize with kprobe_trace_func/kretprobe_trace_func
|
||||
* to ensure disabled (all running handlers are finished).
|
||||
* This is not only for kfree(), but also the caller,
|
||||
* trace_remove_event_call() supposes it for releasing
|
||||
* event_call related objects, which will be accessed in
|
||||
* the kprobe_trace_func/kretprobe_trace_func.
|
||||
*/
|
||||
synchronize_sched();
|
||||
kfree(link); /* Ignored if link == NULL */
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&probe_enable_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -885,20 +838,10 @@ __kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs,
|
||||
static __kprobes void
|
||||
kprobe_trace_func(struct trace_probe *tp, struct pt_regs *regs)
|
||||
{
|
||||
/*
|
||||
* Note: preempt is already disabled around the kprobe handler.
|
||||
* However, we still need an smp_read_barrier_depends() corresponding
|
||||
* to smp_wmb() in rcu_assign_pointer() to access the pointer.
|
||||
*/
|
||||
struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
|
||||
struct event_file_link *link;
|
||||
|
||||
if (unlikely(!file))
|
||||
return;
|
||||
|
||||
while (*file) {
|
||||
__kprobe_trace_func(tp, regs, *file);
|
||||
file++;
|
||||
}
|
||||
list_for_each_entry_rcu(link, &tp->files, list)
|
||||
__kprobe_trace_func(tp, regs, link->file);
|
||||
}
|
||||
|
||||
/* Kretprobe handler */
|
||||
@@ -945,20 +888,10 @@ static __kprobes void
|
||||
kretprobe_trace_func(struct trace_probe *tp, struct kretprobe_instance *ri,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
/*
|
||||
* Note: preempt is already disabled around the kprobe handler.
|
||||
* However, we still need an smp_read_barrier_depends() corresponding
|
||||
* to smp_wmb() in rcu_assign_pointer() to access the pointer.
|
||||
*/
|
||||
struct ftrace_event_file **file = rcu_dereference_raw(tp->files);
|
||||
struct event_file_link *link;
|
||||
|
||||
if (unlikely(!file))
|
||||
return;
|
||||
|
||||
while (*file) {
|
||||
__kretprobe_trace_func(tp, ri, regs, *file);
|
||||
file++;
|
||||
}
|
||||
list_for_each_entry_rcu(link, &tp->files, list)
|
||||
__kretprobe_trace_func(tp, ri, regs, link->file);
|
||||
}
|
||||
|
||||
/* Event entry printers */
|
||||
@@ -1157,13 +1090,14 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
|
||||
int size, __size, dsize;
|
||||
int rctx;
|
||||
|
||||
head = this_cpu_ptr(call->perf_events);
|
||||
if (hlist_empty(head))
|
||||
return;
|
||||
|
||||
dsize = __get_data_size(tp, regs);
|
||||
__size = sizeof(*entry) + tp->size + dsize;
|
||||
size = ALIGN(__size + sizeof(u32), sizeof(u64));
|
||||
size -= sizeof(u32);
|
||||
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
|
||||
"profile buffer not large enough"))
|
||||
return;
|
||||
|
||||
entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
|
||||
if (!entry)
|
||||
@@ -1172,10 +1106,7 @@ kprobe_perf_func(struct trace_probe *tp, struct pt_regs *regs)
|
||||
entry->ip = (unsigned long)tp->rp.kp.addr;
|
||||
memset(&entry[1], 0, dsize);
|
||||
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
|
||||
|
||||
head = this_cpu_ptr(call->perf_events);
|
||||
perf_trace_buf_submit(entry, size, rctx,
|
||||
entry->ip, 1, regs, head, NULL);
|
||||
perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
|
||||
}
|
||||
|
||||
/* Kretprobe profile handler */
|
||||
@@ -1189,13 +1120,14 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
|
||||
int size, __size, dsize;
|
||||
int rctx;
|
||||
|
||||
head = this_cpu_ptr(call->perf_events);
|
||||
if (hlist_empty(head))
|
||||
return;
|
||||
|
||||
dsize = __get_data_size(tp, regs);
|
||||
__size = sizeof(*entry) + tp->size + dsize;
|
||||
size = ALIGN(__size + sizeof(u32), sizeof(u64));
|
||||
size -= sizeof(u32);
|
||||
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
|
||||
"profile buffer not large enough"))
|
||||
return;
|
||||
|
||||
entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
|
||||
if (!entry)
|
||||
@@ -1204,13 +1136,16 @@ kretprobe_perf_func(struct trace_probe *tp, struct kretprobe_instance *ri,
|
||||
entry->func = (unsigned long)tp->rp.kp.addr;
|
||||
entry->ret_ip = (unsigned long)ri->ret_addr;
|
||||
store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
|
||||
|
||||
head = this_cpu_ptr(call->perf_events);
|
||||
perf_trace_buf_submit(entry, size, rctx,
|
||||
entry->ret_ip, 1, regs, head, NULL);
|
||||
perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
|
||||
}
|
||||
#endif /* CONFIG_PERF_EVENTS */
|
||||
|
||||
/*
|
||||
* called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
|
||||
*
|
||||
* kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
|
||||
* lockless, but we can't race with this __init function.
|
||||
*/
|
||||
static __kprobes
|
||||
int kprobe_register(struct ftrace_event_call *event,
|
||||
enum trace_reg type, void *data)
|
||||
@@ -1376,6 +1311,10 @@ find_trace_probe_file(struct trace_probe *tp, struct trace_array *tr)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Nobody but us can call enable_trace_probe/disable_trace_probe at this
|
||||
* stage, we can do this lockless.
|
||||
*/
|
||||
static __init int kprobe_trace_self_tests_init(void)
|
||||
{
|
||||
int ret, warn = 0;
|
||||
|
@@ -90,7 +90,7 @@ static int mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
|
||||
if (drv)
|
||||
ret += trace_seq_printf(s, " %s\n", drv->name);
|
||||
else
|
||||
ret += trace_seq_printf(s, " \n");
|
||||
ret += trace_seq_puts(s, " \n");
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -107,7 +107,7 @@ static void mmio_pipe_open(struct trace_iterator *iter)
|
||||
struct header_iter *hiter;
|
||||
struct trace_seq *s = &iter->seq;
|
||||
|
||||
trace_seq_printf(s, "VERSION 20070824\n");
|
||||
trace_seq_puts(s, "VERSION 20070824\n");
|
||||
|
||||
hiter = kzalloc(sizeof(*hiter), GFP_KERNEL);
|
||||
if (!hiter)
|
||||
@@ -209,7 +209,7 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
|
||||
(rw->value >> 0) & 0xff, rw->pc, 0);
|
||||
break;
|
||||
default:
|
||||
ret = trace_seq_printf(s, "rw what?\n");
|
||||
ret = trace_seq_puts(s, "rw what?\n");
|
||||
break;
|
||||
}
|
||||
if (ret)
|
||||
@@ -245,7 +245,7 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
|
||||
secs, usec_rem, m->map_id, 0UL, 0);
|
||||
break;
|
||||
default:
|
||||
ret = trace_seq_printf(s, "map what?\n");
|
||||
ret = trace_seq_puts(s, "map what?\n");
|
||||
break;
|
||||
}
|
||||
if (ret)
|
||||
|
@@ -78,7 +78,7 @@ enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter)
|
||||
|
||||
trace_assign_type(field, entry);
|
||||
|
||||
ret = trace_seq_printf(s, "%s", field->buf);
|
||||
ret = trace_seq_puts(s, field->buf);
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
|
||||
@@ -558,14 +558,14 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
|
||||
if (ret)
|
||||
ret = trace_seq_puts(s, "??");
|
||||
if (ret)
|
||||
ret = trace_seq_puts(s, "\n");
|
||||
ret = trace_seq_putc(s, '\n');
|
||||
continue;
|
||||
}
|
||||
if (!ret)
|
||||
break;
|
||||
if (ret)
|
||||
ret = seq_print_user_ip(s, mm, ip, sym_flags);
|
||||
ret = trace_seq_puts(s, "\n");
|
||||
ret = trace_seq_putc(s, '\n');
|
||||
}
|
||||
|
||||
if (mm)
|
||||
@@ -579,7 +579,7 @@ seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
|
||||
int ret;
|
||||
|
||||
if (!ip)
|
||||
return trace_seq_printf(s, "0");
|
||||
return trace_seq_putc(s, '0');
|
||||
|
||||
if (sym_flags & TRACE_ITER_SYM_OFFSET)
|
||||
ret = seq_print_sym_offset(s, "%s", ip);
|
||||
@@ -964,14 +964,14 @@ static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags,
|
||||
goto partial;
|
||||
|
||||
if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) {
|
||||
if (!trace_seq_printf(s, " <-"))
|
||||
if (!trace_seq_puts(s, " <-"))
|
||||
goto partial;
|
||||
if (!seq_print_ip_sym(s,
|
||||
field->parent_ip,
|
||||
flags))
|
||||
goto partial;
|
||||
}
|
||||
if (!trace_seq_printf(s, "\n"))
|
||||
if (!trace_seq_putc(s, '\n'))
|
||||
goto partial;
|
||||
|
||||
return TRACE_TYPE_HANDLED;
|
||||
@@ -1210,7 +1210,7 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
|
||||
|
||||
if (!seq_print_ip_sym(s, *p, flags))
|
||||
goto partial;
|
||||
if (!trace_seq_puts(s, "\n"))
|
||||
if (!trace_seq_putc(s, '\n'))
|
||||
goto partial;
|
||||
}
|
||||
|
||||
|
@@ -640,13 +640,20 @@ out:
|
||||
* Enable ftrace, sleep 1/10 second, and then read the trace
|
||||
* buffer to see if all is in order.
|
||||
*/
|
||||
int
|
||||
__init int
|
||||
trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
|
||||
{
|
||||
int save_ftrace_enabled = ftrace_enabled;
|
||||
unsigned long count;
|
||||
int ret;
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
if (ftrace_filter_param) {
|
||||
printk(KERN_CONT " ... kernel command line filter set: force PASS ... ");
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* make sure msleep has been recorded */
|
||||
msleep(1);
|
||||
|
||||
@@ -727,13 +734,20 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
|
||||
* Pretty much the same than for the function tracer from which the selftest
|
||||
* has been borrowed.
|
||||
*/
|
||||
int
|
||||
__init int
|
||||
trace_selftest_startup_function_graph(struct tracer *trace,
|
||||
struct trace_array *tr)
|
||||
{
|
||||
int ret;
|
||||
unsigned long count;
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
if (ftrace_filter_param) {
|
||||
printk(KERN_CONT " ... kernel command line filter set: force PASS ... ");
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Simulate the init() callback but we attach a watchdog callback
|
||||
* to detect and recover from possible hangs
|
||||
|
@@ -175,7 +175,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags,
|
||||
entry = syscall_nr_to_meta(syscall);
|
||||
|
||||
if (!entry) {
|
||||
trace_seq_printf(s, "\n");
|
||||
trace_seq_putc(s, '\n');
|
||||
return TRACE_TYPE_HANDLED;
|
||||
}
|
||||
|
||||
@@ -306,6 +306,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
|
||||
struct syscall_metadata *sys_data;
|
||||
struct ring_buffer_event *event;
|
||||
struct ring_buffer *buffer;
|
||||
unsigned long irq_flags;
|
||||
int pc;
|
||||
int syscall_nr;
|
||||
int size;
|
||||
|
||||
@@ -321,9 +323,12 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
|
||||
|
||||
size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
|
||||
|
||||
local_save_flags(irq_flags);
|
||||
pc = preempt_count();
|
||||
|
||||
buffer = tr->trace_buffer.buffer;
|
||||
event = trace_buffer_lock_reserve(buffer,
|
||||
sys_data->enter_event->event.type, size, 0, 0);
|
||||
sys_data->enter_event->event.type, size, irq_flags, pc);
|
||||
if (!event)
|
||||
return;
|
||||
|
||||
@@ -333,7 +338,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
|
||||
|
||||
if (!filter_current_check_discard(buffer, sys_data->enter_event,
|
||||
entry, event))
|
||||
trace_current_buffer_unlock_commit(buffer, event, 0, 0);
|
||||
trace_current_buffer_unlock_commit(buffer, event,
|
||||
irq_flags, pc);
|
||||
}
|
||||
|
||||
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
|
||||
@@ -343,6 +349,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
|
||||
struct syscall_metadata *sys_data;
|
||||
struct ring_buffer_event *event;
|
||||
struct ring_buffer *buffer;
|
||||
unsigned long irq_flags;
|
||||
int pc;
|
||||
int syscall_nr;
|
||||
|
||||
syscall_nr = trace_get_syscall_nr(current, regs);
|
||||
@@ -355,9 +363,13 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
|
||||
if (!sys_data)
|
||||
return;
|
||||
|
||||
local_save_flags(irq_flags);
|
||||
pc = preempt_count();
|
||||
|
||||
buffer = tr->trace_buffer.buffer;
|
||||
event = trace_buffer_lock_reserve(buffer,
|
||||
sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
|
||||
sys_data->exit_event->event.type, sizeof(*entry),
|
||||
irq_flags, pc);
|
||||
if (!event)
|
||||
return;
|
||||
|
||||
@@ -367,7 +379,8 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
|
||||
|
||||
if (!filter_current_check_discard(buffer, sys_data->exit_event,
|
||||
entry, event))
|
||||
trace_current_buffer_unlock_commit(buffer, event, 0, 0);
|
||||
trace_current_buffer_unlock_commit(buffer, event,
|
||||
irq_flags, pc);
|
||||
}
|
||||
|
||||
static int reg_event_syscall_enter(struct ftrace_event_file *file,
|
||||
@@ -553,15 +566,15 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
||||
if (!sys_data)
|
||||
return;
|
||||
|
||||
head = this_cpu_ptr(sys_data->enter_event->perf_events);
|
||||
if (hlist_empty(head))
|
||||
return;
|
||||
|
||||
/* get the size after alignment with the u32 buffer size field */
|
||||
size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
|
||||
size = ALIGN(size + sizeof(u32), sizeof(u64));
|
||||
size -= sizeof(u32);
|
||||
|
||||
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
|
||||
"perf buffer not large enough"))
|
||||
return;
|
||||
|
||||
rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
|
||||
sys_data->enter_event->event.type, regs, &rctx);
|
||||
if (!rec)
|
||||
@@ -570,8 +583,6 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
||||
rec->nr = syscall_nr;
|
||||
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
|
||||
(unsigned long *)&rec->args);
|
||||
|
||||
head = this_cpu_ptr(sys_data->enter_event->perf_events);
|
||||
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
|
||||
}
|
||||
|
||||
@@ -629,18 +640,14 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
|
||||
if (!sys_data)
|
||||
return;
|
||||
|
||||
head = this_cpu_ptr(sys_data->exit_event->perf_events);
|
||||
if (hlist_empty(head))
|
||||
return;
|
||||
|
||||
/* We can probably do that at build time */
|
||||
size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
|
||||
size -= sizeof(u32);
|
||||
|
||||
/*
|
||||
* Impossible, but be paranoid with the future
|
||||
* How to put this check outside runtime?
|
||||
*/
|
||||
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
|
||||
"exit event has grown above perf buffer size"))
|
||||
return;
|
||||
|
||||
rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
|
||||
sys_data->exit_event->event.type, regs, &rctx);
|
||||
if (!rec)
|
||||
@@ -648,8 +655,6 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
|
||||
|
||||
rec->nr = syscall_nr;
|
||||
rec->ret = syscall_get_return_value(current, regs);
|
||||
|
||||
head = this_cpu_ptr(sys_data->exit_event->perf_events);
|
||||
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
|
||||
}
|
||||
|
||||
|
@@ -283,8 +283,10 @@ static int create_trace_uprobe(int argc, char **argv)
|
||||
return -EINVAL;
|
||||
}
|
||||
arg = strchr(argv[1], ':');
|
||||
if (!arg)
|
||||
if (!arg) {
|
||||
ret = -EINVAL;
|
||||
goto fail_address_parse;
|
||||
}
|
||||
|
||||
*arg++ = '\0';
|
||||
filename = argv[1];
|
||||
@@ -816,8 +818,6 @@ static void uprobe_perf_print(struct trace_uprobe *tu,
|
||||
|
||||
size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
|
||||
size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32);
|
||||
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
|
||||
return;
|
||||
|
||||
preempt_disable();
|
||||
head = this_cpu_ptr(call->perf_events);
|
||||
|
@@ -333,7 +333,8 @@ int __wait_on_atomic_t(wait_queue_head_t *wq, struct wait_bit_queue *q,
|
||||
prepare_to_wait(wq, &q->wait, mode);
|
||||
val = q->key.flags;
|
||||
if (atomic_read(val) == 0)
|
||||
ret = (*action)(val);
|
||||
break;
|
||||
ret = (*action)(val);
|
||||
} while (!ret && atomic_read(val) != 0);
|
||||
finish_wait(wq, &q->wait);
|
||||
return ret;
|
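The __wait_on_atomic_t() fix turns the single if/break into a do/while that keeps calling the action until either the action reports an error or the counter actually reaches zero. A standalone loop with the same control flow (atomic_t replaced by a plain counter, and an action that decrements it to simulate progress):

#include <stdio.h>

static int counter = 3;

/* Stand-in for the caller-supplied action: "wait" for the counter and
 * report progress; returns non-zero on error. */
static int action(int *val)
{
    printf("action: counter=%d\n", *val);
    (*val)--;                   /* pretend someone else dropped a reference */
    return 0;
}

static int wait_on_counter(int *val)
{
    int ret = 0;

    do {
        /* prepare_to_wait() would go here in the kernel version */
        if (*val == 0)
            break;              /* already zero: nothing to wait for */
        ret = action(val);
    } while (!ret && *val != 0); /* keep waiting until zero or error */

    return ret;                 /* finish_wait() would go here */
}

int main(void)
{
    int ret = wait_on_counter(&counter);
    printf("done: ret=%d counter=%d\n", ret, counter);
    return 0;
}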
||||
|
@@ -29,9 +29,9 @@
|
||||
#include <linux/kvm_para.h>
|
||||
#include <linux/perf_event.h>
|
||||
|
||||
int watchdog_enabled = 1;
|
||||
int watchdog_user_enabled = 1;
|
||||
int __read_mostly watchdog_thresh = 10;
|
||||
static int __read_mostly watchdog_disabled;
|
||||
static int __read_mostly watchdog_running;
|
||||
static u64 __read_mostly sample_period;
|
||||
|
||||
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
|
||||
@@ -63,7 +63,7 @@ static int __init hardlockup_panic_setup(char *str)
|
||||
else if (!strncmp(str, "nopanic", 7))
|
||||
hardlockup_panic = 0;
|
||||
else if (!strncmp(str, "0", 1))
|
||||
watchdog_enabled = 0;
|
||||
watchdog_user_enabled = 0;
|
||||
return 1;
|
||||
}
|
||||
__setup("nmi_watchdog=", hardlockup_panic_setup);
|
||||
@@ -82,7 +82,7 @@ __setup("softlockup_panic=", softlockup_panic_setup);
|
||||
|
||||
static int __init nowatchdog_setup(char *str)
|
||||
{
|
||||
watchdog_enabled = 0;
|
||||
watchdog_user_enabled = 0;
|
||||
return 1;
|
||||
}
|
||||
__setup("nowatchdog", nowatchdog_setup);
|
||||
@@ -90,7 +90,7 @@ __setup("nowatchdog", nowatchdog_setup);
|
||||
/* deprecated */
|
||||
static int __init nosoftlockup_setup(char *str)
|
||||
{
|
||||
watchdog_enabled = 0;
|
||||
watchdog_user_enabled = 0;
|
||||
return 1;
|
||||
}
|
||||
__setup("nosoftlockup", nosoftlockup_setup);
|
||||
@@ -158,7 +158,7 @@ void touch_all_softlockup_watchdogs(void)
|
||||
#ifdef CONFIG_HARDLOCKUP_DETECTOR
|
||||
void touch_nmi_watchdog(void)
|
||||
{
|
||||
if (watchdog_enabled) {
|
||||
if (watchdog_user_enabled) {
|
||||
unsigned cpu;
|
||||
|
||||
for_each_present_cpu(cpu) {
|
||||
@@ -347,11 +347,6 @@ static void watchdog_enable(unsigned int cpu)
|
||||
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
|
||||
hrtimer->function = watchdog_timer_fn;
|
||||
|
||||
if (!watchdog_enabled) {
|
||||
kthread_park(current);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Enable the perf event */
|
||||
watchdog_nmi_enable(cpu);
|
||||
|
||||
@@ -374,6 +369,11 @@ static void watchdog_disable(unsigned int cpu)
|
||||
watchdog_nmi_disable(cpu);
|
||||
}
|
||||
|
||||
static void watchdog_cleanup(unsigned int cpu, bool online)
|
||||
{
|
||||
watchdog_disable(cpu);
|
||||
}
|
||||
|
||||
static int watchdog_should_run(unsigned int cpu)
|
||||
{
|
||||
return __this_cpu_read(hrtimer_interrupts) !=
|
||||
@@ -475,28 +475,40 @@ static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
|
||||
static void watchdog_nmi_disable(unsigned int cpu) { return; }
|
||||
#endif /* CONFIG_HARDLOCKUP_DETECTOR */
|
||||
|
||||
static struct smp_hotplug_thread watchdog_threads = {
|
||||
.store = &softlockup_watchdog,
|
||||
.thread_should_run = watchdog_should_run,
|
||||
.thread_fn = watchdog,
|
||||
.thread_comm = "watchdog/%u",
|
||||
.setup = watchdog_enable,
|
||||
.cleanup = watchdog_cleanup,
|
||||
.park = watchdog_disable,
|
||||
.unpark = watchdog_enable,
|
||||
};
|
||||
|
||||
static int watchdog_enable_all_cpus(void)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (!watchdog_running) {
|
||||
err = smpboot_register_percpu_thread(&watchdog_threads);
|
||||
if (err)
|
||||
pr_err("Failed to create watchdog threads, disabled\n");
|
||||
else
|
||||
watchdog_running = 1;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/* prepare/enable/disable routines */
|
||||
/* sysctl functions */
|
||||
#ifdef CONFIG_SYSCTL
|
||||
static void watchdog_enable_all_cpus(void)
|
||||
{
|
||||
unsigned int cpu;
|
||||
|
||||
if (watchdog_disabled) {
|
||||
watchdog_disabled = 0;
|
||||
for_each_online_cpu(cpu)
|
||||
kthread_unpark(per_cpu(softlockup_watchdog, cpu));
|
||||
}
|
||||
}
|
||||
|
||||
static void watchdog_disable_all_cpus(void)
|
||||
{
|
||||
unsigned int cpu;
|
||||
|
||||
if (!watchdog_disabled) {
|
||||
watchdog_disabled = 1;
|
||||
for_each_online_cpu(cpu)
|
||||
kthread_park(per_cpu(softlockup_watchdog, cpu));
|
||||
if (watchdog_running) {
|
||||
watchdog_running = 0;
|
||||
smpboot_unregister_percpu_thread(&watchdog_threads);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -507,45 +519,48 @@ static void watchdog_disable_all_cpus(void)
|
||||
int proc_dowatchdog(struct ctl_table *table, int write,
|
||||
void __user *buffer, size_t *lenp, loff_t *ppos)
|
||||
{
|
||||
int ret;
|
||||
int err, old_thresh, old_enabled;
|
||||
|
||||
if (watchdog_disabled < 0)
|
||||
return -ENODEV;
|
||||
old_thresh = ACCESS_ONCE(watchdog_thresh);
|
||||
old_enabled = ACCESS_ONCE(watchdog_user_enabled);
|
||||
|
||||
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
||||
if (ret || !write)
|
||||
return ret;
|
||||
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
|
||||
if (err || !write)
|
||||
return err;
|
||||
|
||||
set_sample_period();
|
||||
/*
|
||||
* Watchdog threads shouldn't be enabled if they are
|
||||
* disabled. The 'watchdog_disabled' variable check in
|
||||
* disabled. The 'watchdog_running' variable check in
|
||||
* watchdog_*_all_cpus() function takes care of this.
|
||||
*/
|
||||
if (watchdog_enabled && watchdog_thresh)
|
||||
watchdog_enable_all_cpus();
|
||||
if (watchdog_user_enabled && watchdog_thresh)
|
||||
err = watchdog_enable_all_cpus();
|
||||
else
|
||||
watchdog_disable_all_cpus();
|
||||
|
||||
return ret;
|
||||
/* Restore old values on failure */
|
||||
if (err) {
|
||||
watchdog_thresh = old_thresh;
|
||||
watchdog_user_enabled = old_enabled;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
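proc_dowatchdog() now snapshots watchdog_thresh and watchdog_user_enabled before calling proc_dointvec_minmax(), and restores them if (re)starting the watchdog threads fails, so a rejected write cannot leave the knobs and the running state out of sync. The same save/try/rollback shape in miniature (the apply step is a made-up stand-in for watchdog_enable_all_cpus()):

#include <stdio.h>

static int thresh = 10;
static int user_enabled = 1;

/* Pretend thread creation fails when the threshold is out of range. */
static int apply_config(void)
{
    if (thresh <= 0 || thresh > 60)
        return -1;
    printf("watchdog running: thresh=%d\n", thresh);
    return 0;
}

static int write_config(int new_thresh, int new_enabled)
{
    int old_thresh = thresh;            /* snapshot before touching anything */
    int old_enabled = user_enabled;
    int err;

    thresh = new_thresh;                /* the proc handler writes the new values */
    user_enabled = new_enabled;

    err = (user_enabled && thresh) ? apply_config() : 0;
    if (err) {                          /* restore old values on failure */
        thresh = old_thresh;
        user_enabled = old_enabled;
    }
    return err;
}

int main(void)
{
    write_config(20, 1);                /* accepted */
    write_config(999, 1);               /* rejected, old values restored */
    printf("final: thresh=%d enabled=%d\n", thresh, user_enabled);
    return 0;
}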
||||
#endif /* CONFIG_SYSCTL */
|
||||
|
||||
static struct smp_hotplug_thread watchdog_threads = {
|
||||
.store = &softlockup_watchdog,
|
||||
.thread_should_run = watchdog_should_run,
|
||||
.thread_fn = watchdog,
|
||||
.thread_comm = "watchdog/%u",
|
||||
.setup = watchdog_enable,
|
||||
.park = watchdog_disable,
|
||||
.unpark = watchdog_enable,
|
||||
};
|
||||
|
||||
void __init lockup_detector_init(void)
|
||||
{
|
||||
set_sample_period();
|
||||
if (smpboot_register_percpu_thread(&watchdog_threads)) {
|
||||
pr_err("Failed to create watchdog threads, disabled\n");
|
||||
watchdog_disabled = -ENODEV;
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
if (watchdog_user_enabled) {
|
||||
watchdog_user_enabled = 0;
|
||||
pr_warning("Disabled lockup detectors by default for full dynticks\n");
|
||||
pr_warning("You can reactivate it with 'sysctl -w kernel.watchdog=1'\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (watchdog_user_enabled)
|
||||
watchdog_enable_all_cpus();
|
||||
}
|
||||
|
@@ -4644,7 +4644,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
|
||||
* Workqueues should be brought up before normal priority CPU notifiers.
|
||||
* This will be registered high priority CPU notifier.
|
||||
*/
|
||||
static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
|
||||
static int workqueue_cpu_up_callback(struct notifier_block *nfb,
|
||||
unsigned long action,
|
||||
void *hcpu)
|
||||
{
|
||||
@@ -4697,7 +4697,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
|
||||
* Workqueues should be brought down after normal priority CPU notifiers.
|
||||
* This will be registered as low priority CPU notifier.
|
||||
*/
|
||||
static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
|
||||
static int workqueue_cpu_down_callback(struct notifier_block *nfb,
|
||||
unsigned long action,
|
||||
void *hcpu)
|
||||
{
|
||||
|