Merge tag 'v3.16-rc5' into timers/core
Reason: Bring in upstream modifications, so the pending changes which depend on them can be queued.
@@ -223,3 +223,10 @@ endif
config MUTEX_SPIN_ON_OWNER
	def_bool y
	depends on SMP && !DEBUG_MUTEXES

config ARCH_USE_QUEUE_RWLOCK
	bool

config QUEUE_RWLOCK
	def_bool y if ARCH_USE_QUEUE_RWLOCK
	depends on SMP

@@ -423,6 +423,38 @@ static void kauditd_send_skb(struct sk_buff *skb)
	consume_skb(skb);
}

/*
 * kauditd_send_multicast_skb - send the skb to multicast userspace listeners
 *
 * This function doesn't consume an skb as might be expected since it has to
 * copy it anyways.
 */
static void kauditd_send_multicast_skb(struct sk_buff *skb)
{
	struct sk_buff *copy;
	struct audit_net *aunet = net_generic(&init_net, audit_net_id);
	struct sock *sock = aunet->nlsk;

	if (!netlink_has_listeners(sock, AUDIT_NLGRP_READLOG))
		return;

	/*
	 * The seemingly wasteful skb_copy() rather than bumping the refcount
	 * using skb_get() is necessary because non-standard mods are made to
	 * the skb by the original kaudit unicast socket send routine. The
	 * existing auditd daemon assumes this breakage. Fixing this would
	 * require co-ordinating a change in the established protocol between
	 * the kaudit kernel subsystem and the auditd userspace code. There is
	 * no reason for new multicast clients to continue with this
	 * non-compliance.
	 */
	copy = skb_copy(skb, GFP_KERNEL);
	if (!copy)
		return;

	nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
}

/*
 * flush_hold_queue - empty the hold queue if auditd appears
 *
@@ -1076,10 +1108,22 @@ static void audit_receive(struct sk_buff *skb)
	mutex_unlock(&audit_cmd_mutex);
}

/* Run custom bind function on netlink socket group connect or bind requests. */
static int audit_bind(int group)
{
	if (!capable(CAP_AUDIT_READ))
		return -EPERM;

	return 0;
}

static int __net_init audit_net_init(struct net *net)
{
	struct netlink_kernel_cfg cfg = {
		.input = audit_receive,
		.bind = audit_bind,
		.flags = NL_CFG_F_NONROOT_RECV,
		.groups = AUDIT_NLGRP_MAX,
	};

	struct audit_net *aunet = net_generic(net, audit_net_id);
@@ -1901,10 +1945,10 @@ out:
 * audit_log_end - end one audit record
 * @ab: the audit_buffer
 *
 * The netlink_* functions cannot be called inside an irq context, so
 * the audit buffer is placed on a queue and a tasklet is scheduled to
 * remove them from the queue outside the irq context. May be called in
 * any context.
 * netlink_unicast() cannot be called inside an irq context because it blocks
 * (last arg, flags, is not set to MSG_DONTWAIT), so the audit buffer is placed
 * on a queue and a tasklet is scheduled to remove them from the queue outside
 * the irq context. May be called in any context.
 */
void audit_log_end(struct audit_buffer *ab)
{
@@ -1914,6 +1958,18 @@ void audit_log_end(struct audit_buffer *ab)
		audit_log_lost("rate limit exceeded");
	} else {
		struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);

		kauditd_send_multicast_skb(ab->skb);

		/*
		 * The original kaudit unicast socket sends up messages with
		 * nlmsg_len set to the payload length rather than the entire
		 * message length. This breaks the standard set by netlink.
		 * The existing auditd daemon assumes this breakage. Fixing
		 * this would require co-ordinating a change in the established
		 * protocol between the kaudit kernel subsystem and the auditd
		 * userspace code.
		 */
		nlh->nlmsg_len = ab->skb->len - NLMSG_HDRLEN;

		if (audit_pid) {

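Illustrative note (not part of the patch): the new audit_bind()/kauditd_send_multicast_skb() pair exposes audit records on the AUDIT_NLGRP_READLOG netlink multicast group, gated by CAP_AUDIT_READ. A minimal userspace listener sketch, assuming uapi headers that already define AUDIT_NLGRP_READLOG, could look like this:

/* Sketch of a read-only audit multicast listener (assumes AUDIT_NLGRP_READLOG
 * from the matching uapi <linux/audit.h>; needs CAP_AUDIT_READ, see audit_bind()). */
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/audit.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif

int main(void)
{
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
	unsigned int grp = AUDIT_NLGRP_READLOG;
	char buf[8192];

	if (fd < 0)
		return 1;
	/* Joining the group is what triggers the kernel's audit_bind() capability check. */
	if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &grp, sizeof(grp)) < 0)
		return 1;
	for (;;) {
		ssize_t len = recv(fd, buf, sizeof(buf), 0);
		struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

		if (len <= 0)
			break;
		/* Multicast copies carry a standards-compliant nlmsg_len. */
		printf("type=%u len=%u\n", nlh->nlmsg_type, nlh->nlmsg_len);
	}
	close(fd);
	return 0;
}
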
@@ -728,6 +728,22 @@ static enum audit_state audit_filter_task(struct task_struct *tsk, char **key)
	return AUDIT_BUILD_CONTEXT;
}

static int audit_in_mask(const struct audit_krule *rule, unsigned long val)
{
	int word, bit;

	if (val > 0xffffffff)
		return false;

	word = AUDIT_WORD(val);
	if (word >= AUDIT_BITMASK_SIZE)
		return false;

	bit = AUDIT_BIT(val);

	return rule->mask[word] & bit;
}

/* At syscall entry and exit time, this filter is called if the
 * audit_state is not low enough that auditing cannot take place, but is
 * also not high enough that we already know we have to write an audit
@@ -745,11 +761,8 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,

	rcu_read_lock();
	if (!list_empty(list)) {
		int word = AUDIT_WORD(ctx->major);
		int bit = AUDIT_BIT(ctx->major);

		list_for_each_entry_rcu(e, list, list) {
			if ((e->rule.mask[word] & bit) == bit &&
			if (audit_in_mask(&e->rule, ctx->major) &&
			    audit_filter_rules(tsk, &e->rule, ctx, NULL,
					       &state, false)) {
				rcu_read_unlock();
@@ -769,20 +782,16 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
static int audit_filter_inode_name(struct task_struct *tsk,
				   struct audit_names *n,
				   struct audit_context *ctx) {
	int word, bit;
	int h = audit_hash_ino((u32)n->ino);
	struct list_head *list = &audit_inode_hash[h];
	struct audit_entry *e;
	enum audit_state state;

	word = AUDIT_WORD(ctx->major);
	bit = AUDIT_BIT(ctx->major);

	if (list_empty(list))
		return 0;

	list_for_each_entry_rcu(e, list, list) {
		if ((e->rule.mask[word] & bit) == bit &&
		if (audit_in_mask(&e->rule, ctx->major) &&
		    audit_filter_rules(tsk, &e->rule, ctx, n, &state, false)) {
			ctx->current_state = state;
			return 1;

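Illustrative note (not part of the patch): audit_in_mask() centralizes the word/bit arithmetic that the two filter loops above previously open-coded, and adds the range checks. With the uapi AUDIT_WORD()/AUDIT_BIT() definitions (roughly nr/32 and 1 << (nr % 32)), the lookup behaves like this standalone sketch (names prefixed SKETCH_ are stand-ins, not the kernel macros themselves):

/* Standalone sketch of the per-syscall mask test; mirrors the uapi macros. */
#include <stdio.h>

#define SKETCH_AUDIT_BITMASK_SIZE 64
#define SKETCH_AUDIT_WORD(nr)  ((unsigned int)((nr) / 32))
#define SKETCH_AUDIT_BIT(nr)   (1U << ((nr) % 32))

static int syscall_in_mask(const unsigned int *mask, unsigned long nr)
{
	unsigned int word;

	if (nr > 0xffffffff)
		return 0;
	word = SKETCH_AUDIT_WORD(nr);
	if (word >= SKETCH_AUDIT_BITMASK_SIZE)
		return 0;
	return (mask[word] & SKETCH_AUDIT_BIT(nr)) != 0;
}

int main(void)
{
	unsigned int mask[SKETCH_AUDIT_BITMASK_SIZE] = { 0 };

	mask[SKETCH_AUDIT_WORD(59)] |= SKETCH_AUDIT_BIT(59); /* syscall 59 -> word 1, bit 27 */
	printf("%d %d\n", syscall_in_mask(mask, 59), syscall_in_mask(mask, 60)); /* prints "1 0" */
	return 0;
}
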
@@ -424,23 +424,19 @@ bool capable(int cap)
EXPORT_SYMBOL(capable);

/**
 * inode_capable - Check superior capability over inode
 * capable_wrt_inode_uidgid - Check nsown_capable and uid and gid mapped
 * @inode: The inode in question
 * @cap: The capability in question
 *
 * Return true if the current task has the given superior capability
 * targeted at it's own user namespace and that the given inode is owned
 * by the current user namespace or a child namespace.
 *
 * Currently we check to see if an inode is owned by the current
 * user namespace by seeing if the inode's owner maps into the
 * current user namespace.
 *
 * Return true if the current task has the given capability targeted at
 * its own user namespace and that the given inode's uid and gid are
 * mapped into the current user namespace.
 */
bool inode_capable(const struct inode *inode, int cap)
bool capable_wrt_inode_uidgid(const struct inode *inode, int cap)
{
	struct user_namespace *ns = current_user_ns();

	return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid);
	return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) &&
		kgid_has_mapping(ns, inode->i_gid);
}
EXPORT_SYMBOL(inode_capable);
EXPORT_SYMBOL(capable_wrt_inode_uidgid);

kernel/cgroup.c (1877 changed lines): diff suppressed because it is too large.
@@ -59,7 +59,7 @@ static inline struct freezer *task_freezer(struct task_struct *task)

static struct freezer *parent_freezer(struct freezer *freezer)
{
	return css_freezer(css_parent(&freezer->css));
	return css_freezer(freezer->css.parent);
}

bool cgroup_freezing(struct task_struct *task)
@@ -73,10 +73,6 @@ bool cgroup_freezing(struct task_struct *task)
	return ret;
}

/*
 * cgroups_write_string() limits the size of freezer state strings to
 * CGROUP_LOCAL_BUFFER_SIZE
 */
static const char *freezer_state_strs(unsigned int state)
{
	if (state & CGROUP_FROZEN)
@@ -304,7 +300,7 @@ static int freezer_read(struct seq_file *m, void *v)

	/* update states bottom-up */
	css_for_each_descendant_post(pos, css) {
		if (!css_tryget(pos))
		if (!css_tryget_online(pos))
			continue;
		rcu_read_unlock();

@@ -404,7 +400,7 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
		struct freezer *pos_f = css_freezer(pos);
		struct freezer *parent = parent_freezer(pos_f);

		if (!css_tryget(pos))
		if (!css_tryget_online(pos))
			continue;
		rcu_read_unlock();

@@ -423,20 +419,22 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
	mutex_unlock(&freezer_mutex);
}

static int freezer_write(struct cgroup_subsys_state *css, struct cftype *cft,
			 char *buffer)
static ssize_t freezer_write(struct kernfs_open_file *of,
			     char *buf, size_t nbytes, loff_t off)
{
	bool freeze;

	if (strcmp(buffer, freezer_state_strs(0)) == 0)
	buf = strstrip(buf);

	if (strcmp(buf, freezer_state_strs(0)) == 0)
		freeze = false;
	else if (strcmp(buffer, freezer_state_strs(CGROUP_FROZEN)) == 0)
	else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
		freeze = true;
	else
		return -EINVAL;

	freezer_change_state(css_freezer(css), freeze);
	return 0;
	freezer_change_state(css_freezer(of_css(of)), freeze);
	return nbytes;
}

static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
@@ -460,7 +458,7 @@ static struct cftype files[] = {
		.name = "state",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = freezer_read,
		.write_string = freezer_write,
		.write = freezer_write,
	},
	{
		.name = "self_freezing",

@@ -19,6 +19,7 @@
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/kprobes.h>

#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>
@@ -104,6 +105,7 @@ void context_tracking_user_enter(void)
	}
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(context_tracking_user_enter);

#ifdef CONFIG_PREEMPT
/**
@@ -181,6 +183,7 @@ void context_tracking_user_exit(void)
	}
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(context_tracking_user_exit);

/**
 * __context_tracking_task_switch - context switch the syscall callbacks

@@ -20,6 +20,7 @@
#include <linux/gfp.h>
#include <linux/suspend.h>
#include <linux/lockdep.h>
#include <trace/events/power.h>

#include "smpboot.h"

@@ -520,7 +521,9 @@ int disable_nonboot_cpus(void)
	for_each_online_cpu(cpu) {
		if (cpu == first_cpu)
			continue;
		trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
		error = _cpu_down(cpu, 1);
		trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
		if (!error)
			cpumask_set_cpu(cpu, frozen_cpus);
		else {
@@ -563,7 +566,9 @@ void __ref enable_nonboot_cpus(void)
	arch_enable_nonboot_cpus_begin();

	for_each_cpu(cpu, frozen_cpus) {
		trace_suspend_resume(TPS("CPU_ON"), cpu, true);
		error = _cpu_up(cpu, 1);
		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
		if (!error) {
			pr_info("CPU%d is up\n", cpu);
			continue;

@@ -119,7 +119,7 @@ static inline struct cpuset *task_cs(struct task_struct *task)

static inline struct cpuset *parent_cs(struct cpuset *cs)
{
	return css_cs(css_parent(&cs->css));
	return css_cs(cs->css.parent);
}

#ifdef CONFIG_NUMA
@@ -691,11 +691,8 @@ restart:
		if (nslot == ndoms) {
			static int warnings = 10;
			if (warnings) {
				printk(KERN_WARNING
				 "rebuild_sched_domains confused:"
				  " nslot %d, ndoms %d, csn %d, i %d,"
				  " apn %d\n",
				  nslot, ndoms, csn, i, apn);
				pr_warn("rebuild_sched_domains confused: nslot %d, ndoms %d, csn %d, i %d, apn %d\n",
					nslot, ndoms, csn, i, apn);
				warnings--;
			}
			continue;
@@ -870,7 +867,7 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs, bool update_root)
				continue;
			}
		}
		if (!css_tryget(&cp->css))
		if (!css_tryget_online(&cp->css))
			continue;
		rcu_read_unlock();

@@ -885,6 +882,7 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs, bool update_root)
/**
 * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
 * @cs: the cpuset to consider
 * @trialcs: trial cpuset
 * @buf: buffer of cpu numbers written to this cpuset
 */
static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
@@ -1105,7 +1103,7 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs, bool update_root)
				continue;
			}
		}
		if (!css_tryget(&cp->css))
		if (!css_tryget_online(&cp->css))
			continue;
		rcu_read_unlock();

@@ -1183,7 +1181,13 @@ done:

int current_cpuset_is_being_rebound(void)
{
	return task_cs(current) == cpuset_being_rebound;
	int ret;

	rcu_read_lock();
	ret = task_cs(current) == cpuset_being_rebound;
	rcu_read_unlock();

	return ret;
}

static int update_relax_domain_level(struct cpuset *cs, s64 val)
@@ -1600,13 +1604,15 @@ out_unlock:
/*
 * Common handling for a write to a "cpus" or "mems" file.
 */
static int cpuset_write_resmask(struct cgroup_subsys_state *css,
				struct cftype *cft, char *buf)
static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
				    char *buf, size_t nbytes, loff_t off)
{
	struct cpuset *cs = css_cs(css);
	struct cpuset *cs = css_cs(of_css(of));
	struct cpuset *trialcs;
	int retval = -ENODEV;

	buf = strstrip(buf);

	/*
	 * CPU or memory hotunplug may leave @cs w/o any execution
	 * resources, in which case the hotplug code asynchronously updates
@@ -1617,7 +1623,17 @@ static int cpuset_write_resmask(struct cgroup_subsys_state *css,
	 * resources, wait for the previously scheduled operations before
	 * proceeding, so that we don't end up keep removing tasks added
	 * after execution capability is restored.
	 *
	 * cpuset_hotplug_work calls back into cgroup core via
	 * cgroup_transfer_tasks() and waiting for it from a cgroupfs
	 * operation like this one can lead to a deadlock through kernfs
	 * active_ref protection. Let's break the protection. Losing the
	 * protection is okay as we check whether @cs is online after
	 * grabbing cpuset_mutex anyway. This only happens on the legacy
	 * hierarchies.
	 */
	css_get(&cs->css);
	kernfs_break_active_protection(of->kn);
	flush_work(&cpuset_hotplug_work);

	mutex_lock(&cpuset_mutex);
@@ -1630,7 +1646,7 @@ static int cpuset_write_resmask(struct cgroup_subsys_state *css,
		goto out_unlock;
	}

	switch (cft->private) {
	switch (of_cft(of)->private) {
	case FILE_CPULIST:
		retval = update_cpumask(cs, trialcs, buf);
		break;
@@ -1645,7 +1661,9 @@ static int cpuset_write_resmask(struct cgroup_subsys_state *css,
	free_trial_cpuset(trialcs);
out_unlock:
	mutex_unlock(&cpuset_mutex);
	return retval;
	kernfs_unbreak_active_protection(of->kn);
	css_put(&cs->css);
	return retval ?: nbytes;
}

/*
@@ -1747,7 +1765,7 @@ static struct cftype files[] = {
	{
		.name = "cpus",
		.seq_show = cpuset_common_seq_show,
		.write_string = cpuset_write_resmask,
		.write = cpuset_write_resmask,
		.max_write_len = (100U + 6 * NR_CPUS),
		.private = FILE_CPULIST,
	},
@@ -1755,7 +1773,7 @@ static struct cftype files[] = {
	{
		.name = "mems",
		.seq_show = cpuset_common_seq_show,
		.write_string = cpuset_write_resmask,
		.write = cpuset_write_resmask,
		.max_write_len = (100U + 6 * MAX_NUMNODES),
		.private = FILE_MEMLIST,
	},
@@ -2011,7 +2029,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
		parent = parent_cs(parent);

	if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
		printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset ");
		pr_err("cpuset: failed to transfer tasks out of empty cpuset ");
		pr_cont_cgroup_name(cs->css.cgroup);
		pr_cont("\n");
	}
@@ -2149,7 +2167,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)

	rcu_read_lock();
	cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
		if (cs == &top_cpuset || !css_tryget(&cs->css))
		if (cs == &top_cpuset || !css_tryget_online(&cs->css))
			continue;
		rcu_read_unlock();

@@ -2530,7 +2548,7 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,

/**
 * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed
 * @task: pointer to task_struct of some task.
 * @tsk: pointer to task_struct of some task.
 *
 * Description: Prints @task's name, cpuset name, and cached copy of its
 * mems_allowed to the kernel log.
@@ -2548,7 +2566,7 @@ void cpuset_print_task_mems_allowed(struct task_struct *tsk)
	cgrp = task_cs(tsk)->css.cgroup;
	nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
			   tsk->mems_allowed);
	printk(KERN_INFO "%s cpuset=", tsk->comm);
	pr_info("%s cpuset=", tsk->comm);
	pr_cont_cgroup_name(cgrp);
	pr_cont(" mems_allowed=%s\n", cpuset_nodelist);

@@ -2640,10 +2658,10 @@ out:
/* Display task mems_allowed in /proc/<pid>/status file. */
void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
{
	seq_printf(m, "Mems_allowed:\t");
	seq_puts(m, "Mems_allowed:\t");
	seq_nodemask(m, &task->mems_allowed);
	seq_printf(m, "\n");
	seq_printf(m, "Mems_allowed_list:\t");
	seq_puts(m, "\n");
	seq_puts(m, "Mems_allowed_list:\t");
	seq_nodemask_list(m, &task->mems_allowed);
	seq_printf(m, "\n");
	seq_puts(m, "\n");
}

@@ -40,6 +40,7 @@
#include <linux/mm_types.h>
#include <linux/cgroup.h>
#include <linux/module.h>
#include <linux/mman.h>

#include "internal.h"

@@ -608,7 +609,8 @@ static inline int perf_cgroup_connect(int fd, struct perf_event *event,
	if (!f.file)
		return -EBADF;

	css = css_tryget_from_dir(f.file->f_dentry, &perf_event_cgrp_subsys);
	css = css_tryget_online_from_dir(f.file->f_dentry,
					 &perf_event_cgrp_subsys);
	if (IS_ERR(css)) {
		ret = PTR_ERR(css);
		goto out;
@@ -2973,6 +2975,22 @@ out:
	local_irq_restore(flags);
}

void perf_event_exec(void)
{
	struct perf_event_context *ctx;
	int ctxn;

	rcu_read_lock();
	for_each_task_context_nr(ctxn) {
		ctx = current->perf_event_ctxp[ctxn];
		if (!ctx)
			continue;

		perf_event_enable_on_exec(ctx);
	}
	rcu_read_unlock();
}

/*
 * Cross CPU call to read the hardware event
 */
@@ -5074,21 +5092,9 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
			NULL);
}

void perf_event_comm(struct task_struct *task)
void perf_event_comm(struct task_struct *task, bool exec)
{
	struct perf_comm_event comm_event;
	struct perf_event_context *ctx;
	int ctxn;

	rcu_read_lock();
	for_each_task_context_nr(ctxn) {
		ctx = task->perf_event_ctxp[ctxn];
		if (!ctx)
			continue;

		perf_event_enable_on_exec(ctx);
	}
	rcu_read_unlock();

	if (!atomic_read(&nr_comm_events))
		return;
@@ -5100,7 +5106,7 @@ void perf_event_comm(struct task_struct *task)
		.event_id  = {
			.header = {
				.type = PERF_RECORD_COMM,
				.misc = 0,
				.misc = exec ? PERF_RECORD_MISC_COMM_EXEC : 0,
				/* .size */
			},
			/* .pid */
@@ -5123,6 +5129,7 @@ struct perf_mmap_event {
	int		maj, min;
	u64		ino;
	u64		ino_generation;
	u32		prot, flags;

	struct {
		struct perf_event_header	header;
@@ -5164,6 +5171,8 @@ static void perf_event_mmap_output(struct perf_event *event,
		mmap_event->event_id.header.size += sizeof(mmap_event->min);
		mmap_event->event_id.header.size += sizeof(mmap_event->ino);
		mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation);
		mmap_event->event_id.header.size += sizeof(mmap_event->prot);
		mmap_event->event_id.header.size += sizeof(mmap_event->flags);
	}

	perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
@@ -5182,6 +5191,8 @@ static void perf_event_mmap_output(struct perf_event *event,
		perf_output_put(&handle, mmap_event->min);
		perf_output_put(&handle, mmap_event->ino);
		perf_output_put(&handle, mmap_event->ino_generation);
		perf_output_put(&handle, mmap_event->prot);
		perf_output_put(&handle, mmap_event->flags);
	}

	__output_copy(&handle, mmap_event->file_name,
@@ -5200,6 +5211,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
	struct file *file = vma->vm_file;
	int maj = 0, min = 0;
	u64 ino = 0, gen = 0;
	u32 prot = 0, flags = 0;
	unsigned int size;
	char tmp[16];
	char *buf = NULL;
@@ -5230,6 +5242,28 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
		gen = inode->i_generation;
		maj = MAJOR(dev);
		min = MINOR(dev);

		if (vma->vm_flags & VM_READ)
			prot |= PROT_READ;
		if (vma->vm_flags & VM_WRITE)
			prot |= PROT_WRITE;
		if (vma->vm_flags & VM_EXEC)
			prot |= PROT_EXEC;

		if (vma->vm_flags & VM_MAYSHARE)
			flags = MAP_SHARED;
		else
			flags = MAP_PRIVATE;

		if (vma->vm_flags & VM_DENYWRITE)
			flags |= MAP_DENYWRITE;
		if (vma->vm_flags & VM_MAYEXEC)
			flags |= MAP_EXECUTABLE;
		if (vma->vm_flags & VM_LOCKED)
			flags |= MAP_LOCKED;
		if (vma->vm_flags & VM_HUGETLB)
			flags |= MAP_HUGETLB;

		goto got_name;
	} else {
		name = (char *)arch_vma_name(vma);
@@ -5270,6 +5304,8 @@ got_name:
	mmap_event->min = min;
	mmap_event->ino = ino;
	mmap_event->ino_generation = gen;
	mmap_event->prot = prot;
	mmap_event->flags = flags;

	if (!(vma->vm_flags & VM_EXEC))
		mmap_event->event_id.header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -5310,6 +5346,8 @@ void perf_event_mmap(struct vm_area_struct *vma)
		/* .min (attr_mmap2 only) */
		/* .ino (attr_mmap2 only) */
		/* .ino_generation (attr_mmap2 only) */
		/* .prot (attr_mmap2 only) */
		/* .flags (attr_mmap2 only) */
	};

	perf_event_mmap_event(&mmap_event);
@@ -6892,10 +6930,6 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
	if (ret)
		return -EFAULT;

	/* disabled for now */
	if (attr->mmap2)
		return -EINVAL;

	if (attr->__reserved_1)
		return -EINVAL;

@@ -7121,6 +7155,13 @@ SYSCALL_DEFINE5(perf_event_open,
		}
	}

	if (is_sampling_event(event)) {
		if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
			err = -ENOTSUPP;
			goto err_alloc;
		}
	}

	account_event(event);

	/*
@@ -7432,7 +7473,7 @@ __perf_event_exit_task(struct perf_event *child_event,

static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
{
	struct perf_event *child_event;
	struct perf_event *child_event, *next;
	struct perf_event_context *child_ctx;
	unsigned long flags;

@@ -7486,7 +7527,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
	 */
	mutex_lock(&child_ctx->mutex);

	list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry)
	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
		__perf_event_exit_task(child_event, child_ctx, child);

	mutex_unlock(&child_ctx->mutex);

@@ -36,6 +36,7 @@
#include "../../mm/internal.h"	/* munlock_vma_page */
#include <linux/percpu-rwsem.h>
#include <linux/task_work.h>
#include <linux/shmem_fs.h>

#include <linux/uprobes.h>

@@ -127,7 +128,7 @@ struct xol_area {
 */
static bool valid_vma(struct vm_area_struct *vma, bool is_register)
{
	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED;
	vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE;

	if (is_register)
		flags |= VM_WRITE;
@@ -279,18 +280,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
 * supported by that architecture then we need to modify is_trap_at_addr and
 * uprobe_write_opcode accordingly. This would never be a problem for archs
 * that have fixed length instructions.
 */

/*
 *
 * uprobe_write_opcode - write the opcode at a given virtual address.
 * @mm: the probed process address space.
 * @vaddr: the virtual address to store the opcode.
 * @opcode: opcode to be written at @vaddr.
 *
 * Called with mm->mmap_sem held (for read and with a reference to
 * mm).
 *
 * For mm @mm, write the opcode at @vaddr.
 * Called with mm->mmap_sem held for write.
 * Return 0 (success) or a negative errno.
 */
int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
@@ -310,21 +306,25 @@ retry:
	if (ret <= 0)
		goto put_old;

	ret = anon_vma_prepare(vma);
	if (ret)
		goto put_old;

	ret = -ENOMEM;
	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
	if (!new_page)
		goto put_old;

	__SetPageUptodate(new_page);
	if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
		goto put_new;

	__SetPageUptodate(new_page);
	copy_highpage(new_page, old_page);
	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);

	ret = anon_vma_prepare(vma);
	if (ret)
		goto put_new;

	ret = __replace_page(vma, vaddr, old_page, new_page);
	if (ret)
		mem_cgroup_uncharge_page(new_page);

put_new:
	page_cache_release(new_page);
@@ -537,14 +537,15 @@ static int __copy_insn(struct address_space *mapping, struct file *filp,
			void *insn, int nbytes, loff_t offset)
{
	struct page *page;

	if (!mapping->a_ops->readpage)
		return -EIO;
	/*
	 * Ensure that the page that has the original instruction is
	 * populated and in page-cache.
	 * Ensure that the page that has the original instruction is populated
	 * and in page-cache. If ->readpage == NULL it must be shmem_mapping(),
	 * see uprobe_register().
	 */
	page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
	if (mapping->a_ops->readpage)
		page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp);
	else
		page = shmem_read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT);
	if (IS_ERR(page))
		return PTR_ERR(page);

@@ -845,7 +846,7 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u
{
	int err;

	if (!consumer_del(uprobe, uc))	/* WARN? */
	if (WARN_ON(!consumer_del(uprobe, uc)))
		return;

	err = register_for_each_vma(uprobe, NULL);
@@ -880,6 +881,9 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
	if (!uc->handler && !uc->ret_handler)
		return -EINVAL;

	/* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */
	if (!inode->i_mapping->a_ops->readpage && !shmem_mapping(inode->i_mapping))
		return -EIO;
	/* Racy, just to catch the obvious mistakes */
	if (offset > i_size_read(inode))
		return -EINVAL;
@@ -923,7 +927,7 @@ int uprobe_apply(struct inode *inode, loff_t offset,
	int ret = -ENOENT;

	uprobe = find_uprobe(inode, offset);
	if (!uprobe)
	if (WARN_ON(!uprobe))
		return ret;

	down_write(&uprobe->register_rwsem);
@@ -948,7 +952,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
	struct uprobe *uprobe;

	uprobe = find_uprobe(inode, offset);
	if (!uprobe)
	if (WARN_ON(!uprobe))
		return;

	down_write(&uprobe->register_rwsem);
@@ -1361,6 +1365,16 @@ unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs)
	return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE;
}

unsigned long uprobe_get_trap_addr(struct pt_regs *regs)
{
	struct uprobe_task *utask = current->utask;

	if (unlikely(utask && utask->active_uprobe))
		return utask->vaddr;

	return instruction_pointer(regs);
}

/*
 * Called with no locks held.
 * Called in context of a exiting or a exec-ing thread.

@@ -1487,7 +1487,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,

	total_forks++;
	spin_unlock(&current->sighand->siglock);
	syscall_tracepoint_update(p);
	write_unlock_irq(&tasklist_lock);

	proc_fork_connector(p);
	cgroup_post_fork(p);
	if (clone_flags & CLONE_THREAD)

@@ -85,6 +85,12 @@ void __gcov_merge_ior(gcov_type *counters, unsigned int n_counters)
}
EXPORT_SYMBOL(__gcov_merge_ior);

void __gcov_merge_time_profile(gcov_type *counters, unsigned int n_counters)
{
	/* Unused. */
}
EXPORT_SYMBOL(__gcov_merge_time_profile);

/**
 * gcov_enable_events - enable event reporting through gcov_event()
 *

@@ -18,7 +18,12 @@
#include <linux/vmalloc.h>
#include "gcov.h"

#if __GNUC__ == 4 && __GNUC_MINOR__ >= 9
#define GCOV_COUNTERS			9
#else
#define GCOV_COUNTERS			8
#endif

#define GCOV_TAG_FUNCTION_LENGTH	3

static struct gcov_info *gcov_info_head;

@@ -455,9 +455,9 @@ EXPORT_SYMBOL_GPL(irq_alloc_hwirqs);
 */
void irq_free_hwirqs(unsigned int from, int cnt)
{
	int i;
	int i, j;

	for (i = from; cnt > 0; i++, cnt--) {
	for (i = from, j = cnt; j > 0; i++, j--) {
		irq_set_status_flags(i, _IRQ_NOREQUEST | _IRQ_NOPROBE);
		arch_teardown_hwirq(i);
	}

@@ -1617,6 +1617,7 @@ static int __init crash_save_vmcoreinfo_init(void)
#ifdef CONFIG_MEMORY_FAILURE
	VMCOREINFO_NUMBER(PG_hwpoison);
#endif
	VMCOREINFO_NUMBER(PG_head_mask);
	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);

	arch_crash_save_vmcoreinfo();

kernel/kprobes.c (392 changed lines):
@@ -86,21 +86,8 @@ static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
|
||||
return &(kretprobe_table_locks[hash].lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Normally, functions that we'd want to prohibit kprobes in, are marked
|
||||
* __kprobes. But, there are cases where such functions already belong to
|
||||
* a different section (__sched for preempt_schedule)
|
||||
*
|
||||
* For such cases, we now have a blacklist
|
||||
*/
|
||||
static struct kprobe_blackpoint kprobe_blacklist[] = {
|
||||
{"preempt_schedule",},
|
||||
{"native_get_debugreg",},
|
||||
{"irq_entries_start",},
|
||||
{"common_interrupt",},
|
||||
{"mcount",}, /* mcount can be called from everywhere */
|
||||
{NULL} /* Terminator */
|
||||
};
|
||||
/* Blacklist -- list of struct kprobe_blacklist_entry */
|
||||
static LIST_HEAD(kprobe_blacklist);
|
||||
|
||||
#ifdef __ARCH_WANT_KPROBES_INSN_SLOT
|
||||
/*
|
||||
@@ -151,13 +138,13 @@ struct kprobe_insn_cache kprobe_insn_slots = {
|
||||
.insn_size = MAX_INSN_SIZE,
|
||||
.nr_garbage = 0,
|
||||
};
|
||||
static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c);
|
||||
static int collect_garbage_slots(struct kprobe_insn_cache *c);
|
||||
|
||||
/**
|
||||
* __get_insn_slot() - Find a slot on an executable page for an instruction.
|
||||
* We allocate an executable page if there's no room on existing ones.
|
||||
*/
|
||||
kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
|
||||
kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
|
||||
{
|
||||
struct kprobe_insn_page *kip;
|
||||
kprobe_opcode_t *slot = NULL;
|
||||
@@ -214,7 +201,7 @@ out:
|
||||
}
|
||||
|
||||
/* Return 1 if all garbages are collected, otherwise 0. */
|
||||
static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
|
||||
static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
|
||||
{
|
||||
kip->slot_used[idx] = SLOT_CLEAN;
|
||||
kip->nused--;
|
||||
@@ -235,7 +222,7 @@ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)
|
||||
static int collect_garbage_slots(struct kprobe_insn_cache *c)
|
||||
{
|
||||
struct kprobe_insn_page *kip, *next;
|
||||
|
||||
@@ -257,8 +244,8 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
|
||||
kprobe_opcode_t *slot, int dirty)
|
||||
void __free_insn_slot(struct kprobe_insn_cache *c,
|
||||
kprobe_opcode_t *slot, int dirty)
|
||||
{
|
||||
struct kprobe_insn_page *kip;
|
||||
|
||||
@@ -314,7 +301,7 @@ static inline void reset_kprobe_instance(void)
|
||||
* OR
|
||||
* - with preemption disabled - from arch/xxx/kernel/kprobes.c
|
||||
*/
|
||||
struct kprobe __kprobes *get_kprobe(void *addr)
|
||||
struct kprobe *get_kprobe(void *addr)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct kprobe *p;
|
||||
@@ -327,8 +314,9 @@ struct kprobe __kprobes *get_kprobe(void *addr)
|
||||
|
||||
return NULL;
|
||||
}
|
||||
NOKPROBE_SYMBOL(get_kprobe);
|
||||
|
||||
static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
|
||||
static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs);
|
||||
|
||||
/* Return true if the kprobe is an aggregator */
|
||||
static inline int kprobe_aggrprobe(struct kprobe *p)
|
||||
@@ -360,7 +348,7 @@ static bool kprobes_allow_optimization;
|
||||
* Call all pre_handler on the list, but ignores its return value.
|
||||
* This must be called from arch-dep optimized caller.
|
||||
*/
|
||||
void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
void opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
@@ -372,9 +360,10 @@ void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
reset_kprobe_instance();
|
||||
}
|
||||
}
|
||||
NOKPROBE_SYMBOL(opt_pre_handler);
|
||||
|
||||
/* Free optimized instructions and optimized_kprobe */
|
||||
static __kprobes void free_aggr_kprobe(struct kprobe *p)
|
||||
static void free_aggr_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct optimized_kprobe *op;
|
||||
|
||||
@@ -412,7 +401,7 @@ static inline int kprobe_disarmed(struct kprobe *p)
|
||||
}
|
||||
|
||||
/* Return true(!0) if the probe is queued on (un)optimizing lists */
|
||||
static int __kprobes kprobe_queued(struct kprobe *p)
|
||||
static int kprobe_queued(struct kprobe *p)
|
||||
{
|
||||
struct optimized_kprobe *op;
|
||||
|
||||
@@ -428,7 +417,7 @@ static int __kprobes kprobe_queued(struct kprobe *p)
|
||||
* Return an optimized kprobe whose optimizing code replaces
|
||||
* instructions including addr (exclude breakpoint).
|
||||
*/
|
||||
static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr)
|
||||
static struct kprobe *get_optimized_kprobe(unsigned long addr)
|
||||
{
|
||||
int i;
|
||||
struct kprobe *p = NULL;
|
||||
@@ -460,7 +449,7 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
|
||||
* Optimize (replace a breakpoint with a jump) kprobes listed on
|
||||
* optimizing_list.
|
||||
*/
|
||||
static __kprobes void do_optimize_kprobes(void)
|
||||
static void do_optimize_kprobes(void)
|
||||
{
|
||||
/* Optimization never be done when disarmed */
|
||||
if (kprobes_all_disarmed || !kprobes_allow_optimization ||
|
||||
@@ -488,7 +477,7 @@ static __kprobes void do_optimize_kprobes(void)
|
||||
* Unoptimize (replace a jump with a breakpoint and remove the breakpoint
|
||||
* if need) kprobes listed on unoptimizing_list.
|
||||
*/
|
||||
static __kprobes void do_unoptimize_kprobes(void)
|
||||
static void do_unoptimize_kprobes(void)
|
||||
{
|
||||
struct optimized_kprobe *op, *tmp;
|
||||
|
||||
@@ -520,7 +509,7 @@ static __kprobes void do_unoptimize_kprobes(void)
|
||||
}
|
||||
|
||||
/* Reclaim all kprobes on the free_list */
|
||||
static __kprobes void do_free_cleaned_kprobes(void)
|
||||
static void do_free_cleaned_kprobes(void)
|
||||
{
|
||||
struct optimized_kprobe *op, *tmp;
|
||||
|
||||
@@ -532,13 +521,13 @@ static __kprobes void do_free_cleaned_kprobes(void)
|
||||
}
|
||||
|
||||
/* Start optimizer after OPTIMIZE_DELAY passed */
|
||||
static __kprobes void kick_kprobe_optimizer(void)
|
||||
static void kick_kprobe_optimizer(void)
|
||||
{
|
||||
schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY);
|
||||
}
|
||||
|
||||
/* Kprobe jump optimizer */
|
||||
static __kprobes void kprobe_optimizer(struct work_struct *work)
|
||||
static void kprobe_optimizer(struct work_struct *work)
|
||||
{
|
||||
mutex_lock(&kprobe_mutex);
|
||||
/* Lock modules while optimizing kprobes */
|
||||
@@ -574,7 +563,7 @@ static __kprobes void kprobe_optimizer(struct work_struct *work)
|
||||
}
|
||||
|
||||
/* Wait for completing optimization and unoptimization */
|
||||
static __kprobes void wait_for_kprobe_optimizer(void)
|
||||
static void wait_for_kprobe_optimizer(void)
|
||||
{
|
||||
mutex_lock(&kprobe_mutex);
|
||||
|
||||
@@ -593,7 +582,7 @@ static __kprobes void wait_for_kprobe_optimizer(void)
|
||||
}
|
||||
|
||||
/* Optimize kprobe if p is ready to be optimized */
|
||||
static __kprobes void optimize_kprobe(struct kprobe *p)
|
||||
static void optimize_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct optimized_kprobe *op;
|
||||
|
||||
@@ -627,7 +616,7 @@ static __kprobes void optimize_kprobe(struct kprobe *p)
|
||||
}
|
||||
|
||||
/* Short cut to direct unoptimizing */
|
||||
static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
|
||||
static void force_unoptimize_kprobe(struct optimized_kprobe *op)
|
||||
{
|
||||
get_online_cpus();
|
||||
arch_unoptimize_kprobe(op);
|
||||
@@ -637,7 +626,7 @@ static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op)
|
||||
}
|
||||
|
||||
/* Unoptimize a kprobe if p is optimized */
|
||||
static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force)
|
||||
static void unoptimize_kprobe(struct kprobe *p, bool force)
|
||||
{
|
||||
struct optimized_kprobe *op;
|
||||
|
||||
@@ -697,7 +686,7 @@ static void reuse_unused_kprobe(struct kprobe *ap)
|
||||
}
|
||||
|
||||
/* Remove optimized instructions */
|
||||
static void __kprobes kill_optimized_kprobe(struct kprobe *p)
|
||||
static void kill_optimized_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct optimized_kprobe *op;
|
||||
|
||||
@@ -723,7 +712,7 @@ static void __kprobes kill_optimized_kprobe(struct kprobe *p)
|
||||
}
|
||||
|
||||
/* Try to prepare optimized instructions */
|
||||
static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
|
||||
static void prepare_optimized_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct optimized_kprobe *op;
|
||||
|
||||
@@ -732,7 +721,7 @@ static __kprobes void prepare_optimized_kprobe(struct kprobe *p)
|
||||
}
|
||||
|
||||
/* Allocate new optimized_kprobe and try to prepare optimized instructions */
|
||||
static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
|
||||
static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct optimized_kprobe *op;
|
||||
|
||||
@@ -747,13 +736,13 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
|
||||
return &op->kp;
|
||||
}
|
||||
|
||||
static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
|
||||
static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p);
|
||||
|
||||
/*
|
||||
* Prepare an optimized_kprobe and optimize it
|
||||
* NOTE: p must be a normal registered kprobe
|
||||
*/
|
||||
static __kprobes void try_to_optimize_kprobe(struct kprobe *p)
|
||||
static void try_to_optimize_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct kprobe *ap;
|
||||
struct optimized_kprobe *op;
|
||||
@@ -787,7 +776,7 @@ out:
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
static void __kprobes optimize_all_kprobes(void)
|
||||
static void optimize_all_kprobes(void)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct kprobe *p;
|
||||
@@ -810,7 +799,7 @@ out:
|
||||
mutex_unlock(&kprobe_mutex);
|
||||
}
|
||||
|
||||
static void __kprobes unoptimize_all_kprobes(void)
|
||||
static void unoptimize_all_kprobes(void)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct kprobe *p;
|
||||
@@ -861,7 +850,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
|
||||
#endif /* CONFIG_SYSCTL */
|
||||
|
||||
/* Put a breakpoint for a probe. Must be called with text_mutex locked */
|
||||
static void __kprobes __arm_kprobe(struct kprobe *p)
|
||||
static void __arm_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct kprobe *_p;
|
||||
|
||||
@@ -876,7 +865,7 @@ static void __kprobes __arm_kprobe(struct kprobe *p)
|
||||
}
|
||||
|
||||
/* Remove the breakpoint of a probe. Must be called with text_mutex locked */
|
||||
static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt)
|
||||
static void __disarm_kprobe(struct kprobe *p, bool reopt)
|
||||
{
|
||||
struct kprobe *_p;
|
||||
|
||||
@@ -911,13 +900,13 @@ static void reuse_unused_kprobe(struct kprobe *ap)
|
||||
BUG_ON(kprobe_unused(ap));
|
||||
}
|
||||
|
||||
static __kprobes void free_aggr_kprobe(struct kprobe *p)
|
||||
static void free_aggr_kprobe(struct kprobe *p)
|
||||
{
|
||||
arch_remove_kprobe(p);
|
||||
kfree(p);
|
||||
}
|
||||
|
||||
static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
|
||||
static struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
|
||||
{
|
||||
return kzalloc(sizeof(struct kprobe), GFP_KERNEL);
|
||||
}
|
||||
@@ -931,7 +920,7 @@ static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
|
||||
static int kprobe_ftrace_enabled;
|
||||
|
||||
/* Must ensure p->addr is really on ftrace */
|
||||
static int __kprobes prepare_kprobe(struct kprobe *p)
|
||||
static int prepare_kprobe(struct kprobe *p)
|
||||
{
|
||||
if (!kprobe_ftrace(p))
|
||||
return arch_prepare_kprobe(p);
|
||||
@@ -940,7 +929,7 @@ static int __kprobes prepare_kprobe(struct kprobe *p)
|
||||
}
|
||||
|
||||
/* Caller must lock kprobe_mutex */
|
||||
static void __kprobes arm_kprobe_ftrace(struct kprobe *p)
|
||||
static void arm_kprobe_ftrace(struct kprobe *p)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@@ -955,7 +944,7 @@ static void __kprobes arm_kprobe_ftrace(struct kprobe *p)
|
||||
}
|
||||
|
||||
/* Caller must lock kprobe_mutex */
|
||||
static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
|
||||
static void disarm_kprobe_ftrace(struct kprobe *p)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@@ -975,7 +964,7 @@ static void __kprobes disarm_kprobe_ftrace(struct kprobe *p)
|
||||
#endif
|
||||
|
||||
/* Arm a kprobe with text_mutex */
|
||||
static void __kprobes arm_kprobe(struct kprobe *kp)
|
||||
static void arm_kprobe(struct kprobe *kp)
|
||||
{
|
||||
if (unlikely(kprobe_ftrace(kp))) {
|
||||
arm_kprobe_ftrace(kp);
|
||||
@@ -992,7 +981,7 @@ static void __kprobes arm_kprobe(struct kprobe *kp)
|
||||
}
|
||||
|
||||
/* Disarm a kprobe with text_mutex */
|
||||
static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt)
|
||||
static void disarm_kprobe(struct kprobe *kp, bool reopt)
|
||||
{
|
||||
if (unlikely(kprobe_ftrace(kp))) {
|
||||
disarm_kprobe_ftrace(kp);
|
||||
@@ -1008,7 +997,7 @@ static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt)
|
||||
* Aggregate handlers for multiple kprobes support - these handlers
|
||||
* take care of invoking the individual kprobe handlers on p->list
|
||||
*/
|
||||
static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
@@ -1022,9 +1011,10 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
NOKPROBE_SYMBOL(aggr_pre_handler);
|
||||
|
||||
static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
unsigned long flags)
|
||||
static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
@@ -1036,9 +1026,10 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
}
|
||||
}
|
||||
}
|
||||
NOKPROBE_SYMBOL(aggr_post_handler);
|
||||
|
||||
static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
int trapnr)
|
||||
static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
int trapnr)
|
||||
{
|
||||
struct kprobe *cur = __this_cpu_read(kprobe_instance);
|
||||
|
||||
@@ -1052,8 +1043,9 @@ static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs,
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
NOKPROBE_SYMBOL(aggr_fault_handler);
|
||||
|
||||
static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
struct kprobe *cur = __this_cpu_read(kprobe_instance);
|
||||
int ret = 0;
|
||||
@@ -1065,9 +1057,10 @@ static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
|
||||
reset_kprobe_instance();
|
||||
return ret;
|
||||
}
|
||||
NOKPROBE_SYMBOL(aggr_break_handler);
|
||||
|
||||
/* Walks the list and increments nmissed count for multiprobe case */
|
||||
void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
|
||||
void kprobes_inc_nmissed_count(struct kprobe *p)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
if (!kprobe_aggrprobe(p)) {
|
||||
@@ -1078,9 +1071,10 @@ void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
|
||||
}
|
||||
return;
|
||||
}
|
||||
NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
|
||||
|
||||
void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
|
||||
struct hlist_head *head)
|
||||
void recycle_rp_inst(struct kretprobe_instance *ri,
|
||||
struct hlist_head *head)
|
||||
{
|
||||
struct kretprobe *rp = ri->rp;
|
||||
|
||||
@@ -1095,8 +1089,9 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
|
||||
/* Unregistering */
|
||||
hlist_add_head(&ri->hlist, head);
|
||||
}
|
||||
NOKPROBE_SYMBOL(recycle_rp_inst);
|
||||
|
||||
void __kprobes kretprobe_hash_lock(struct task_struct *tsk,
|
||||
void kretprobe_hash_lock(struct task_struct *tsk,
|
||||
struct hlist_head **head, unsigned long *flags)
|
||||
__acquires(hlist_lock)
|
||||
{
|
||||
@@ -1107,17 +1102,19 @@ __acquires(hlist_lock)
|
||||
hlist_lock = kretprobe_table_lock_ptr(hash);
|
||||
raw_spin_lock_irqsave(hlist_lock, *flags);
|
||||
}
|
||||
NOKPROBE_SYMBOL(kretprobe_hash_lock);
|
||||
|
||||
static void __kprobes kretprobe_table_lock(unsigned long hash,
|
||||
unsigned long *flags)
|
||||
static void kretprobe_table_lock(unsigned long hash,
|
||||
unsigned long *flags)
|
||||
__acquires(hlist_lock)
|
||||
{
|
||||
raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
|
||||
raw_spin_lock_irqsave(hlist_lock, *flags);
|
||||
}
|
||||
NOKPROBE_SYMBOL(kretprobe_table_lock);
|
||||
|
||||
void __kprobes kretprobe_hash_unlock(struct task_struct *tsk,
|
||||
unsigned long *flags)
|
||||
void kretprobe_hash_unlock(struct task_struct *tsk,
|
||||
unsigned long *flags)
|
||||
__releases(hlist_lock)
|
||||
{
|
||||
unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS);
|
||||
@@ -1126,14 +1123,16 @@ __releases(hlist_lock)
|
||||
hlist_lock = kretprobe_table_lock_ptr(hash);
|
||||
raw_spin_unlock_irqrestore(hlist_lock, *flags);
|
||||
}
|
||||
NOKPROBE_SYMBOL(kretprobe_hash_unlock);
|
||||
|
||||
static void __kprobes kretprobe_table_unlock(unsigned long hash,
|
||||
unsigned long *flags)
|
||||
static void kretprobe_table_unlock(unsigned long hash,
|
||||
unsigned long *flags)
|
||||
__releases(hlist_lock)
|
||||
{
|
||||
raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash);
|
||||
raw_spin_unlock_irqrestore(hlist_lock, *flags);
|
||||
}
|
||||
NOKPROBE_SYMBOL(kretprobe_table_unlock);
|
||||
|
||||
/*
|
||||
* This function is called from finish_task_switch when task tk becomes dead,
|
||||
@@ -1141,7 +1140,7 @@ __releases(hlist_lock)
|
||||
* with this task. These left over instances represent probed functions
|
||||
* that have been called but will never return.
|
||||
*/
|
||||
void __kprobes kprobe_flush_task(struct task_struct *tk)
|
||||
void kprobe_flush_task(struct task_struct *tk)
|
||||
{
|
||||
struct kretprobe_instance *ri;
|
||||
struct hlist_head *head, empty_rp;
|
||||
@@ -1166,6 +1165,7 @@ void __kprobes kprobe_flush_task(struct task_struct *tk)
|
||||
kfree(ri);
|
||||
}
|
||||
}
|
||||
NOKPROBE_SYMBOL(kprobe_flush_task);
|
||||
|
||||
static inline void free_rp_inst(struct kretprobe *rp)
|
||||
{
|
||||
@@ -1178,7 +1178,7 @@ static inline void free_rp_inst(struct kretprobe *rp)
|
||||
}
|
||||
}
|
||||
|
||||
static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
|
||||
static void cleanup_rp_inst(struct kretprobe *rp)
|
||||
{
|
||||
unsigned long flags, hash;
|
||||
struct kretprobe_instance *ri;
|
||||
@@ -1197,12 +1197,13 @@ static void __kprobes cleanup_rp_inst(struct kretprobe *rp)
|
||||
}
|
||||
free_rp_inst(rp);
|
||||
}
|
||||
NOKPROBE_SYMBOL(cleanup_rp_inst);
|
||||
|
||||
/*
|
||||
* Add the new probe to ap->list. Fail if this is the
|
||||
* second jprobe at the address - two jprobes can't coexist
|
||||
*/
|
||||
static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
|
||||
static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
|
||||
{
|
||||
BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
|
||||
|
||||
@@ -1226,7 +1227,7 @@ static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
|
||||
* Fill in the required fields of the "manager kprobe". Replace the
|
||||
* earlier kprobe in the hlist with the manager kprobe
|
||||
*/
|
||||
static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
|
||||
static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
|
||||
{
|
||||
/* Copy p's insn slot to ap */
|
||||
copy_kprobe(p, ap);
|
||||
@@ -1252,8 +1253,7 @@ static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
|
||||
* This is the second or subsequent kprobe at the address - handle
|
||||
* the intricacies
|
||||
*/
|
||||
static int __kprobes register_aggr_kprobe(struct kprobe *orig_p,
|
||||
struct kprobe *p)
|
||||
static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
|
||||
{
|
||||
int ret = 0;
|
||||
struct kprobe *ap = orig_p;
|
||||
@@ -1324,25 +1324,29 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __kprobes in_kprobes_functions(unsigned long addr)
|
||||
bool __weak arch_within_kprobe_blacklist(unsigned long addr)
|
||||
{
|
||||
struct kprobe_blackpoint *kb;
|
||||
/* The __kprobes marked functions and entry code must not be probed */
|
||||
return addr >= (unsigned long)__kprobes_text_start &&
|
||||
addr < (unsigned long)__kprobes_text_end;
|
||||
}
|
||||
|
||||
if (addr >= (unsigned long)__kprobes_text_start &&
|
||||
addr < (unsigned long)__kprobes_text_end)
|
||||
return -EINVAL;
|
||||
static bool within_kprobe_blacklist(unsigned long addr)
|
||||
{
|
||||
struct kprobe_blacklist_entry *ent;
|
||||
|
||||
if (arch_within_kprobe_blacklist(addr))
|
||||
return true;
|
||||
/*
|
||||
* If there exists a kprobe_blacklist, verify and
|
||||
* fail any probe registration in the prohibited area
|
||||
*/
|
||||
for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
|
||||
if (kb->start_addr) {
|
||||
if (addr >= kb->start_addr &&
|
||||
addr < (kb->start_addr + kb->range))
|
||||
return -EINVAL;
|
||||
}
|
||||
list_for_each_entry(ent, &kprobe_blacklist, list) {
|
||||
if (addr >= ent->start_addr && addr < ent->end_addr)
|
||||
return true;
|
||||
}
|
||||
return 0;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1351,7 +1355,7 @@ static int __kprobes in_kprobes_functions(unsigned long addr)
|
||||
* This returns encoded errors if it fails to look up symbol or invalid
|
||||
* combination of parameters.
|
||||
*/
|
||||
static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
|
||||
static kprobe_opcode_t *kprobe_addr(struct kprobe *p)
|
||||
{
|
||||
kprobe_opcode_t *addr = p->addr;
|
||||
|
||||
@@ -1374,7 +1378,7 @@ invalid:
|
||||
}
|
||||
|
||||
/* Check passed kprobe is valid and return kprobe in kprobe_table. */
|
||||
static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
|
||||
static struct kprobe *__get_valid_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct kprobe *ap, *list_p;
|
||||
|
||||
@@ -1406,8 +1410,8 @@ static inline int check_kprobe_rereg(struct kprobe *p)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __kprobes int check_kprobe_address_safe(struct kprobe *p,
|
||||
struct module **probed_mod)
|
||||
static int check_kprobe_address_safe(struct kprobe *p,
|
||||
struct module **probed_mod)
|
||||
{
|
||||
int ret = 0;
|
||||
unsigned long ftrace_addr;
|
||||
@@ -1433,7 +1437,7 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
|
||||
|
||||
/* Ensure it is not in reserved area nor out of text */
|
||||
if (!kernel_text_address((unsigned long) p->addr) ||
|
||||
in_kprobes_functions((unsigned long) p->addr) ||
|
||||
within_kprobe_blacklist((unsigned long) p->addr) ||
|
||||
jump_label_text_reserved(p->addr, p->addr)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
@@ -1469,7 +1473,7 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int __kprobes register_kprobe(struct kprobe *p)
|
||||
int register_kprobe(struct kprobe *p)
|
||||
{
|
||||
int ret;
|
||||
struct kprobe *old_p;
|
||||
@@ -1531,7 +1535,7 @@ out:
|
||||
EXPORT_SYMBOL_GPL(register_kprobe);
|
||||
|
||||
/* Check if all probes on the aggrprobe are disabled */
|
||||
static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
|
||||
static int aggr_kprobe_disabled(struct kprobe *ap)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
@@ -1547,7 +1551,7 @@ static int __kprobes aggr_kprobe_disabled(struct kprobe *ap)
|
||||
}
|
||||
|
||||
/* Disable one kprobe: Make sure called under kprobe_mutex is locked */
|
||||
static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
|
||||
static struct kprobe *__disable_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct kprobe *orig_p;
|
||||
|
||||
@@ -1574,7 +1578,7 @@ static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p)
|
||||
/*
|
||||
* Unregister a kprobe without a scheduler synchronization.
|
||||
*/
|
||||
static int __kprobes __unregister_kprobe_top(struct kprobe *p)
|
||||
static int __unregister_kprobe_top(struct kprobe *p)
|
||||
{
|
||||
struct kprobe *ap, *list_p;
|
||||
|
||||
@@ -1631,7 +1635,7 @@ disarmed:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
|
||||
static void __unregister_kprobe_bottom(struct kprobe *p)
|
||||
{
|
||||
struct kprobe *ap;
|
||||
|
||||
@@ -1647,7 +1651,7 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
|
||||
/* Otherwise, do nothing. */
|
||||
}
|
||||
|
||||
int __kprobes register_kprobes(struct kprobe **kps, int num)
|
||||
int register_kprobes(struct kprobe **kps, int num)
|
||||
{
|
||||
int i, ret = 0;
|
||||
|
||||
@@ -1665,13 +1669,13 @@ int __kprobes register_kprobes(struct kprobe **kps, int num)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_kprobes);
|
||||
|
||||
void __kprobes unregister_kprobe(struct kprobe *p)
|
||||
void unregister_kprobe(struct kprobe *p)
|
||||
{
|
||||
unregister_kprobes(&p, 1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_kprobe);
|
||||
|
||||
void __kprobes unregister_kprobes(struct kprobe **kps, int num)
|
||||
void unregister_kprobes(struct kprobe **kps, int num)
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -1700,7 +1704,7 @@ unsigned long __weak arch_deref_entry_point(void *entry)
|
||||
return (unsigned long)entry;
|
||||
}
|
||||
|
||||
int __kprobes register_jprobes(struct jprobe **jps, int num)
|
||||
int register_jprobes(struct jprobe **jps, int num)
|
||||
{
|
||||
struct jprobe *jp;
|
||||
int ret = 0, i;
|
||||
@@ -1731,19 +1735,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_jprobes);
|
||||
|
||||
int __kprobes register_jprobe(struct jprobe *jp)
|
||||
int register_jprobe(struct jprobe *jp)
|
||||
{
|
||||
return register_jprobes(&jp, 1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_jprobe);
|
||||
|
||||
void __kprobes unregister_jprobe(struct jprobe *jp)
|
||||
void unregister_jprobe(struct jprobe *jp)
|
||||
{
|
||||
unregister_jprobes(&jp, 1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_jprobe);
|
||||
|
||||
void __kprobes unregister_jprobes(struct jprobe **jps, int num)
|
||||
void unregister_jprobes(struct jprobe **jps, int num)
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -1768,8 +1772,7 @@ EXPORT_SYMBOL_GPL(unregister_jprobes);
|
||||
* This kprobe pre_handler is registered with every kretprobe. When the probe
|
||||
* hits, it will set up the return probe.
|
||||
*/
|
||||
static int __kprobes pre_handler_kretprobe(struct kprobe *p,
|
||||
struct pt_regs *regs)
|
||||
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
struct kretprobe *rp = container_of(p, struct kretprobe, kp);
|
||||
unsigned long hash, flags = 0;
|
||||
@@ -1807,8 +1810,9 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
NOKPROBE_SYMBOL(pre_handler_kretprobe);
|
||||
|
||||
int __kprobes register_kretprobe(struct kretprobe *rp)
|
||||
int register_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
int ret = 0;
|
||||
struct kretprobe_instance *inst;
|
||||
@@ -1861,7 +1865,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_kretprobe);
|
||||
|
||||
int __kprobes register_kretprobes(struct kretprobe **rps, int num)
|
||||
int register_kretprobes(struct kretprobe **rps, int num)
|
||||
{
|
||||
int ret = 0, i;
|
||||
|
||||
@@ -1879,13 +1883,13 @@ int __kprobes register_kretprobes(struct kretprobe **rps, int num)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_kretprobes);
|
||||
|
||||
void __kprobes unregister_kretprobe(struct kretprobe *rp)
|
||||
void unregister_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
unregister_kretprobes(&rp, 1);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_kretprobe);
|
||||
|
||||
void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
|
||||
void unregister_kretprobes(struct kretprobe **rps, int num)
|
||||
{
|
||||
int i;
|
||||
|
||||
@@ -1908,38 +1912,38 @@ void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
|
||||
EXPORT_SYMBOL_GPL(unregister_kretprobes);
|
||||
|
||||
#else /* CONFIG_KRETPROBES */
|
||||
int __kprobes register_kretprobe(struct kretprobe *rp)
|
||||
int register_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_kretprobe);
|
||||
|
||||
int __kprobes register_kretprobes(struct kretprobe **rps, int num)
|
||||
int register_kretprobes(struct kretprobe **rps, int num)
|
||||
{
|
||||
return -ENOSYS;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_kretprobes);
|
||||
|
||||
void __kprobes unregister_kretprobe(struct kretprobe *rp)
|
||||
void unregister_kretprobe(struct kretprobe *rp)
|
||||
{
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_kretprobe);
|
||||
|
||||
void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
|
||||
void unregister_kretprobes(struct kretprobe **rps, int num)
|
||||
{
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_kretprobes);
|
||||
|
||||
static int __kprobes pre_handler_kretprobe(struct kprobe *p,
|
||||
struct pt_regs *regs)
|
||||
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
NOKPROBE_SYMBOL(pre_handler_kretprobe);
|
||||
|
||||
#endif /* CONFIG_KRETPROBES */
|
||||
|
||||
/* Set the kprobe gone and remove its instruction buffer. */
|
||||
static void __kprobes kill_kprobe(struct kprobe *p)
|
||||
static void kill_kprobe(struct kprobe *p)
|
||||
{
|
||||
struct kprobe *kp;
|
||||
|
||||
@@ -1963,7 +1967,7 @@ static void __kprobes kill_kprobe(struct kprobe *p)
|
||||
}
|
||||
|
||||
/* Disable one kprobe */
|
||||
int __kprobes disable_kprobe(struct kprobe *kp)
|
||||
int disable_kprobe(struct kprobe *kp)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
@@ -1979,7 +1983,7 @@ int __kprobes disable_kprobe(struct kprobe *kp)
|
||||
EXPORT_SYMBOL_GPL(disable_kprobe);
|
||||
|
||||
/* Enable one kprobe */
|
||||
int __kprobes enable_kprobe(struct kprobe *kp)
|
||||
int enable_kprobe(struct kprobe *kp)
|
||||
{
|
||||
int ret = 0;
|
||||
struct kprobe *p;
|
||||
@@ -2012,16 +2016,49 @@ out:
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(enable_kprobe);
|
||||
|
||||
void __kprobes dump_kprobe(struct kprobe *kp)
|
||||
void dump_kprobe(struct kprobe *kp)
|
||||
{
|
||||
printk(KERN_WARNING "Dumping kprobe:\n");
|
||||
printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
|
||||
kp->symbol_name, kp->addr, kp->offset);
|
||||
}
|
||||
NOKPROBE_SYMBOL(dump_kprobe);
|
||||
|
||||
/*
|
||||
* Lookup and populate the kprobe_blacklist.
|
||||
*
|
||||
* Unlike the kretprobe blacklist, we'll need to determine
|
||||
* the range of addresses that belong to the said functions,
|
||||
* since a kprobe need not necessarily be at the beginning
|
||||
* of a function.
|
||||
*/
|
||||
static int __init populate_kprobe_blacklist(unsigned long *start,
|
||||
unsigned long *end)
|
||||
{
|
||||
unsigned long *iter;
|
||||
struct kprobe_blacklist_entry *ent;
|
||||
unsigned long offset = 0, size = 0;
|
||||
|
||||
for (iter = start; iter < end; iter++) {
|
||||
if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) {
|
||||
pr_err("Failed to find blacklist %p\n", (void *)*iter);
|
||||
continue;
|
||||
}
|
||||
|
||||
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
|
||||
if (!ent)
|
||||
return -ENOMEM;
|
||||
ent->start_addr = *iter;
|
||||
ent->end_addr = *iter + size;
|
||||
INIT_LIST_HEAD(&ent->list);
|
||||
list_add_tail(&ent->list, &kprobe_blacklist);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
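For context: the __start_kprobe_blacklist..__stop_kprobe_blacklist range that populate_kprobe_blacklist() walks is filled by the NOKPROBE_SYMBOL() annotations used throughout this patch (pre_handler_kretprobe, dump_kprobe, notify_die, ...). A rough sketch of that macro, assuming the include/linux/kprobes.h definition from this series; the exact expansion may differ:

/* Sketch only: each NOKPROBE_SYMBOL(func) emits the function's address
 * into the _kprobe_blacklist section, which the linker brackets with
 * __start_kprobe_blacklist/__stop_kprobe_blacklist for init_kprobes(). */
#define __NOKPROBE_SYMBOL(fname)				\
static unsigned long __used					\
	__attribute__((section("_kprobe_blacklist")))		\
	_kbl_addr_##fname = (unsigned long)fname;
#define NOKPROBE_SYMBOL(fname)	__NOKPROBE_SYMBOL(fname)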
/* Module notifier call back, checking kprobes on the module */
|
||||
static int __kprobes kprobes_module_callback(struct notifier_block *nb,
|
||||
unsigned long val, void *data)
|
||||
static int kprobes_module_callback(struct notifier_block *nb,
|
||||
unsigned long val, void *data)
|
||||
{
|
||||
struct module *mod = data;
|
||||
struct hlist_head *head;
|
||||
@@ -2062,14 +2099,13 @@ static struct notifier_block kprobe_module_nb = {
|
||||
.priority = 0
|
||||
};
|
||||
|
||||
/* Markers of _kprobe_blacklist section */
|
||||
extern unsigned long __start_kprobe_blacklist[];
|
||||
extern unsigned long __stop_kprobe_blacklist[];
|
||||
|
||||
static int __init init_kprobes(void)
|
||||
{
|
||||
int i, err = 0;
|
||||
unsigned long offset = 0, size = 0;
|
||||
char *modname, namebuf[KSYM_NAME_LEN];
|
||||
const char *symbol_name;
|
||||
void *addr;
|
||||
struct kprobe_blackpoint *kb;
|
||||
|
||||
/* FIXME allocate the probe table, currently defined statically */
|
||||
/* initialize all list heads */
|
||||
@@ -2079,26 +2115,11 @@ static int __init init_kprobes(void)
|
||||
raw_spin_lock_init(&(kretprobe_table_locks[i].lock));
|
||||
}
|
||||
|
||||
/*
|
||||
* Lookup and populate the kprobe_blacklist.
|
||||
*
|
||||
* Unlike the kretprobe blacklist, we'll need to determine
|
||||
* the range of addresses that belong to the said functions,
|
||||
* since a kprobe need not necessarily be at the beginning
|
||||
* of a function.
|
||||
*/
|
||||
for (kb = kprobe_blacklist; kb->name != NULL; kb++) {
|
||||
kprobe_lookup_name(kb->name, addr);
|
||||
if (!addr)
|
||||
continue;
|
||||
|
||||
kb->start_addr = (unsigned long)addr;
|
||||
symbol_name = kallsyms_lookup(kb->start_addr,
|
||||
&size, &offset, &modname, namebuf);
|
||||
if (!symbol_name)
|
||||
kb->range = 0;
|
||||
else
|
||||
kb->range = size;
|
||||
err = populate_kprobe_blacklist(__start_kprobe_blacklist,
|
||||
__stop_kprobe_blacklist);
|
||||
if (err) {
|
||||
pr_err("kprobes: failed to populate blacklist: %d\n", err);
|
||||
pr_err("Please take care of using kprobes.\n");
|
||||
}
|
||||
|
||||
if (kretprobe_blacklist_size) {
|
||||
@@ -2138,7 +2159,7 @@ static int __init init_kprobes(void)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_FS
|
||||
static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
|
||||
static void report_probe(struct seq_file *pi, struct kprobe *p,
|
||||
const char *sym, int offset, char *modname, struct kprobe *pp)
|
||||
{
|
||||
char *kprobe_type;
|
||||
@@ -2167,12 +2188,12 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
|
||||
(kprobe_ftrace(pp) ? "[FTRACE]" : ""));
|
||||
}
|
||||
|
||||
static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
|
||||
static void *kprobe_seq_start(struct seq_file *f, loff_t *pos)
|
||||
{
|
||||
return (*pos < KPROBE_TABLE_SIZE) ? pos : NULL;
|
||||
}
|
||||
|
||||
static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
|
||||
static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
|
||||
{
|
||||
(*pos)++;
|
||||
if (*pos >= KPROBE_TABLE_SIZE)
|
||||
@@ -2180,12 +2201,12 @@ static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos)
|
||||
return pos;
|
||||
}
|
||||
|
||||
static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v)
|
||||
static void kprobe_seq_stop(struct seq_file *f, void *v)
|
||||
{
|
||||
/* Nothing to do */
|
||||
}
|
||||
|
||||
static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v)
|
||||
static int show_kprobe_addr(struct seq_file *pi, void *v)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct kprobe *p, *kp;
|
||||
@@ -2216,7 +2237,7 @@ static const struct seq_operations kprobes_seq_ops = {
|
||||
.show = show_kprobe_addr
|
||||
};
|
||||
|
||||
static int __kprobes kprobes_open(struct inode *inode, struct file *filp)
|
||||
static int kprobes_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return seq_open(filp, &kprobes_seq_ops);
|
||||
}
|
||||
@@ -2228,7 +2249,47 @@ static const struct file_operations debugfs_kprobes_operations = {
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
static void __kprobes arm_all_kprobes(void)
|
||||
/* kprobes/blacklist -- shows which functions cannot be probed */
|
||||
static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos)
|
||||
{
|
||||
return seq_list_start(&kprobe_blacklist, *pos);
|
||||
}
|
||||
|
||||
static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
{
|
||||
return seq_list_next(v, &kprobe_blacklist, pos);
|
||||
}
|
||||
|
||||
static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
|
||||
{
|
||||
struct kprobe_blacklist_entry *ent =
|
||||
list_entry(v, struct kprobe_blacklist_entry, list);
|
||||
|
||||
seq_printf(m, "0x%p-0x%p\t%ps\n", (void *)ent->start_addr,
|
||||
(void *)ent->end_addr, (void *)ent->start_addr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct seq_operations kprobe_blacklist_seq_ops = {
|
||||
.start = kprobe_blacklist_seq_start,
|
||||
.next = kprobe_blacklist_seq_next,
|
||||
.stop = kprobe_seq_stop, /* Reuse void function */
|
||||
.show = kprobe_blacklist_seq_show,
|
||||
};
|
||||
|
||||
static int kprobe_blacklist_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
return seq_open(filp, &kprobe_blacklist_seq_ops);
|
||||
}
|
||||
|
||||
static const struct file_operations debugfs_kprobe_blacklist_ops = {
|
||||
.open = kprobe_blacklist_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
static void arm_all_kprobes(void)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct kprobe *p;
|
||||
@@ -2256,7 +2317,7 @@ already_enabled:
|
||||
return;
|
||||
}
|
||||
|
||||
static void __kprobes disarm_all_kprobes(void)
|
||||
static void disarm_all_kprobes(void)
|
||||
{
|
||||
struct hlist_head *head;
|
||||
struct kprobe *p;
|
||||
@@ -2340,7 +2401,7 @@ static const struct file_operations fops_kp = {
|
||||
.llseek = default_llseek,
|
||||
};
|
||||
|
||||
static int __kprobes debugfs_kprobe_init(void)
|
||||
static int __init debugfs_kprobe_init(void)
|
||||
{
|
||||
struct dentry *dir, *file;
|
||||
unsigned int value = 1;
|
||||
@@ -2351,19 +2412,24 @@ static int __kprobes debugfs_kprobe_init(void)
|
||||
|
||||
file = debugfs_create_file("list", 0444, dir, NULL,
|
||||
&debugfs_kprobes_operations);
|
||||
if (!file) {
|
||||
debugfs_remove(dir);
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (!file)
|
||||
goto error;
|
||||
|
||||
file = debugfs_create_file("enabled", 0600, dir,
|
||||
&value, &fops_kp);
|
||||
if (!file) {
|
||||
debugfs_remove(dir);
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (!file)
|
||||
goto error;
|
||||
|
||||
file = debugfs_create_file("blacklist", 0444, dir, NULL,
|
||||
&debugfs_kprobe_blacklist_ops);
|
||||
if (!file)
|
||||
goto error;
|
||||
|
||||
return 0;
|
||||
|
||||
error:
|
||||
debugfs_remove(dir);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
late_initcall(debugfs_kprobe_init);
|
||||
|
@@ -24,4 +24,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
|
||||
obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
|
||||
obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
|
||||
obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
|
||||
obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o
|
||||
obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
|
||||
|
kernel/locking/qrwlock.c (new file, 133 lines)
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
* Queue read/write lock
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
|
||||
*
|
||||
* Authors: Waiman Long <waiman.long@hp.com>
|
||||
*/
|
||||
#include <linux/smp.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <asm/qrwlock.h>
|
||||
|
||||
/**
|
||||
* rspin_until_writer_unlock - inc reader count & spin until writer is gone
|
||||
* @lock : Pointer to queue rwlock structure
|
||||
* @cnts : Current queue rwlock lock count (writer status is in the low byte)
|
||||
*
|
||||
* In interrupt context or at the head of the queue, the reader will just
|
||||
* increment the reader count & wait until the writer releases the lock.
|
||||
*/
|
||||
static __always_inline void
|
||||
rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
|
||||
{
|
||||
while ((cnts & _QW_WMASK) == _QW_LOCKED) {
|
||||
arch_mutex_cpu_relax();
|
||||
cnts = smp_load_acquire((u32 *)&lock->cnts);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* queue_read_lock_slowpath - acquire read lock of a queue rwlock
|
||||
* @lock: Pointer to queue rwlock structure
|
||||
*/
|
||||
void queue_read_lock_slowpath(struct qrwlock *lock)
|
||||
{
|
||||
u32 cnts;
|
||||
|
||||
/*
|
||||
* Readers come here when they cannot get the lock without waiting
|
||||
*/
|
||||
if (unlikely(in_interrupt())) {
|
||||
/*
|
||||
* Readers in interrupt context will spin until the lock is
|
||||
* available without waiting in the queue.
|
||||
*/
|
||||
cnts = smp_load_acquire((u32 *)&lock->cnts);
|
||||
rspin_until_writer_unlock(lock, cnts);
|
||||
return;
|
||||
}
|
||||
atomic_sub(_QR_BIAS, &lock->cnts);
|
||||
|
||||
/*
|
||||
* Put the reader into the wait queue
|
||||
*/
|
||||
arch_spin_lock(&lock->lock);
|
||||
|
||||
/*
|
||||
* At the head of the wait queue now, wait until the writer state
|
||||
* goes to 0 and then try to increment the reader count and get
|
||||
* the lock. It is possible that an incoming writer may steal the
|
||||
* lock in the interim, so it is necessary to check the writer byte
|
||||
* to make sure that the write lock isn't taken.
|
||||
*/
|
||||
while (atomic_read(&lock->cnts) & _QW_WMASK)
|
||||
arch_mutex_cpu_relax();
|
||||
|
||||
cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
|
||||
rspin_until_writer_unlock(lock, cnts);
|
||||
|
||||
/*
|
||||
* Signal the next one in queue to become queue head
|
||||
*/
|
||||
arch_spin_unlock(&lock->lock);
|
||||
}
|
||||
EXPORT_SYMBOL(queue_read_lock_slowpath);
|
||||
|
||||
/**
|
||||
* queue_write_lock_slowpath - acquire write lock of a queue rwlock
|
||||
* @lock : Pointer to queue rwlock structure
|
||||
*/
|
||||
void queue_write_lock_slowpath(struct qrwlock *lock)
|
||||
{
|
||||
u32 cnts;
|
||||
|
||||
/* Put the writer into the wait queue */
|
||||
arch_spin_lock(&lock->lock);
|
||||
|
||||
/* Try to acquire the lock directly if no reader is present */
|
||||
if (!atomic_read(&lock->cnts) &&
|
||||
(atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
|
||||
goto unlock;
|
||||
|
||||
/*
|
||||
* Set the waiting flag to notify readers that a writer is pending,
|
||||
* or wait for a previous writer to go away.
|
||||
*/
|
||||
for (;;) {
|
||||
cnts = atomic_read(&lock->cnts);
|
||||
if (!(cnts & _QW_WMASK) &&
|
||||
(atomic_cmpxchg(&lock->cnts, cnts,
|
||||
cnts | _QW_WAITING) == cnts))
|
||||
break;
|
||||
|
||||
arch_mutex_cpu_relax();
|
||||
}
|
||||
|
||||
/* When no more readers, set the locked flag */
|
||||
for (;;) {
|
||||
cnts = atomic_read(&lock->cnts);
|
||||
if ((cnts == _QW_WAITING) &&
|
||||
(atomic_cmpxchg(&lock->cnts, _QW_WAITING,
|
||||
_QW_LOCKED) == _QW_WAITING))
|
||||
break;
|
||||
|
||||
arch_mutex_cpu_relax();
|
||||
}
|
||||
unlock:
|
||||
arch_spin_unlock(&lock->lock);
|
||||
}
|
||||
EXPORT_SYMBOL(queue_write_lock_slowpath);
|
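The two slowpaths above pair with inline fast paths in the asm-generic qrwlock header added by this series. The sketch below shows the expected shape of those fast paths; it reuses _QR_BIAS/_QW_WMASK/_QW_LOCKED from qrwlock.c, but the exact inline bodies are an assumption and may differ per architecture.

/* Reader fast path (sketch): optimistically add a reader bias and only
 * enter queue_read_lock_slowpath() if a writer bit is observed. */
static inline void queue_read_lock(struct qrwlock *lock)
{
	u32 cnts;

	cnts = atomic_add_return(_QR_BIAS, &lock->cnts);
	if (likely(!(cnts & _QW_WMASK)))
		return;

	/* The slowpath will decrement the reader count, if necessary. */
	queue_read_lock_slowpath(lock);
}

/* Writer fast path (sketch): a single cmpxchg from unlocked to locked. */
static inline void queue_write_lock(struct qrwlock *lock)
{
	if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)
		return;

	queue_write_lock_slowpath(lock);
}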
@@ -31,3 +31,8 @@ static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter,
|
||||
{
|
||||
return (waiter != NULL);
|
||||
}
|
||||
|
||||
static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
|
||||
{
|
||||
debug_rt_mutex_print_deadlock(w);
|
||||
}
|
||||
|
@@ -83,6 +83,47 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
|
||||
owner = *p;
|
||||
} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
|
||||
}
|
||||
|
||||
/*
|
||||
* Safe fastpath aware unlock:
|
||||
* 1) Clear the waiters bit
|
||||
* 2) Drop lock->wait_lock
|
||||
* 3) Try to unlock the lock with cmpxchg
|
||||
*/
|
||||
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
|
||||
__releases(lock->wait_lock)
|
||||
{
|
||||
struct task_struct *owner = rt_mutex_owner(lock);
|
||||
|
||||
clear_rt_mutex_waiters(lock);
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
/*
|
||||
* If a new waiter comes in between the unlock and the cmpxchg
|
||||
* we have two situations:
|
||||
*
|
||||
* unlock(wait_lock);
|
||||
* lock(wait_lock);
|
||||
* cmpxchg(p, owner, 0) == owner
|
||||
* mark_rt_mutex_waiters(lock);
|
||||
* acquire(lock);
|
||||
* or:
|
||||
*
|
||||
* unlock(wait_lock);
|
||||
* lock(wait_lock);
|
||||
* mark_rt_mutex_waiters(lock);
|
||||
*
|
||||
* cmpxchg(p, owner, 0) != owner
|
||||
* enqueue_waiter();
|
||||
* unlock(wait_lock);
|
||||
* lock(wait_lock);
|
||||
* wake waiter();
|
||||
* unlock(wait_lock);
|
||||
* lock(wait_lock);
|
||||
* acquire(lock);
|
||||
*/
|
||||
return rt_mutex_cmpxchg(lock, owner, NULL);
|
||||
}
|
||||
|
||||
#else
|
||||
# define rt_mutex_cmpxchg(l,c,n) (0)
|
||||
static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
|
||||
@@ -90,6 +131,17 @@ static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
|
||||
lock->owner = (struct task_struct *)
|
||||
((unsigned long)lock->owner | RT_MUTEX_HAS_WAITERS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Simple slow path only version: lock->owner is protected by lock->wait_lock.
|
||||
*/
|
||||
static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
|
||||
__releases(lock->wait_lock)
|
||||
{
|
||||
lock->owner = NULL;
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int
|
||||
@@ -260,27 +312,36 @@ static void rt_mutex_adjust_prio(struct task_struct *task)
|
||||
*/
|
||||
int max_lock_depth = 1024;
|
||||
|
||||
static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
|
||||
{
|
||||
return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Adjust the priority chain. Also used for deadlock detection.
|
||||
* Decreases task's usage by one - may thus free the task.
|
||||
*
|
||||
* @task: the task owning the mutex (owner) for which a chain walk is probably
|
||||
* needed
|
||||
* @task: the task owning the mutex (owner) for which a chain walk is
|
||||
* probably needed
|
||||
* @deadlock_detect: do we have to carry out deadlock detection?
|
||||
* @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
|
||||
* things for a task that has just got its priority adjusted, and
|
||||
* is waiting on a mutex)
|
||||
* @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
|
||||
* things for a task that has just got its priority adjusted, and
|
||||
* is waiting on a mutex)
|
||||
* @next_lock: the mutex on which the owner of @orig_lock was blocked before
|
||||
* we dropped its pi_lock. Is never dereferenced, only used for
|
||||
* comparison to detect lock chain changes.
|
||||
* @orig_waiter: rt_mutex_waiter struct for the task that has just donated
|
||||
* its priority to the mutex owner (can be NULL in the case
|
||||
* depicted above or if the top waiter is gone away and we are
|
||||
* actually deboosting the owner)
|
||||
* @top_task: the current top waiter
|
||||
* its priority to the mutex owner (can be NULL in the case
|
||||
* depicted above or if the top waiter is gone away and we are
|
||||
* actually deboosting the owner)
|
||||
* @top_task: the current top waiter
|
||||
*
|
||||
* Returns 0 or -EDEADLK.
|
||||
*/
|
||||
static int rt_mutex_adjust_prio_chain(struct task_struct *task,
|
||||
int deadlock_detect,
|
||||
struct rt_mutex *orig_lock,
|
||||
struct rt_mutex *next_lock,
|
||||
struct rt_mutex_waiter *orig_waiter,
|
||||
struct task_struct *top_task)
|
||||
{
|
||||
@@ -314,7 +375,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
|
||||
}
|
||||
put_task_struct(task);
|
||||
|
||||
return deadlock_detect ? -EDEADLK : 0;
|
||||
return -EDEADLK;
|
||||
}
|
||||
retry:
|
||||
/*
|
||||
@@ -338,6 +399,18 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
|
||||
if (orig_waiter && !rt_mutex_owner(orig_lock))
|
||||
goto out_unlock_pi;
|
||||
|
||||
/*
|
||||
* We dropped all locks after taking a refcount on @task, so
|
||||
* the task might have moved on in the lock chain or even left
|
||||
* the chain completely and blocks now on an unrelated lock or
|
||||
* on @orig_lock.
|
||||
*
|
||||
* We stored the lock on which @task was blocked in @next_lock,
|
||||
* so we can detect the chain change.
|
||||
*/
|
||||
if (next_lock != waiter->lock)
|
||||
goto out_unlock_pi;
|
||||
|
||||
/*
|
||||
* Drop out, when the task has no waiters. Note,
|
||||
* top_waiter can be NULL, when we are in the deboosting
|
||||
@@ -377,7 +450,7 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
|
||||
if (lock == orig_lock || rt_mutex_owner(lock) == top_task) {
|
||||
debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock);
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
ret = deadlock_detect ? -EDEADLK : 0;
|
||||
ret = -EDEADLK;
|
||||
goto out_unlock_pi;
|
||||
}
|
||||
|
||||
@@ -422,11 +495,26 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
|
||||
__rt_mutex_adjust_prio(task);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether the task which owns the current lock is pi
|
||||
* blocked itself. If yes we store a pointer to the lock for
|
||||
* the lock chain change detection above. After we dropped
|
||||
* task->pi_lock next_lock cannot be dereferenced anymore.
|
||||
*/
|
||||
next_lock = task_blocked_on_lock(task);
|
||||
|
||||
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
|
||||
|
||||
top_waiter = rt_mutex_top_waiter(lock);
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
|
||||
/*
|
||||
* We reached the end of the lock chain. Stop right here. No
|
||||
* point to go back just to figure that out.
|
||||
*/
|
||||
if (!next_lock)
|
||||
goto out_put_task;
|
||||
|
||||
if (!detect_deadlock && waiter != top_waiter)
|
||||
goto out_put_task;
|
||||
|
||||
@@ -536,8 +624,9 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
|
||||
{
|
||||
struct task_struct *owner = rt_mutex_owner(lock);
|
||||
struct rt_mutex_waiter *top_waiter = waiter;
|
||||
unsigned long flags;
|
||||
struct rt_mutex *next_lock;
|
||||
int chain_walk = 0, res;
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* Early deadlock detection. We really don't want the task to
|
||||
@@ -548,7 +637,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
|
||||
* which is wrong, as the other waiter is not in a deadlock
|
||||
* situation.
|
||||
*/
|
||||
if (detect_deadlock && owner == task)
|
||||
if (owner == task)
|
||||
return -EDEADLK;
|
||||
|
||||
raw_spin_lock_irqsave(&task->pi_lock, flags);
|
||||
@@ -569,20 +658,28 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
|
||||
if (!owner)
|
||||
return 0;
|
||||
|
||||
raw_spin_lock_irqsave(&owner->pi_lock, flags);
|
||||
if (waiter == rt_mutex_top_waiter(lock)) {
|
||||
raw_spin_lock_irqsave(&owner->pi_lock, flags);
|
||||
rt_mutex_dequeue_pi(owner, top_waiter);
|
||||
rt_mutex_enqueue_pi(owner, waiter);
|
||||
|
||||
__rt_mutex_adjust_prio(owner);
|
||||
if (owner->pi_blocked_on)
|
||||
chain_walk = 1;
|
||||
raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
|
||||
}
|
||||
else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock))
|
||||
} else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) {
|
||||
chain_walk = 1;
|
||||
}
|
||||
|
||||
if (!chain_walk)
|
||||
/* Store the lock on which owner is blocked or NULL */
|
||||
next_lock = task_blocked_on_lock(owner);
|
||||
|
||||
raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
|
||||
/*
|
||||
* Even if full deadlock detection is on, if the owner is not
|
||||
* blocked itself, we can avoid finding this out in the chain
|
||||
* walk.
|
||||
*/
|
||||
if (!chain_walk || !next_lock)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
@@ -594,8 +691,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
|
||||
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
|
||||
res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
|
||||
task);
|
||||
res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock,
|
||||
next_lock, waiter, task);
|
||||
|
||||
raw_spin_lock(&lock->wait_lock);
|
||||
|
||||
@@ -605,7 +702,8 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
|
||||
/*
|
||||
* Wake up the next waiter on the lock.
|
||||
*
|
||||
* Remove the top waiter from the current tasks waiter list and wake it up.
|
||||
* Remove the top waiter from the current task's pi waiter list and
|
||||
* wake it up.
|
||||
*
|
||||
* Called with lock->wait_lock held.
|
||||
*/
|
||||
@@ -626,10 +724,23 @@ static void wakeup_next_waiter(struct rt_mutex *lock)
|
||||
*/
|
||||
rt_mutex_dequeue_pi(current, waiter);
|
||||
|
||||
rt_mutex_set_owner(lock, NULL);
|
||||
/*
|
||||
* As we are waking up the top waiter, and the waiter stays
|
||||
* queued on the lock until it gets the lock, this lock
|
||||
* obviously has waiters. Just set the bit here and this has
|
||||
* the added benefit of forcing all new tasks into the
|
||||
* slow path making sure no task of lower priority than
|
||||
* the top waiter can steal this lock.
|
||||
*/
|
||||
lock->owner = (void *) RT_MUTEX_HAS_WAITERS;
|
||||
|
||||
raw_spin_unlock_irqrestore(¤t->pi_lock, flags);
|
||||
|
||||
/*
|
||||
* It's safe to dereference waiter as it cannot go away as
|
||||
* long as we hold lock->wait_lock. The waiter task needs to
|
||||
* acquire it in order to dequeue the waiter.
|
||||
*/
|
||||
wake_up_process(waiter->task);
|
||||
}
|
||||
|
||||
@@ -644,8 +755,8 @@ static void remove_waiter(struct rt_mutex *lock,
|
||||
{
|
||||
int first = (waiter == rt_mutex_top_waiter(lock));
|
||||
struct task_struct *owner = rt_mutex_owner(lock);
|
||||
struct rt_mutex *next_lock = NULL;
|
||||
unsigned long flags;
|
||||
int chain_walk = 0;
|
||||
|
||||
raw_spin_lock_irqsave(¤t->pi_lock, flags);
|
||||
rt_mutex_dequeue(lock, waiter);
|
||||
@@ -669,13 +780,13 @@ static void remove_waiter(struct rt_mutex *lock,
|
||||
}
|
||||
__rt_mutex_adjust_prio(owner);
|
||||
|
||||
if (owner->pi_blocked_on)
|
||||
chain_walk = 1;
|
||||
/* Store the lock on which owner is blocked or NULL */
|
||||
next_lock = task_blocked_on_lock(owner);
|
||||
|
||||
raw_spin_unlock_irqrestore(&owner->pi_lock, flags);
|
||||
}
|
||||
|
||||
if (!chain_walk)
|
||||
if (!next_lock)
|
||||
return;
|
||||
|
||||
/* gets dropped in rt_mutex_adjust_prio_chain()! */
|
||||
@@ -683,7 +794,7 @@ static void remove_waiter(struct rt_mutex *lock,
|
||||
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
|
||||
rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current);
|
||||
rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current);
|
||||
|
||||
raw_spin_lock(&lock->wait_lock);
|
||||
}
|
||||
@@ -696,6 +807,7 @@ static void remove_waiter(struct rt_mutex *lock,
|
||||
void rt_mutex_adjust_pi(struct task_struct *task)
|
||||
{
|
||||
struct rt_mutex_waiter *waiter;
|
||||
struct rt_mutex *next_lock;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock_irqsave(&task->pi_lock, flags);
|
||||
@@ -706,12 +818,13 @@ void rt_mutex_adjust_pi(struct task_struct *task)
|
||||
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
next_lock = waiter->lock;
|
||||
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
|
||||
|
||||
/* gets dropped in rt_mutex_adjust_prio_chain()! */
|
||||
get_task_struct(task);
|
||||
rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
|
||||
|
||||
rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -763,6 +876,26 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void rt_mutex_handle_deadlock(int res, int detect_deadlock,
|
||||
struct rt_mutex_waiter *w)
|
||||
{
|
||||
/*
|
||||
* If the result is not -EDEADLOCK or the caller requested
|
||||
* deadlock detection, nothing to do here.
|
||||
*/
|
||||
if (res != -EDEADLOCK || detect_deadlock)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Yell loudly and stop the task right here.
|
||||
*/
|
||||
rt_mutex_print_deadlock(w);
|
||||
while (1) {
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
schedule();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Slow path lock function:
|
||||
*/
|
||||
@@ -802,8 +935,10 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
|
||||
|
||||
set_current_state(TASK_RUNNING);
|
||||
|
||||
if (unlikely(ret))
|
||||
if (unlikely(ret)) {
|
||||
remove_waiter(lock, &waiter);
|
||||
rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter);
|
||||
}
|
||||
|
||||
/*
|
||||
* try_to_take_rt_mutex() sets the waiter bit
|
||||
@@ -859,12 +994,49 @@ rt_mutex_slowunlock(struct rt_mutex *lock)
|
||||
|
||||
rt_mutex_deadlock_account_unlock(current);
|
||||
|
||||
if (!rt_mutex_has_waiters(lock)) {
|
||||
lock->owner = NULL;
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
return;
|
||||
/*
|
||||
* We must be careful here if the fast path is enabled. If we
|
||||
* have no waiters queued we cannot set owner to NULL here
|
||||
* because of:
|
||||
*
|
||||
* foo->lock->owner = NULL;
|
||||
* rtmutex_lock(foo->lock); <- fast path
|
||||
* free = atomic_dec_and_test(foo->refcnt);
|
||||
* rtmutex_unlock(foo->lock); <- fast path
|
||||
* if (free)
|
||||
* kfree(foo);
|
||||
* raw_spin_unlock(foo->lock->wait_lock);
|
||||
*
|
||||
* So for the fastpath enabled kernel:
|
||||
*
|
||||
* Nothing can set the waiters bit as long as we hold
|
||||
* lock->wait_lock. So we do the following sequence:
|
||||
*
|
||||
* owner = rt_mutex_owner(lock);
|
||||
* clear_rt_mutex_waiters(lock);
|
||||
* raw_spin_unlock(&lock->wait_lock);
|
||||
* if (cmpxchg(&lock->owner, owner, 0) == owner)
|
||||
* return;
|
||||
* goto retry;
|
||||
*
|
||||
* The fastpath disabled variant is simple as all access to
|
||||
* lock->owner is serialized by lock->wait_lock:
|
||||
*
|
||||
* lock->owner = NULL;
|
||||
* raw_spin_unlock(&lock->wait_lock);
|
||||
*/
|
||||
while (!rt_mutex_has_waiters(lock)) {
|
||||
/* Drops lock->wait_lock ! */
|
||||
if (unlock_rt_mutex_safe(lock) == true)
|
||||
return;
|
||||
/* Relock the rtmutex and try again */
|
||||
raw_spin_lock(&lock->wait_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* The wakeup next waiter path does not suffer from the above
|
||||
* race. See the comments there.
|
||||
*/
|
||||
wakeup_next_waiter(lock);
|
||||
|
||||
raw_spin_unlock(&lock->wait_lock);
|
||||
@@ -1112,7 +1284,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
|
||||
return 1;
|
||||
}
|
||||
|
||||
ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
|
||||
/* We enforce deadlock detection for futexes */
|
||||
ret = task_blocks_on_rt_mutex(lock, waiter, task, 1);
|
||||
|
||||
if (ret && !rt_mutex_owner(lock)) {
|
||||
/*
|
||||
|
@@ -24,3 +24,8 @@
|
||||
#define debug_rt_mutex_print_deadlock(w) do { } while (0)
|
||||
#define debug_rt_mutex_detect_deadlock(w,d) (d)
|
||||
#define debug_rt_mutex_reset_waiter(w) do { } while (0)
|
||||
|
||||
static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w)
|
||||
{
|
||||
WARN(1, "rtmutex deadlock detected\n");
|
||||
}
|
||||
|
@@ -5,11 +5,17 @@
|
||||
*
|
||||
* Writer lock-stealing by Alex Shi <alex.shi@intel.com>
|
||||
* and Michel Lespinasse <walken@google.com>
|
||||
*
|
||||
* Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
|
||||
* and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
|
||||
*/
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/export.h>
|
||||
#include <linux/sched/rt.h>
|
||||
|
||||
#include "mcs_spinlock.h"
|
||||
|
||||
/*
|
||||
* Guide to the rw_semaphore's count field for common values.
|
||||
@@ -76,6 +82,10 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
|
||||
sem->count = RWSEM_UNLOCKED_VALUE;
|
||||
raw_spin_lock_init(&sem->wait_lock);
|
||||
INIT_LIST_HEAD(&sem->wait_list);
|
||||
#ifdef CONFIG_SMP
|
||||
sem->owner = NULL;
|
||||
sem->osq = NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(__init_rwsem);
|
||||
@@ -190,7 +200,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
|
||||
}
|
||||
|
||||
/*
|
||||
* wait for the read lock to be granted
|
||||
* Wait for the read lock to be granted
|
||||
*/
|
||||
__visible
|
||||
struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
|
||||
@@ -237,64 +247,221 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
|
||||
return sem;
|
||||
}
|
||||
|
||||
static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
|
||||
{
|
||||
if (!(count & RWSEM_ACTIVE_MASK)) {
|
||||
/* try acquiring the write lock */
|
||||
if (sem->count == RWSEM_WAITING_BIAS &&
|
||||
cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
|
||||
RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
|
||||
if (!list_is_singular(&sem->wait_list))
|
||||
rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* wait until we successfully acquire the write lock
|
||||
* Try to acquire write lock before the writer has been put on wait queue.
|
||||
*/
|
||||
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
|
||||
{
|
||||
long old, count = ACCESS_ONCE(sem->count);
|
||||
|
||||
while (true) {
|
||||
if (!(count == 0 || count == RWSEM_WAITING_BIAS))
|
||||
return false;
|
||||
|
||||
old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
|
||||
if (old == count)
|
||||
return true;
|
||||
|
||||
count = old;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
|
||||
{
|
||||
struct task_struct *owner;
|
||||
bool on_cpu = true;
|
||||
|
||||
if (need_resched())
|
||||
return 0;
|
||||
|
||||
rcu_read_lock();
|
||||
owner = ACCESS_ONCE(sem->owner);
|
||||
if (owner)
|
||||
on_cpu = owner->on_cpu;
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* If sem->owner is not set, the rwsem owner may have
|
||||
* just acquired it and not set the owner yet or the rwsem
|
||||
* has been released.
|
||||
*/
|
||||
return on_cpu;
|
||||
}
|
||||
|
||||
static inline bool owner_running(struct rw_semaphore *sem,
|
||||
struct task_struct *owner)
|
||||
{
|
||||
if (sem->owner != owner)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Ensure we emit the owner->on_cpu dereference _after_ checking that
|
||||
* sem->owner still matches owner. If that fails, owner might
|
||||
* point to free()d memory; if it still matches, the rcu_read_lock()
|
||||
* ensures the memory stays valid.
|
||||
*/
|
||||
barrier();
|
||||
|
||||
return owner->on_cpu;
|
||||
}
|
||||
|
||||
static noinline
|
||||
bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
|
||||
{
|
||||
rcu_read_lock();
|
||||
while (owner_running(sem, owner)) {
|
||||
if (need_resched())
|
||||
break;
|
||||
|
||||
arch_mutex_cpu_relax();
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* We break out of the loop above on need_resched() or when the
|
||||
* owner changed, which is a sign of heavy contention. Return
|
||||
* success only when sem->owner is NULL.
|
||||
*/
|
||||
return sem->owner == NULL;
|
||||
}
|
||||
|
||||
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
|
||||
{
|
||||
struct task_struct *owner;
|
||||
bool taken = false;
|
||||
|
||||
preempt_disable();
|
||||
|
||||
/* sem->wait_lock should not be held when doing optimistic spinning */
|
||||
if (!rwsem_can_spin_on_owner(sem))
|
||||
goto done;
|
||||
|
||||
if (!osq_lock(&sem->osq))
|
||||
goto done;
|
||||
|
||||
while (true) {
|
||||
owner = ACCESS_ONCE(sem->owner);
|
||||
if (owner && !rwsem_spin_on_owner(sem, owner))
|
||||
break;
|
||||
|
||||
/* wait_lock will be acquired if write_lock is obtained */
|
||||
if (rwsem_try_write_lock_unqueued(sem)) {
|
||||
taken = true;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* When there's no owner, we might have preempted between the
|
||||
* owner acquiring the lock and setting the owner field. If
|
||||
* we're an RT task that will live-lock because we won't let
|
||||
* the owner complete.
|
||||
*/
|
||||
if (!owner && (need_resched() || rt_task(current)))
|
||||
break;
|
||||
|
||||
/*
|
||||
* The cpu_relax() call is a compiler barrier which forces
|
||||
* everything in this loop to be re-loaded. We don't need
|
||||
* memory barriers as we'll eventually observe the right
|
||||
* values at the cost of a few extra spins.
|
||||
*/
|
||||
arch_mutex_cpu_relax();
|
||||
}
|
||||
osq_unlock(&sem->osq);
|
||||
done:
|
||||
preempt_enable();
|
||||
return taken;
|
||||
}
|
||||
|
||||
#else
|
||||
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Wait until we successfully acquire the write lock
|
||||
*/
|
||||
__visible
|
||||
struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
|
||||
{
|
||||
long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS;
|
||||
long count;
|
||||
bool waiting = true; /* any queued threads before us */
|
||||
struct rwsem_waiter waiter;
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
/* set up my own style of waitqueue */
|
||||
waiter.task = tsk;
|
||||
/* undo write bias from down_write operation, stop active locking */
|
||||
count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
|
||||
|
||||
/* do optimistic spinning and steal lock if possible */
|
||||
if (rwsem_optimistic_spin(sem))
|
||||
return sem;
|
||||
|
||||
/*
|
||||
* Optimistic spinning failed, proceed to the slowpath
|
||||
* and block until we can acquire the sem.
|
||||
*/
|
||||
waiter.task = current;
|
||||
waiter.type = RWSEM_WAITING_FOR_WRITE;
|
||||
|
||||
raw_spin_lock_irq(&sem->wait_lock);
|
||||
|
||||
/* account for this before adding a new element to the list */
|
||||
if (list_empty(&sem->wait_list))
|
||||
adjustment += RWSEM_WAITING_BIAS;
|
||||
waiting = false;
|
||||
|
||||
list_add_tail(&waiter.list, &sem->wait_list);
|
||||
|
||||
/* we're now waiting on the lock, but no longer actively locking */
|
||||
count = rwsem_atomic_update(adjustment, sem);
|
||||
if (waiting) {
|
||||
count = ACCESS_ONCE(sem->count);
|
||||
|
||||
/* If there were already threads queued before us and there are no
|
||||
* active writers, the lock must be read owned; so we try to wake
|
||||
* any read locks that were queued ahead of us. */
|
||||
if (count > RWSEM_WAITING_BIAS &&
|
||||
adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
|
||||
sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
|
||||
/*
|
||||
* If there were already threads queued before us and there are
|
||||
* no active writers, the lock must be read owned; so we try to
|
||||
* wake any read locks that were queued ahead of us.
|
||||
*/
|
||||
if (count > RWSEM_WAITING_BIAS)
|
||||
sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
|
||||
|
||||
} else
|
||||
count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
|
||||
|
||||
/* wait until we successfully acquire the lock */
|
||||
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
while (true) {
|
||||
if (!(count & RWSEM_ACTIVE_MASK)) {
|
||||
/* Try acquiring the write lock. */
|
||||
count = RWSEM_ACTIVE_WRITE_BIAS;
|
||||
if (!list_is_singular(&sem->wait_list))
|
||||
count += RWSEM_WAITING_BIAS;
|
||||
|
||||
if (sem->count == RWSEM_WAITING_BIAS &&
|
||||
cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) ==
|
||||
RWSEM_WAITING_BIAS)
|
||||
break;
|
||||
}
|
||||
|
||||
if (rwsem_try_write_lock(count, sem))
|
||||
break;
|
||||
raw_spin_unlock_irq(&sem->wait_lock);
|
||||
|
||||
/* Block until there are no active lockers. */
|
||||
do {
|
||||
schedule();
|
||||
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
|
||||
|
||||
raw_spin_lock_irq(&sem->wait_lock);
|
||||
}
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
list_del(&waiter.list);
|
||||
raw_spin_unlock_irq(&sem->wait_lock);
|
||||
tsk->state = TASK_RUNNING;
|
||||
|
||||
return sem;
|
||||
}
|
||||
|
@@ -12,6 +12,27 @@
|
||||
|
||||
#include <linux/atomic.h>
|
||||
|
||||
#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
|
||||
static inline void rwsem_set_owner(struct rw_semaphore *sem)
|
||||
{
|
||||
sem->owner = current;
|
||||
}
|
||||
|
||||
static inline void rwsem_clear_owner(struct rw_semaphore *sem)
|
||||
{
|
||||
sem->owner = NULL;
|
||||
}
|
||||
|
||||
#else
|
||||
static inline void rwsem_set_owner(struct rw_semaphore *sem)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void rwsem_clear_owner(struct rw_semaphore *sem)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
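These helpers exist so that rwsem_optimistic_spin() in the rwsem-xadd.c hunk above has a stable owner to spin on. As a side benefit the field can back simple sanity checks; the helper below is hypothetical (not part of this patch) and only makes sense in the CONFIG_SMP && CONFIG_RWSEM_XCHGADD_ALGORITHM case where sem->owner exists.

/* Hypothetical debugging aid, NOT part of this series: assert that the
 * calling task is the write holder recorded by down_write(). */
static inline void assert_rwsem_write_held_by_me(struct rw_semaphore *sem)
{
	WARN_ON_ONCE(ACCESS_ONCE(sem->owner) != current);
}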
/*
|
||||
* lock for reading
|
||||
*/
|
||||
@@ -48,6 +69,7 @@ void __sched down_write(struct rw_semaphore *sem)
|
||||
rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
|
||||
|
||||
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
|
||||
rwsem_set_owner(sem);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(down_write);
|
||||
@@ -59,8 +81,11 @@ int down_write_trylock(struct rw_semaphore *sem)
|
||||
{
|
||||
int ret = __down_write_trylock(sem);
|
||||
|
||||
if (ret == 1)
|
||||
if (ret == 1) {
|
||||
rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
|
||||
rwsem_set_owner(sem);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -85,6 +110,7 @@ void up_write(struct rw_semaphore *sem)
|
||||
{
|
||||
rwsem_release(&sem->dep_map, 1, _RET_IP_);
|
||||
|
||||
rwsem_clear_owner(sem);
|
||||
__up_write(sem);
|
||||
}
|
||||
|
||||
@@ -99,6 +125,7 @@ void downgrade_write(struct rw_semaphore *sem)
|
||||
* lockdep: a downgraded write will live on as a write
|
||||
* dependency.
|
||||
*/
|
||||
rwsem_clear_owner(sem);
|
||||
__downgrade_write(sem);
|
||||
}
|
||||
|
||||
@@ -122,6 +149,7 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
|
||||
rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
|
||||
|
||||
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
|
||||
rwsem_set_owner(sem);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(_down_write_nest_lock);
|
||||
@@ -141,6 +169,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
|
||||
rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
|
||||
|
||||
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
|
||||
rwsem_set_owner(sem);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(down_write_nested);
|
||||
|
@@ -3020,21 +3020,6 @@ static int do_init_module(struct module *mod)
|
||||
*/
|
||||
current->flags &= ~PF_USED_ASYNC;
|
||||
|
||||
blocking_notifier_call_chain(&module_notify_list,
|
||||
MODULE_STATE_COMING, mod);
|
||||
|
||||
/* Set RO and NX regions for core */
|
||||
set_section_ro_nx(mod->module_core,
|
||||
mod->core_text_size,
|
||||
mod->core_ro_size,
|
||||
mod->core_size);
|
||||
|
||||
/* Set RO and NX regions for init */
|
||||
set_section_ro_nx(mod->module_init,
|
||||
mod->init_text_size,
|
||||
mod->init_ro_size,
|
||||
mod->init_size);
|
||||
|
||||
do_mod_ctors(mod);
|
||||
/* Start the module */
|
||||
if (mod->init != NULL)
|
||||
@@ -3165,9 +3150,26 @@ static int complete_formation(struct module *mod, struct load_info *info)
|
||||
/* This relies on module_mutex for list integrity. */
|
||||
module_bug_finalize(info->hdr, info->sechdrs, mod);
|
||||
|
||||
/* Set RO and NX regions for core */
|
||||
set_section_ro_nx(mod->module_core,
|
||||
mod->core_text_size,
|
||||
mod->core_ro_size,
|
||||
mod->core_size);
|
||||
|
||||
/* Set RO and NX regions for init */
|
||||
set_section_ro_nx(mod->module_init,
|
||||
mod->init_text_size,
|
||||
mod->init_ro_size,
|
||||
mod->init_size);
|
||||
|
||||
/* Mark state as coming so strong_try_module_get() ignores us,
|
||||
* but kallsyms etc. can see us. */
|
||||
mod->state = MODULE_STATE_COMING;
|
||||
mutex_unlock(&module_mutex);
|
||||
|
||||
blocking_notifier_call_chain(&module_notify_list,
|
||||
MODULE_STATE_COMING, mod);
|
||||
return 0;
|
||||
|
||||
out:
|
||||
mutex_unlock(&module_mutex);
|
||||
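The net effect of this reordering is that MODULE_STATE_COMING listeners now run only after the core text has been set RO/NX. A hypothetical listener (all names below are invented for illustration) is registered the usual way and must not expect to patch module text from its callback:

static int example_module_notify(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct module *mod = data;

	/* By now set_section_ro_nx() has already run for mod's core. */
	if (action == MODULE_STATE_COMING)
		pr_info("module %s coming (text already RO/NX)\n", mod->name);
	return NOTIFY_OK;
}

static struct notifier_block example_module_nb = {
	.notifier_call = example_module_notify,
};
/* registered elsewhere via register_module_notifier(&example_module_nb) */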
@@ -3190,6 +3192,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
|
||||
{
|
||||
struct module *mod;
|
||||
long err;
|
||||
char *after_dashes;
|
||||
|
||||
err = module_sig_check(info);
|
||||
if (err)
|
||||
@@ -3277,10 +3280,15 @@ static int load_module(struct load_info *info, const char __user *uargs,
|
||||
goto ddebug_cleanup;
|
||||
|
||||
/* Module is ready to execute: parsing args may do that. */
|
||||
err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
|
||||
-32768, 32767, unknown_module_param_cb);
|
||||
if (err < 0)
|
||||
after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
|
||||
-32768, 32767, unknown_module_param_cb);
|
||||
if (IS_ERR(after_dashes)) {
|
||||
err = PTR_ERR(after_dashes);
|
||||
goto bug_cleanup;
|
||||
} else if (after_dashes) {
|
||||
pr_warn("%s: parameters '%s' after `--' ignored\n",
|
||||
mod->name, after_dashes);
|
||||
}
|
||||
|
||||
/* Link in to sysfs. */
|
||||
err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp);
|
||||
|
@@ -71,9 +71,9 @@ static int notifier_chain_unregister(struct notifier_block **nl,
|
||||
* @returns: notifier_call_chain returns the value returned by the
|
||||
* last notifier function called.
|
||||
*/
|
||||
static int __kprobes notifier_call_chain(struct notifier_block **nl,
|
||||
unsigned long val, void *v,
|
||||
int nr_to_call, int *nr_calls)
|
||||
static int notifier_call_chain(struct notifier_block **nl,
|
||||
unsigned long val, void *v,
|
||||
int nr_to_call, int *nr_calls)
|
||||
{
|
||||
int ret = NOTIFY_DONE;
|
||||
struct notifier_block *nb, *next_nb;
|
||||
@@ -102,6 +102,7 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
NOKPROBE_SYMBOL(notifier_call_chain);
|
||||
|
||||
/*
|
||||
* Atomic notifier chain routines. Registration and unregistration
|
||||
@@ -172,9 +173,9 @@ EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
|
||||
* Otherwise the return value is the return value
|
||||
* of the last notifier function called.
|
||||
*/
|
||||
int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
|
||||
unsigned long val, void *v,
|
||||
int nr_to_call, int *nr_calls)
|
||||
int __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
|
||||
unsigned long val, void *v,
|
||||
int nr_to_call, int *nr_calls)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@@ -184,13 +185,15 @@ int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);
|
||||
NOKPROBE_SYMBOL(__atomic_notifier_call_chain);
|
||||
|
||||
int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
|
||||
unsigned long val, void *v)
|
||||
int atomic_notifier_call_chain(struct atomic_notifier_head *nh,
|
||||
unsigned long val, void *v)
|
||||
{
|
||||
return __atomic_notifier_call_chain(nh, val, v, -1, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(atomic_notifier_call_chain);
|
||||
NOKPROBE_SYMBOL(atomic_notifier_call_chain);
|
||||
|
||||
/*
|
||||
* Blocking notifier chain routines. All access to the chain is
|
||||
@@ -527,7 +530,7 @@ EXPORT_SYMBOL_GPL(srcu_init_notifier_head);
|
||||
|
||||
static ATOMIC_NOTIFIER_HEAD(die_chain);
|
||||
|
||||
int notrace __kprobes notify_die(enum die_val val, const char *str,
|
||||
int notrace notify_die(enum die_val val, const char *str,
|
||||
struct pt_regs *regs, long err, int trap, int sig)
|
||||
{
|
||||
struct die_args args = {
|
||||
@@ -540,6 +543,7 @@ int notrace __kprobes notify_die(enum die_val val, const char *str,
|
||||
};
|
||||
return atomic_notifier_call_chain(&die_chain, val, &args);
|
||||
}
|
||||
NOKPROBE_SYMBOL(notify_die);
|
||||
|
||||
int register_die_notifier(struct notifier_block *nb)
|
||||
{
|
||||
|
@@ -177,13 +177,13 @@ static char *next_arg(char *args, char **param, char **val)
|
||||
}
|
||||
|
||||
/* Args looks like "foo=bar,bar2 baz=fuz wiz". */
|
||||
int parse_args(const char *doing,
|
||||
char *args,
|
||||
const struct kernel_param *params,
|
||||
unsigned num,
|
||||
s16 min_level,
|
||||
s16 max_level,
|
||||
int (*unknown)(char *param, char *val, const char *doing))
|
||||
char *parse_args(const char *doing,
|
||||
char *args,
|
||||
const struct kernel_param *params,
|
||||
unsigned num,
|
||||
s16 min_level,
|
||||
s16 max_level,
|
||||
int (*unknown)(char *param, char *val, const char *doing))
|
||||
{
|
||||
char *param, *val;
|
||||
|
||||
@@ -198,6 +198,9 @@ int parse_args(const char *doing,
|
||||
int irq_was_disabled;
|
||||
|
||||
args = next_arg(args, ¶m, &val);
|
||||
/* Stop at -- */
|
||||
if (!val && strcmp(param, "--") == 0)
|
||||
return args;
|
||||
irq_was_disabled = irqs_disabled();
|
||||
ret = parse_one(param, val, doing, params, num,
|
||||
min_level, max_level, unknown);
|
||||
@@ -208,22 +211,22 @@ int parse_args(const char *doing,
|
||||
switch (ret) {
|
||||
case -ENOENT:
|
||||
pr_err("%s: Unknown parameter `%s'\n", doing, param);
|
||||
return ret;
|
||||
return ERR_PTR(ret);
|
||||
case -ENOSPC:
|
||||
pr_err("%s: `%s' too large for parameter `%s'\n",
|
||||
doing, val ?: "", param);
|
||||
return ret;
|
||||
return ERR_PTR(ret);
|
||||
case 0:
|
||||
break;
|
||||
default:
|
||||
pr_err("%s: `%s' invalid for parameter `%s'\n",
|
||||
doing, val ?: "", param);
|
||||
return ret;
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
}
|
||||
|
||||
/* All parsed OK. */
|
||||
return 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Lazy bastard, eh? */
|
||||
|
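To summarize the new contract: parse_args() now returns NULL when every parameter was consumed, an ERR_PTR() on the first hard failure, or a pointer just past a bare "--" so the caller can forward the remaining arguments. A condensed sketch of a caller, mirroring the load_module() hunk earlier in this diff:

char *after_dashes;

after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
			  -32768, 32767, unknown_module_param_cb);
if (IS_ERR(after_dashes))
	return PTR_ERR(after_dashes);	/* a parameter failed to parse */
if (after_dashes)
	pr_warn("%s: parameters '%s' after `--' ignored\n",
		mod->name, after_dashes);
/* NULL: everything before "--" (or the whole string) parsed cleanly. */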
@@ -28,12 +28,14 @@
|
||||
#include <linux/syscore_ops.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/genhd.h>
|
||||
#include <trace/events/power.h>
|
||||
|
||||
#include "power.h"
|
||||
|
||||
|
||||
static int nocompress;
|
||||
static int noresume;
|
||||
static int nohibernate;
|
||||
static int resume_wait;
|
||||
static unsigned int resume_delay;
|
||||
static char resume_file[256] = CONFIG_PM_STD_PARTITION;
|
||||
@@ -61,6 +63,11 @@ bool freezer_test_done;
|
||||
|
||||
static const struct platform_hibernation_ops *hibernation_ops;
|
||||
|
||||
bool hibernation_available(void)
|
||||
{
|
||||
return (nohibernate == 0);
|
||||
}
|
||||
|
||||
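hibernation_available() becomes the single switch consulted by every user-visible entry point touched below (hibernate(), software_resume(), the sysfs disk attribute). Other entry points outside this excerpt get the same treatment; the function name in the sketch below is invented purely to show the pattern.

/* Illustrative only: the guard every hibernation entry point now applies. */
static int example_hibernation_entry(void)
{
	if (!hibernation_available())
		return -EPERM;
	/* ... proceed with hibernate/resume work ... */
	return 0;
}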
/**
|
||||
* hibernation_set_ops - Set the global hibernate operations.
|
||||
* @ops: Hibernation operations to use in subsequent hibernation transitions.
|
||||
@@ -292,7 +299,9 @@ static int create_image(int platform_mode)
|
||||
|
||||
in_suspend = 1;
|
||||
save_processor_state();
|
||||
trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true);
|
||||
error = swsusp_arch_suspend();
|
||||
trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false);
|
||||
if (error)
|
||||
printk(KERN_ERR "PM: Error %d creating hibernation image\n",
|
||||
error);
|
||||
@@ -639,6 +648,11 @@ int hibernate(void)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (!hibernation_available()) {
|
||||
pr_debug("PM: Hibernation not available.\n");
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
lock_system_sleep();
|
||||
/* The snapshot device should not be opened while we're running */
|
||||
if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {
|
||||
@@ -731,7 +745,7 @@ static int software_resume(void)
|
||||
/*
|
||||
* If the user said "noresume", bail out early.
|
||||
*/
|
||||
if (noresume)
|
||||
if (noresume || !hibernation_available())
|
||||
return 0;
|
||||
|
||||
/*
|
||||
@@ -897,6 +911,9 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
int i;
|
||||
char *start = buf;
|
||||
|
||||
if (!hibernation_available())
|
||||
return sprintf(buf, "[disabled]\n");
|
||||
|
||||
for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
|
||||
if (!hibernation_modes[i])
|
||||
continue;
|
||||
@@ -931,6 +948,9 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
char *p;
|
||||
int mode = HIBERNATION_INVALID;
|
||||
|
||||
if (!hibernation_available())
|
||||
return -EPERM;
|
||||
|
||||
p = memchr(buf, '\n', n);
|
||||
len = p ? p - buf : n;
|
||||
|
||||
@@ -1098,6 +1118,10 @@ static int __init hibernate_setup(char *str)
|
||||
noresume = 1;
|
||||
else if (!strncmp(str, "nocompress", 10))
|
||||
nocompress = 1;
|
||||
else if (!strncmp(str, "no", 2)) {
|
||||
noresume = 1;
|
||||
nohibernate = 1;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -1122,9 +1146,23 @@ static int __init resumedelay_setup(char *str)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int __init nohibernate_setup(char *str)
|
||||
{
|
||||
noresume = 1;
|
||||
nohibernate = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int __init kaslr_nohibernate_setup(char *str)
|
||||
{
|
||||
return nohibernate_setup(str);
|
||||
}
|
||||
|
||||
__setup("noresume", noresume_setup);
|
||||
__setup("resume_offset=", resume_offset_setup);
|
||||
__setup("resume=", resume_setup);
|
||||
__setup("hibernate=", hibernate_setup);
|
||||
__setup("resumewait", resumewait_setup);
|
||||
__setup("resumedelay=", resumedelay_setup);
|
||||
__setup("nohibernate", nohibernate_setup);
|
||||
__setup("kaslr", kaslr_nohibernate_setup);
|
||||
|
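A common thread in the hibernate.c hunks above (and in the main.c and user.c hunks that follow) is a single hibernation_available() predicate, driven by the nohibernate/kaslr boot options, that every entry point checks before doing any work. A compressed sketch of that gating pattern in plain C; the option handling and entry point here are invented for illustration:

#include <errno.h>
#include <stdio.h>
#include <string.h>

static int nohibernate;			/* set from the "command line" */

static int hibernation_available(void)
{
	return nohibernate == 0;
}

/* toy equivalent of the __setup("nohibernate", ...) / "kaslr" handlers */
static void parse_boot_option(const char *opt)
{
	if (!strcmp(opt, "nohibernate") || !strcmp(opt, "kaslr"))
		nohibernate = 1;
}

/* every user-visible entry point consults the same predicate up front */
static int hibernate(void)
{
	if (!hibernation_available())
		return -EPERM;
	printf("hibernating...\n");
	return 0;
}

int main(int argc, char **argv)
{
	for (int i = 1; i < argc; i++)
		parse_boot_option(argv[i]);

	printf("hibernate() -> %d\n", hibernate());
	return 0;
}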
@@ -300,13 +300,11 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr,
s += sprintf(s,"%s ", pm_states[i].label);

#endif
#ifdef CONFIG_HIBERNATION
s += sprintf(s, "%s\n", "disk");
#else
if (hibernation_available())
s += sprintf(s, "disk ");
if (s != buf)
/* convert the last space to a newline */
*(s-1) = '\n';
#endif
return (s - buf);
}

@@ -17,6 +17,7 @@
#include <linux/delay.h>
#include <linux/workqueue.h>
#include <linux/kmod.h>
#include <trace/events/power.h>

/*
* Timeout for stopping processes
@@ -175,6 +176,7 @@ void thaw_processes(void)
struct task_struct *g, *p;
struct task_struct *curr = current;

trace_suspend_resume(TPS("thaw_processes"), 0, true);
if (pm_freezing)
atomic_dec(&system_freezing_cnt);
pm_freezing = false;
@@ -201,6 +203,7 @@ void thaw_processes(void)

schedule();
printk("done.\n");
trace_suspend_resume(TPS("thaw_processes"), 0, false);
}

void thaw_kernel_threads(void)

@@ -177,7 +177,9 @@ static int suspend_prepare(suspend_state_t state)
if (error)
goto Finish;

trace_suspend_resume(TPS("freeze_processes"), 0, true);
error = suspend_freeze_processes();
trace_suspend_resume(TPS("freeze_processes"), 0, false);
if (!error)
return 0;

@@ -240,7 +242,9 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
* all the devices are suspended.
*/
if (state == PM_SUSPEND_FREEZE) {
trace_suspend_resume(TPS("machine_suspend"), state, true);
freeze_enter();
trace_suspend_resume(TPS("machine_suspend"), state, false);
goto Platform_wake;
}

@@ -256,7 +260,11 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
if (!error) {
*wakeup = pm_wakeup_pending();
if (!(suspend_test(TEST_CORE) || *wakeup)) {
trace_suspend_resume(TPS("machine_suspend"),
state, true);
error = suspend_ops->enter(state);
trace_suspend_resume(TPS("machine_suspend"),
state, false);
events_check_enabled = false;
}
syscore_resume();
@@ -294,7 +302,6 @@ int suspend_devices_and_enter(suspend_state_t state)
if (need_suspend_ops(state) && !suspend_ops)
return -ENOSYS;

trace_machine_suspend(state);
if (need_suspend_ops(state) && suspend_ops->begin) {
error = suspend_ops->begin(state);
if (error)
@@ -331,7 +338,6 @@ int suspend_devices_and_enter(suspend_state_t state)
else if (state == PM_SUSPEND_FREEZE && freeze_ops->end)
freeze_ops->end();

trace_machine_suspend(PWR_EVENT_EXIT);
return error;

Recover_platform:
@@ -365,6 +371,7 @@ static int enter_state(suspend_state_t state)
{
int error;

trace_suspend_resume(TPS("suspend_enter"), state, true);
if (state == PM_SUSPEND_FREEZE) {
#ifdef CONFIG_PM_DEBUG
if (pm_test_level != TEST_NONE && pm_test_level <= TEST_CPUS) {
@@ -382,9 +389,11 @@ static int enter_state(suspend_state_t state)
if (state == PM_SUSPEND_FREEZE)
freeze_begin();

trace_suspend_resume(TPS("sync_filesystems"), 0, true);
printk(KERN_INFO "PM: Syncing filesystems ... ");
sys_sync();
printk("done.\n");
trace_suspend_resume(TPS("sync_filesystems"), 0, false);

pr_debug("PM: Preparing system for %s sleep\n", pm_states[state].label);
error = suspend_prepare(state);
@@ -394,6 +403,7 @@ static int enter_state(suspend_state_t state)
if (suspend_test(TEST_FREEZER))
goto Finish;

trace_suspend_resume(TPS("suspend_enter"), state, false);
pr_debug("PM: Entering %s sleep\n", pm_states[state].label);
pm_restrict_gfp_mask();
error = suspend_devices_and_enter(state);

@@ -49,6 +49,9 @@ static int snapshot_open(struct inode *inode, struct file *filp)
struct snapshot_data *data;
int error;

if (!hibernation_available())
return -EPERM;

lock_system_sleep();

if (!atomic_add_unless(&snapshot_device_available, -1, 0)) {

@@ -1416,9 +1416,10 @@ static int have_callable_console(void)
/*
* Can we actually use the console at this time on this cpu?
*
* Console drivers may assume that per-cpu resources have been allocated. So
* unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
* call them until this CPU is officially up.
* Console drivers may assume that per-cpu resources have
* been allocated. So unless they're explicitly marked as
* being able to cope (CON_ANYTIME) don't call them until
* this CPU is officially up.
*/
static inline int can_use_console(unsigned int cpu)
{
@@ -1431,10 +1432,8 @@ static inline int can_use_console(unsigned int cpu)
* console_lock held, and 'console_locked' set) if it
* is successful, false otherwise.
*/
static int console_trylock_for_printk(void)
static int console_trylock_for_printk(unsigned int cpu)
{
unsigned int cpu = smp_processor_id();

if (!console_trylock())
return 0;
/*
@@ -1609,8 +1608,7 @@ asmlinkage int vprintk_emit(int facility, int level,
*/
if (!oops_in_progress && !lockdep_recursing(current)) {
recursion_bug = 1;
local_irq_restore(flags);
return 0;
goto out_restore_irqs;
}
zap_locks();
}
@@ -1718,27 +1716,21 @@ asmlinkage int vprintk_emit(int facility, int level,

logbuf_cpu = UINT_MAX;
raw_spin_unlock(&logbuf_lock);
lockdep_on();
local_irq_restore(flags);

/* If called from the scheduler, we can not call up(). */
if (in_sched)
return printed_len;

/*
* Disable preemption to avoid being preempted while holding
* console_sem which would prevent anyone from printing to console
*/
preempt_disable();
/*
* Try to acquire and then immediately release the console semaphore.
* The release will print out buffers and wake up /dev/kmsg and syslog()
* users.
*/
if (console_trylock_for_printk())
console_unlock();
preempt_enable();
if (!in_sched) {
/*
* Try to acquire and then immediately release the console
* semaphore. The release will print out buffers and wake up
* /dev/kmsg and syslog() users.
*/
if (console_trylock_for_printk(this_cpu))
console_unlock();
}

lockdep_on();
out_restore_irqs:
local_irq_restore(flags);
return printed_len;
}
EXPORT_SYMBOL(vprintk_emit);

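The vprintk_emit() rework above keeps the long-standing printk pattern: store the message under the log-buffer lock, then try to take the console semaphore without sleeping, and whoever succeeds flushes the buffer on behalf of everyone (now with preemption disabled around the trylock/unlock pair). A loose userspace analogue of that flush-on-trylock idea using POSIX threads; the buffer handling and names are invented for illustration, not the kernel's actual logic:

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t logbuf_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t console_lock = PTHREAD_MUTEX_INITIALIZER;
static char logbuf[1024];		/* toy log buffer */

static void console_flush(void)
{
	pthread_mutex_lock(&logbuf_lock);
	if (logbuf[0]) {
		fputs(logbuf, stdout);
		logbuf[0] = '\0';
	}
	pthread_mutex_unlock(&logbuf_lock);
}

static int my_printk(const char *msg)
{
	int len = (int)strlen(msg);

	/* store the message under the buffer lock */
	pthread_mutex_lock(&logbuf_lock);
	strncat(logbuf, msg, sizeof(logbuf) - strlen(logbuf) - 1);
	pthread_mutex_unlock(&logbuf_lock);

	/*
	 * Try to acquire and then immediately release the console lock;
	 * whoever holds it prints the buffer on behalf of every caller.
	 */
	if (pthread_mutex_trylock(&console_lock) == 0) {
		console_flush();
		pthread_mutex_unlock(&console_lock);
	}
	return len;
}

int main(void)
{
	my_printk("hello ");
	my_printk("world\n");
	return 0;
}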
@@ -535,7 +535,7 @@ static inline void init_hrtick(void)
__old; \
})

#ifdef TIF_POLLING_NRFLAG
#if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
/*
* Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
* this avoids any races wrt polling state changes and thereby avoids
@@ -546,12 +546,44 @@ static bool set_nr_and_not_polling(struct task_struct *p)
struct thread_info *ti = task_thread_info(p);
return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
}

/*
* Atomically set TIF_NEED_RESCHED if TIF_POLLING_NRFLAG is set.
*
* If this returns true, then the idle task promises to call
* sched_ttwu_pending() and reschedule soon.
*/
static bool set_nr_if_polling(struct task_struct *p)
{
struct thread_info *ti = task_thread_info(p);
typeof(ti->flags) old, val = ACCESS_ONCE(ti->flags);

for (;;) {
if (!(val & _TIF_POLLING_NRFLAG))
return false;
if (val & _TIF_NEED_RESCHED)
return true;
old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED);
if (old == val)
break;
val = old;
}
return true;
}

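set_nr_if_polling() above only sets _TIF_NEED_RESCHED when _TIF_POLLING_NRFLAG is already set, so a remote wakeup can skip the reschedule IPI whenever the idle task has promised to notice the flag on its own. A rough userspace sketch of the same compare-and-swap loop using the GCC/Clang __atomic builtins; the flag values and the function name are invented for illustration:

#include <stdbool.h>
#include <stdio.h>

#define FLAG_POLLING	0x1UL	/* stand-in for _TIF_POLLING_NRFLAG */
#define FLAG_RESCHED	0x2UL	/* stand-in for _TIF_NEED_RESCHED */

/*
 * Set FLAG_RESCHED only if FLAG_POLLING is set.  Returns true when the
 * target is guaranteed to notice the flag (it was already marked, or we
 * marked it while it was still polling), false when the caller has to
 * send an IPI instead.
 */
static bool set_resched_if_polling(unsigned long *flags)
{
	unsigned long val = __atomic_load_n(flags, __ATOMIC_SEQ_CST);

	for (;;) {
		if (!(val & FLAG_POLLING))
			return false;
		if (val & FLAG_RESCHED)
			return true;
		/* on failure, val is reloaded with the current value */
		if (__atomic_compare_exchange_n(flags, &val, val | FLAG_RESCHED,
						false, __ATOMIC_SEQ_CST,
						__ATOMIC_SEQ_CST))
			return true;
	}
}

int main(void)
{
	unsigned long flags = FLAG_POLLING;

	printf("polling target:     %d (flags now 0x%lx)\n",
	       set_resched_if_polling(&flags), flags);
	flags = 0;
	printf("non-polling target: %d (flags now 0x%lx)\n",
	       set_resched_if_polling(&flags), flags);
	return 0;
}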
#else
static bool set_nr_and_not_polling(struct task_struct *p)
{
set_tsk_need_resched(p);
return true;
}

#ifdef CONFIG_SMP
static bool set_nr_if_polling(struct task_struct *p)
{
return false;
}
#endif
#endif

/*
@@ -580,6 +612,8 @@ void resched_task(struct task_struct *p)
|
||||
|
||||
if (set_nr_and_not_polling(p))
|
||||
smp_send_reschedule(cpu);
|
||||
else
|
||||
trace_sched_wake_idle_without_ipi(cpu);
|
||||
}
|
||||
|
||||
void resched_cpu(int cpu)
|
||||
@@ -642,27 +676,10 @@ static void wake_up_idle_cpu(int cpu)
|
||||
if (cpu == smp_processor_id())
|
||||
return;
|
||||
|
||||
/*
|
||||
* This is safe, as this function is called with the timer
|
||||
* wheel base lock of (cpu) held. When the CPU is on the way
|
||||
* to idle and has not yet set rq->curr to idle then it will
|
||||
* be serialized on the timer wheel base lock and take the new
|
||||
* timer into account automatically.
|
||||
*/
|
||||
if (rq->curr != rq->idle)
|
||||
return;
|
||||
|
||||
/*
|
||||
* We can set TIF_RESCHED on the idle task of the other CPU
|
||||
* lockless. The worst case is that the other CPU runs the
|
||||
* idle task through an additional NOOP schedule()
|
||||
*/
|
||||
set_tsk_need_resched(rq->idle);
|
||||
|
||||
/* NEED_RESCHED must be visible before we test polling */
|
||||
smp_mb();
|
||||
if (!tsk_is_polling(rq->idle))
|
||||
if (set_nr_and_not_polling(rq->idle))
|
||||
smp_send_reschedule(cpu);
|
||||
else
|
||||
trace_sched_wake_idle_without_ipi(cpu);
|
||||
}
|
||||
|
||||
static bool wake_up_full_nohz_cpu(int cpu)
|
||||
@@ -888,7 +905,7 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
|
||||
rq->clock_task += delta;
|
||||
|
||||
#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
|
||||
if ((irq_delta + steal) && sched_feat(NONTASK_POWER))
|
||||
if ((irq_delta + steal) && sched_feat(NONTASK_CAPACITY))
|
||||
sched_rt_avg_update(rq, irq_delta + steal);
|
||||
#endif
|
||||
}
|
||||
@@ -1521,13 +1538,17 @@ static int ttwu_remote(struct task_struct *p, int wake_flags)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static void sched_ttwu_pending(void)
|
||||
void sched_ttwu_pending(void)
|
||||
{
|
||||
struct rq *rq = this_rq();
|
||||
struct llist_node *llist = llist_del_all(&rq->wake_list);
|
||||
struct task_struct *p;
|
||||
unsigned long flags;
|
||||
|
||||
raw_spin_lock(&rq->lock);
|
||||
if (!llist)
|
||||
return;
|
||||
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
|
||||
while (llist) {
|
||||
p = llist_entry(llist, struct task_struct, wake_entry);
|
||||
@@ -1535,7 +1556,7 @@ static void sched_ttwu_pending(void)
|
||||
ttwu_do_activate(rq, p, 0);
|
||||
}
|
||||
|
||||
raw_spin_unlock(&rq->lock);
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
}
|
||||
|
||||
void scheduler_ipi(void)
|
||||
@@ -1581,8 +1602,14 @@ void scheduler_ipi(void)
|
||||
|
||||
static void ttwu_queue_remote(struct task_struct *p, int cpu)
|
||||
{
|
||||
if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list))
|
||||
smp_send_reschedule(cpu);
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
if (llist_add(&p->wake_entry, &cpu_rq(cpu)->wake_list)) {
|
||||
if (!set_nr_if_polling(rq->idle))
|
||||
smp_send_reschedule(cpu);
|
||||
else
|
||||
trace_sched_wake_idle_without_ipi(cpu);
|
||||
}
|
||||
}
|
||||
|
||||
bool cpus_share_cache(int this_cpu, int that_cpu)
|
||||
@@ -2527,7 +2554,7 @@ notrace unsigned long get_parent_ip(unsigned long addr)
|
||||
#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
|
||||
defined(CONFIG_PREEMPT_TRACER))
|
||||
|
||||
void __kprobes preempt_count_add(int val)
|
||||
void preempt_count_add(int val)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
/*
|
||||
@@ -2553,8 +2580,9 @@ void __kprobes preempt_count_add(int val)
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(preempt_count_add);
|
||||
NOKPROBE_SYMBOL(preempt_count_add);
|
||||
|
||||
void __kprobes preempt_count_sub(int val)
|
||||
void preempt_count_sub(int val)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_PREEMPT
|
||||
/*
|
||||
@@ -2575,6 +2603,7 @@ void __kprobes preempt_count_sub(int val)
|
||||
__preempt_count_sub(val);
|
||||
}
|
||||
EXPORT_SYMBOL(preempt_count_sub);
|
||||
NOKPROBE_SYMBOL(preempt_count_sub);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -2857,6 +2886,7 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
|
||||
barrier();
|
||||
} while (need_resched());
|
||||
}
|
||||
NOKPROBE_SYMBOL(preempt_schedule);
|
||||
EXPORT_SYMBOL(preempt_schedule);
|
||||
#endif /* CONFIG_PREEMPT */
|
||||
|
||||
@@ -4216,7 +4246,7 @@ EXPORT_SYMBOL(yield);
|
||||
* false (0) if we failed to boost the target.
|
||||
* -ESRCH if there's no task to yield to.
|
||||
*/
|
||||
bool __sched yield_to(struct task_struct *p, bool preempt)
|
||||
int __sched yield_to(struct task_struct *p, bool preempt)
|
||||
{
|
||||
struct task_struct *curr = current;
|
||||
struct rq *rq, *p_rq;
|
||||
@@ -5242,14 +5272,13 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
|
||||
}
|
||||
|
||||
/*
|
||||
* Even though we initialize ->power to something semi-sane,
|
||||
* we leave power_orig unset. This allows us to detect if
|
||||
* Even though we initialize ->capacity to something semi-sane,
|
||||
* we leave capacity_orig unset. This allows us to detect if
|
||||
* domain iteration is still funny without causing /0 traps.
|
||||
*/
|
||||
if (!group->sgp->power_orig) {
|
||||
if (!group->sgc->capacity_orig) {
|
||||
printk(KERN_CONT "\n");
|
||||
printk(KERN_ERR "ERROR: domain->cpu_power not "
|
||||
"set\n");
|
||||
printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -5271,9 +5300,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
|
||||
cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
|
||||
|
||||
printk(KERN_CONT " %s", str);
|
||||
if (group->sgp->power != SCHED_POWER_SCALE) {
|
||||
printk(KERN_CONT " (cpu_power = %d)",
|
||||
group->sgp->power);
|
||||
if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
|
||||
printk(KERN_CONT " (cpu_capacity = %d)",
|
||||
group->sgc->capacity);
|
||||
}
|
||||
|
||||
group = group->next;
|
||||
@@ -5331,7 +5360,7 @@ static int sd_degenerate(struct sched_domain *sd)
|
||||
SD_BALANCE_NEWIDLE |
|
||||
SD_BALANCE_FORK |
|
||||
SD_BALANCE_EXEC |
|
||||
SD_SHARE_CPUPOWER |
|
||||
SD_SHARE_CPUCAPACITY |
|
||||
SD_SHARE_PKG_RESOURCES |
|
||||
SD_SHARE_POWERDOMAIN)) {
|
||||
if (sd->groups != sd->groups->next)
|
||||
@@ -5362,7 +5391,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
|
||||
SD_BALANCE_NEWIDLE |
|
||||
SD_BALANCE_FORK |
|
||||
SD_BALANCE_EXEC |
|
||||
SD_SHARE_CPUPOWER |
|
||||
SD_SHARE_CPUCAPACITY |
|
||||
SD_SHARE_PKG_RESOURCES |
|
||||
SD_PREFER_SIBLING |
|
||||
SD_SHARE_POWERDOMAIN);
|
||||
@@ -5487,7 +5516,7 @@ static struct root_domain *alloc_rootdomain(void)
|
||||
return rd;
|
||||
}
|
||||
|
||||
static void free_sched_groups(struct sched_group *sg, int free_sgp)
|
||||
static void free_sched_groups(struct sched_group *sg, int free_sgc)
|
||||
{
|
||||
struct sched_group *tmp, *first;
|
||||
|
||||
@@ -5498,8 +5527,8 @@ static void free_sched_groups(struct sched_group *sg, int free_sgp)
|
||||
do {
|
||||
tmp = sg->next;
|
||||
|
||||
if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
|
||||
kfree(sg->sgp);
|
||||
if (free_sgc && atomic_dec_and_test(&sg->sgc->ref))
|
||||
kfree(sg->sgc);
|
||||
|
||||
kfree(sg);
|
||||
sg = tmp;
|
||||
@@ -5517,7 +5546,7 @@ static void free_sched_domain(struct rcu_head *rcu)
|
||||
if (sd->flags & SD_OVERLAP) {
|
||||
free_sched_groups(sd->groups, 1);
|
||||
} else if (atomic_dec_and_test(&sd->groups->ref)) {
|
||||
kfree(sd->groups->sgp);
|
||||
kfree(sd->groups->sgc);
|
||||
kfree(sd->groups);
|
||||
}
|
||||
kfree(sd);
|
||||
@@ -5728,17 +5757,17 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
|
||||
|
||||
cpumask_or(covered, covered, sg_span);
|
||||
|
||||
sg->sgp = *per_cpu_ptr(sdd->sgp, i);
|
||||
if (atomic_inc_return(&sg->sgp->ref) == 1)
|
||||
sg->sgc = *per_cpu_ptr(sdd->sgc, i);
|
||||
if (atomic_inc_return(&sg->sgc->ref) == 1)
|
||||
build_group_mask(sd, sg);
|
||||
|
||||
/*
|
||||
* Initialize sgp->power such that even if we mess up the
|
||||
* Initialize sgc->capacity such that even if we mess up the
|
||||
* domains and no possible iteration will get us here, we won't
|
||||
* die on a /0 trap.
|
||||
*/
|
||||
sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span);
|
||||
sg->sgp->power_orig = sg->sgp->power;
|
||||
sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
|
||||
sg->sgc->capacity_orig = sg->sgc->capacity;
|
||||
|
||||
/*
|
||||
* Make sure the first group of this domain contains the
|
||||
@@ -5776,8 +5805,8 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
|
||||
|
||||
if (sg) {
|
||||
*sg = *per_cpu_ptr(sdd->sg, cpu);
|
||||
(*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
|
||||
atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
|
||||
(*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu);
|
||||
atomic_set(&(*sg)->sgc->ref, 1); /* for claim_allocations */
|
||||
}
|
||||
|
||||
return cpu;
|
||||
@@ -5786,7 +5815,7 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
|
||||
/*
|
||||
* build_sched_groups will build a circular linked list of the groups
|
||||
* covered by the given span, and will set each group's ->cpumask correctly,
|
||||
* and ->cpu_power to 0.
|
||||
* and ->cpu_capacity to 0.
|
||||
*
|
||||
* Assumes the sched_domain tree is fully constructed
|
||||
*/
|
||||
@@ -5840,16 +5869,16 @@ build_sched_groups(struct sched_domain *sd, int cpu)
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize sched groups cpu_power.
|
||||
* Initialize sched groups cpu_capacity.
|
||||
*
|
||||
* cpu_power indicates the capacity of sched group, which is used while
|
||||
* cpu_capacity indicates the capacity of sched group, which is used while
|
||||
* distributing the load between different sched groups in a sched domain.
|
||||
* Typically cpu_power for all the groups in a sched domain will be same unless
|
||||
* there are asymmetries in the topology. If there are asymmetries, group
|
||||
* having more cpu_power will pickup more load compared to the group having
|
||||
* less cpu_power.
|
||||
* Typically cpu_capacity for all the groups in a sched domain will be same
|
||||
* unless there are asymmetries in the topology. If there are asymmetries,
|
||||
* group having more cpu_capacity will pickup more load compared to the
|
||||
* group having less cpu_capacity.
|
||||
*/
|
||||
static void init_sched_groups_power(int cpu, struct sched_domain *sd)
|
||||
static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
|
||||
{
|
||||
struct sched_group *sg = sd->groups;
|
||||
|
||||
@@ -5863,8 +5892,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
|
||||
if (cpu != group_balance_cpu(sg))
|
||||
return;
|
||||
|
||||
update_group_power(sd, cpu);
|
||||
atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
|
||||
update_group_capacity(sd, cpu);
|
||||
atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -5955,8 +5984,8 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
|
||||
if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
|
||||
*per_cpu_ptr(sdd->sg, cpu) = NULL;
|
||||
|
||||
if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))
|
||||
*per_cpu_ptr(sdd->sgp, cpu) = NULL;
|
||||
if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref))
|
||||
*per_cpu_ptr(sdd->sgc, cpu) = NULL;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
@@ -5969,7 +5998,7 @@ static int sched_domains_curr_level;
|
||||
/*
|
||||
* SD_flags allowed in topology descriptions.
|
||||
*
|
||||
* SD_SHARE_CPUPOWER - describes SMT topologies
|
||||
* SD_SHARE_CPUCAPACITY - describes SMT topologies
|
||||
* SD_SHARE_PKG_RESOURCES - describes shared caches
|
||||
* SD_NUMA - describes NUMA topologies
|
||||
* SD_SHARE_POWERDOMAIN - describes shared power domain
|
||||
@@ -5978,7 +6007,7 @@ static int sched_domains_curr_level;
|
||||
* SD_ASYM_PACKING - describes SMT quirks
|
||||
*/
|
||||
#define TOPOLOGY_SD_FLAGS \
|
||||
(SD_SHARE_CPUPOWER | \
|
||||
(SD_SHARE_CPUCAPACITY | \
|
||||
SD_SHARE_PKG_RESOURCES | \
|
||||
SD_NUMA | \
|
||||
SD_ASYM_PACKING | \
|
||||
@@ -6024,7 +6053,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
|
||||
| 1*SD_BALANCE_FORK
|
||||
| 0*SD_BALANCE_WAKE
|
||||
| 1*SD_WAKE_AFFINE
|
||||
| 0*SD_SHARE_CPUPOWER
|
||||
| 0*SD_SHARE_CPUCAPACITY
|
||||
| 0*SD_SHARE_PKG_RESOURCES
|
||||
| 0*SD_SERIALIZE
|
||||
| 0*SD_PREFER_SIBLING
|
||||
@@ -6046,7 +6075,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
|
||||
* Convert topological properties into behaviour.
|
||||
*/
|
||||
|
||||
if (sd->flags & SD_SHARE_CPUPOWER) {
|
||||
if (sd->flags & SD_SHARE_CPUCAPACITY) {
|
||||
sd->imbalance_pct = 110;
|
||||
sd->smt_gain = 1178; /* ~15% */
|
||||
|
||||
@@ -6358,14 +6387,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
|
||||
if (!sdd->sg)
|
||||
return -ENOMEM;
|
||||
|
||||
sdd->sgp = alloc_percpu(struct sched_group_power *);
|
||||
if (!sdd->sgp)
|
||||
sdd->sgc = alloc_percpu(struct sched_group_capacity *);
|
||||
if (!sdd->sgc)
|
||||
return -ENOMEM;
|
||||
|
||||
for_each_cpu(j, cpu_map) {
|
||||
struct sched_domain *sd;
|
||||
struct sched_group *sg;
|
||||
struct sched_group_power *sgp;
|
||||
struct sched_group_capacity *sgc;
|
||||
|
||||
sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
|
||||
GFP_KERNEL, cpu_to_node(j));
|
||||
@@ -6383,12 +6412,12 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
|
||||
|
||||
*per_cpu_ptr(sdd->sg, j) = sg;
|
||||
|
||||
sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
|
||||
sgc = kzalloc_node(sizeof(struct sched_group_capacity) + cpumask_size(),
|
||||
GFP_KERNEL, cpu_to_node(j));
|
||||
if (!sgp)
|
||||
if (!sgc)
|
||||
return -ENOMEM;
|
||||
|
||||
*per_cpu_ptr(sdd->sgp, j) = sgp;
|
||||
*per_cpu_ptr(sdd->sgc, j) = sgc;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6415,15 +6444,15 @@ static void __sdt_free(const struct cpumask *cpu_map)
|
||||
|
||||
if (sdd->sg)
|
||||
kfree(*per_cpu_ptr(sdd->sg, j));
|
||||
if (sdd->sgp)
|
||||
kfree(*per_cpu_ptr(sdd->sgp, j));
|
||||
if (sdd->sgc)
|
||||
kfree(*per_cpu_ptr(sdd->sgc, j));
|
||||
}
|
||||
free_percpu(sdd->sd);
|
||||
sdd->sd = NULL;
|
||||
free_percpu(sdd->sg);
|
||||
sdd->sg = NULL;
|
||||
free_percpu(sdd->sgp);
|
||||
sdd->sgp = NULL;
|
||||
free_percpu(sdd->sgc);
|
||||
sdd->sgc = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6493,14 +6522,14 @@ static int build_sched_domains(const struct cpumask *cpu_map,
|
||||
}
|
||||
}
|
||||
|
||||
/* Calculate CPU power for physical packages and nodes */
|
||||
/* Calculate CPU capacity for physical packages and nodes */
|
||||
for (i = nr_cpumask_bits-1; i >= 0; i--) {
|
||||
if (!cpumask_test_cpu(i, cpu_map))
|
||||
continue;
|
||||
|
||||
for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
|
||||
claim_allocations(i, sd);
|
||||
init_sched_groups_power(i, sd);
|
||||
init_sched_groups_capacity(i, sd);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6943,7 +6972,7 @@ void __init sched_init(void)
|
||||
#ifdef CONFIG_SMP
|
||||
rq->sd = NULL;
|
||||
rq->rd = NULL;
|
||||
rq->cpu_power = SCHED_POWER_SCALE;
|
||||
rq->cpu_capacity = SCHED_CAPACITY_SCALE;
|
||||
rq->post_schedule = 0;
|
||||
rq->active_balance = 0;
|
||||
rq->next_balance = jiffies;
|
||||
@@ -7669,7 +7698,7 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
|
||||
static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct task_group *tg = css_tg(css);
|
||||
struct task_group *parent = css_tg(css_parent(css));
|
||||
struct task_group *parent = css_tg(css->parent);
|
||||
|
||||
if (parent)
|
||||
sched_online_group(tg, parent);
|
||||
|
@@ -46,7 +46,7 @@ static inline struct cpuacct *task_ca(struct task_struct *tsk)
|
||||
|
||||
static inline struct cpuacct *parent_ca(struct cpuacct *ca)
|
||||
{
|
||||
return css_ca(css_parent(&ca->css));
|
||||
return css_ca(ca->css.parent);
|
||||
}
|
||||
|
||||
static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage);
|
||||
|
@@ -57,8 +57,6 @@ void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
|
||||
dl_b->dl_runtime = runtime;
|
||||
}
|
||||
|
||||
extern unsigned long to_ratio(u64 period, u64 runtime);
|
||||
|
||||
void init_dl_bw(struct dl_bw *dl_b)
|
||||
{
|
||||
raw_spin_lock_init(&dl_b->lock);
|
||||
|
@@ -1017,7 +1017,7 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
|
||||
static unsigned long weighted_cpuload(const int cpu);
|
||||
static unsigned long source_load(int cpu, int type);
|
||||
static unsigned long target_load(int cpu, int type);
|
||||
static unsigned long power_of(int cpu);
|
||||
static unsigned long capacity_of(int cpu);
|
||||
static long effective_load(struct task_group *tg, int cpu, long wl, long wg);
|
||||
|
||||
/* Cached statistics for all CPUs within a node */
|
||||
@@ -1026,11 +1026,11 @@ struct numa_stats {
|
||||
unsigned long load;
|
||||
|
||||
/* Total compute capacity of CPUs on a node */
|
||||
unsigned long power;
|
||||
unsigned long compute_capacity;
|
||||
|
||||
/* Approximate capacity in terms of runnable tasks on a node */
|
||||
unsigned long capacity;
|
||||
int has_capacity;
|
||||
unsigned long task_capacity;
|
||||
int has_free_capacity;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -1046,7 +1046,7 @@ static void update_numa_stats(struct numa_stats *ns, int nid)
|
||||
|
||||
ns->nr_running += rq->nr_running;
|
||||
ns->load += weighted_cpuload(cpu);
|
||||
ns->power += power_of(cpu);
|
||||
ns->compute_capacity += capacity_of(cpu);
|
||||
|
||||
cpus++;
|
||||
}
|
||||
@@ -1056,15 +1056,16 @@ static void update_numa_stats(struct numa_stats *ns, int nid)
|
||||
* the @ns structure is NULL'ed and task_numa_compare() will
|
||||
* not find this node attractive.
|
||||
*
|
||||
* We'll either bail at !has_capacity, or we'll detect a huge imbalance
|
||||
* and bail there.
|
||||
* We'll either bail at !has_free_capacity, or we'll detect a huge
|
||||
* imbalance and bail there.
|
||||
*/
|
||||
if (!cpus)
|
||||
return;
|
||||
|
||||
ns->load = (ns->load * SCHED_POWER_SCALE) / ns->power;
|
||||
ns->capacity = DIV_ROUND_CLOSEST(ns->power, SCHED_POWER_SCALE);
|
||||
ns->has_capacity = (ns->nr_running < ns->capacity);
|
||||
ns->load = (ns->load * SCHED_CAPACITY_SCALE) / ns->compute_capacity;
|
||||
ns->task_capacity =
|
||||
DIV_ROUND_CLOSEST(ns->compute_capacity, SCHED_CAPACITY_SCALE);
|
||||
ns->has_free_capacity = (ns->nr_running < ns->task_capacity);
|
||||
}
|
||||
|
||||
struct task_numa_env {
|
||||
@@ -1195,8 +1196,8 @@ static void task_numa_compare(struct task_numa_env *env,
|
||||
|
||||
if (!cur) {
|
||||
/* Is there capacity at our destination? */
|
||||
if (env->src_stats.has_capacity &&
|
||||
!env->dst_stats.has_capacity)
|
||||
if (env->src_stats.has_free_capacity &&
|
||||
!env->dst_stats.has_free_capacity)
|
||||
goto unlock;
|
||||
|
||||
goto balance;
|
||||
@@ -1213,7 +1214,7 @@ balance:
|
||||
orig_dst_load = env->dst_stats.load;
|
||||
orig_src_load = env->src_stats.load;
|
||||
|
||||
/* XXX missing power terms */
|
||||
/* XXX missing capacity terms */
|
||||
load = task_h_load(env->p);
|
||||
dst_load = orig_dst_load + load;
|
||||
src_load = orig_src_load - load;
|
||||
@@ -1301,8 +1302,8 @@ static int task_numa_migrate(struct task_struct *p)
|
||||
groupimp = group_weight(p, env.dst_nid) - groupweight;
|
||||
update_numa_stats(&env.dst_stats, env.dst_nid);
|
||||
|
||||
/* If the preferred nid has capacity, try to use it. */
|
||||
if (env.dst_stats.has_capacity)
|
||||
/* If the preferred nid has free capacity, try to use it. */
|
||||
if (env.dst_stats.has_free_capacity)
|
||||
task_numa_find_cpu(&env, taskimp, groupimp);
|
||||
|
||||
/* No space available on the preferred nid. Look elsewhere. */
|
||||
@@ -3225,10 +3226,12 @@ static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
|
||||
* has not truly expired.
|
||||
*
|
||||
* Fortunately we can check determine whether this the case by checking
|
||||
* whether the global deadline has advanced.
|
||||
* whether the global deadline has advanced. It is valid to compare
|
||||
* cfs_b->runtime_expires without any locks since we only care about
|
||||
* exact equality, so a partial write will still work.
|
||||
*/
|
||||
|
||||
if ((s64)(cfs_rq->runtime_expires - cfs_b->runtime_expires) >= 0) {
|
||||
if (cfs_rq->runtime_expires != cfs_b->runtime_expires) {
|
||||
/* extend local deadline, drift is bounded above by 2 ticks */
|
||||
cfs_rq->runtime_expires += TICK_NSEC;
|
||||
} else {
|
||||
@@ -3457,21 +3460,21 @@ next:
|
||||
static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
|
||||
{
|
||||
u64 runtime, runtime_expires;
|
||||
int idle = 1, throttled;
|
||||
int throttled;
|
||||
|
||||
raw_spin_lock(&cfs_b->lock);
|
||||
/* no need to continue the timer with no bandwidth constraint */
|
||||
if (cfs_b->quota == RUNTIME_INF)
|
||||
goto out_unlock;
|
||||
goto out_deactivate;
|
||||
|
||||
throttled = !list_empty(&cfs_b->throttled_cfs_rq);
|
||||
/* idle depends on !throttled (for the case of a large deficit) */
|
||||
idle = cfs_b->idle && !throttled;
|
||||
cfs_b->nr_periods += overrun;
|
||||
|
||||
/* if we're going inactive then everything else can be deferred */
|
||||
if (idle)
|
||||
goto out_unlock;
|
||||
/*
|
||||
* idle depends on !throttled (for the case of a large deficit), and if
|
||||
* we're going inactive then everything else can be deferred
|
||||
*/
|
||||
if (cfs_b->idle && !throttled)
|
||||
goto out_deactivate;
|
||||
|
||||
/*
|
||||
* if we have relooped after returning idle once, we need to update our
|
||||
@@ -3485,7 +3488,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
|
||||
if (!throttled) {
|
||||
/* mark as potentially idle for the upcoming period */
|
||||
cfs_b->idle = 1;
|
||||
goto out_unlock;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* account preceding periods in which throttling occurred */
|
||||
@@ -3525,12 +3528,12 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
|
||||
* timer to remain active while there are any throttled entities.)
|
||||
*/
|
||||
cfs_b->idle = 0;
|
||||
out_unlock:
|
||||
if (idle)
|
||||
cfs_b->timer_active = 0;
|
||||
raw_spin_unlock(&cfs_b->lock);
|
||||
|
||||
return idle;
|
||||
return 0;
|
||||
|
||||
out_deactivate:
|
||||
cfs_b->timer_active = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* a cfs_rq won't donate quota below this amount */
|
||||
@@ -3707,6 +3710,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
|
||||
int overrun;
|
||||
int idle = 0;
|
||||
|
||||
raw_spin_lock(&cfs_b->lock);
|
||||
for (;;) {
|
||||
now = hrtimer_cb_get_time(timer);
|
||||
overrun = hrtimer_forward(timer, now, cfs_b->period);
|
||||
@@ -3716,6 +3720,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
|
||||
|
||||
idle = do_sched_cfs_period_timer(cfs_b, overrun);
|
||||
}
|
||||
raw_spin_unlock(&cfs_b->lock);
|
||||
|
||||
return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
|
||||
}
|
||||
@@ -3775,8 +3780,6 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
|
||||
struct cfs_rq *cfs_rq;
|
||||
|
||||
for_each_leaf_cfs_rq(rq, cfs_rq) {
|
||||
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
|
||||
|
||||
if (!cfs_rq->runtime_enabled)
|
||||
continue;
|
||||
|
||||
@@ -3784,7 +3787,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
|
||||
* clock_task is not advancing so we just need to make sure
|
||||
* there's some valid quota amount
|
||||
*/
|
||||
cfs_rq->runtime_remaining = cfs_b->quota;
|
||||
cfs_rq->runtime_remaining = 1;
|
||||
if (cfs_rq_throttled(cfs_rq))
|
||||
unthrottle_cfs_rq(cfs_rq);
|
||||
}
|
||||
@@ -4041,9 +4044,9 @@ static unsigned long target_load(int cpu, int type)
|
||||
return max(rq->cpu_load[type-1], total);
|
||||
}
|
||||
|
||||
static unsigned long power_of(int cpu)
|
||||
static unsigned long capacity_of(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cpu_power;
|
||||
return cpu_rq(cpu)->cpu_capacity;
|
||||
}
|
||||
|
||||
static unsigned long cpu_avg_load_per_task(int cpu)
|
||||
@@ -4065,7 +4068,7 @@ static void record_wakee(struct task_struct *p)
|
||||
* about the boundary, really active task won't care
|
||||
* about the loss.
|
||||
*/
|
||||
if (jiffies > current->wakee_flip_decay_ts + HZ) {
|
||||
if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) {
|
||||
current->wakee_flips >>= 1;
|
||||
current->wakee_flip_decay_ts = jiffies;
|
||||
}
|
||||
@@ -4286,12 +4289,12 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
|
||||
s64 this_eff_load, prev_eff_load;
|
||||
|
||||
this_eff_load = 100;
|
||||
this_eff_load *= power_of(prev_cpu);
|
||||
this_eff_load *= capacity_of(prev_cpu);
|
||||
this_eff_load *= this_load +
|
||||
effective_load(tg, this_cpu, weight, weight);
|
||||
|
||||
prev_eff_load = 100 + (sd->imbalance_pct - 100) / 2;
|
||||
prev_eff_load *= power_of(this_cpu);
|
||||
prev_eff_load *= capacity_of(this_cpu);
|
||||
prev_eff_load *= load + effective_load(tg, prev_cpu, 0, weight);
|
||||
|
||||
balanced = this_eff_load <= prev_eff_load;
|
||||
@@ -4367,8 +4370,8 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
|
||||
avg_load += load;
|
||||
}
|
||||
|
||||
/* Adjust by relative CPU power of the group */
|
||||
avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power;
|
||||
/* Adjust by relative CPU capacity of the group */
|
||||
avg_load = (avg_load * SCHED_CAPACITY_SCALE) / group->sgc->capacity;
|
||||
|
||||
if (local_group) {
|
||||
this_load = avg_load;
|
||||
@@ -4948,14 +4951,14 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
|
||||
*
|
||||
* W'_i,n = (2^n - 1) / 2^n * W_i,n + 1 / 2^n * W_i,0 (3)
|
||||
*
|
||||
* P_i is the cpu power (or compute capacity) of cpu i, typically it is the
|
||||
* C_i is the compute capacity of cpu i, typically it is the
|
||||
* fraction of 'recent' time available for SCHED_OTHER task execution. But it
|
||||
* can also include other factors [XXX].
|
||||
*
|
||||
* To achieve this balance we define a measure of imbalance which follows
|
||||
* directly from (1):
|
||||
*
|
||||
* imb_i,j = max{ avg(W/P), W_i/P_i } - min{ avg(W/P), W_j/P_j } (4)
|
||||
* imb_i,j = max{ avg(W/C), W_i/C_i } - min{ avg(W/C), W_j/C_j } (4)
|
||||
*
|
||||
* We them move tasks around to minimize the imbalance. In the continuous
|
||||
* function space it is obvious this converges, in the discrete case we get
|
||||
@@ -5530,13 +5533,13 @@ struct sg_lb_stats {
|
||||
unsigned long group_load; /* Total load over the CPUs of the group */
|
||||
unsigned long sum_weighted_load; /* Weighted load of group's tasks */
|
||||
unsigned long load_per_task;
|
||||
unsigned long group_power;
|
||||
unsigned long group_capacity;
|
||||
unsigned int sum_nr_running; /* Nr tasks running in the group */
|
||||
unsigned int group_capacity;
|
||||
unsigned int group_capacity_factor;
|
||||
unsigned int idle_cpus;
|
||||
unsigned int group_weight;
|
||||
int group_imb; /* Is there an imbalance in the group ? */
|
||||
int group_has_capacity; /* Is there extra capacity in the group? */
|
||||
int group_has_free_capacity;
|
||||
#ifdef CONFIG_NUMA_BALANCING
|
||||
unsigned int nr_numa_running;
|
||||
unsigned int nr_preferred_running;
|
||||
@@ -5551,7 +5554,7 @@ struct sd_lb_stats {
|
||||
struct sched_group *busiest; /* Busiest group in this sd */
|
||||
struct sched_group *local; /* Local group in this sd */
|
||||
unsigned long total_load; /* Total load of all groups in sd */
|
||||
unsigned long total_pwr; /* Total power of all groups in sd */
|
||||
unsigned long total_capacity; /* Total capacity of all groups in sd */
|
||||
unsigned long avg_load; /* Average load across all groups in sd */
|
||||
|
||||
struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */
|
||||
@@ -5570,7 +5573,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
|
||||
.busiest = NULL,
|
||||
.local = NULL,
|
||||
.total_load = 0UL,
|
||||
.total_pwr = 0UL,
|
||||
.total_capacity = 0UL,
|
||||
.busiest_stat = {
|
||||
.avg_load = 0UL,
|
||||
},
|
||||
@@ -5605,17 +5608,17 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
|
||||
return load_idx;
|
||||
}
|
||||
|
||||
static unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
|
||||
static unsigned long default_scale_capacity(struct sched_domain *sd, int cpu)
|
||||
{
|
||||
return SCHED_POWER_SCALE;
|
||||
return SCHED_CAPACITY_SCALE;
|
||||
}
|
||||
|
||||
unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
|
||||
unsigned long __weak arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
|
||||
{
|
||||
return default_scale_freq_power(sd, cpu);
|
||||
return default_scale_capacity(sd, cpu);
|
||||
}
|
||||
|
||||
static unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
|
||||
static unsigned long default_scale_smt_capacity(struct sched_domain *sd, int cpu)
|
||||
{
|
||||
unsigned long weight = sd->span_weight;
|
||||
unsigned long smt_gain = sd->smt_gain;
|
||||
@@ -5625,12 +5628,12 @@ static unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
|
||||
return smt_gain;
|
||||
}
|
||||
|
||||
unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
|
||||
unsigned long __weak arch_scale_smt_capacity(struct sched_domain *sd, int cpu)
|
||||
{
|
||||
return default_scale_smt_power(sd, cpu);
|
||||
return default_scale_smt_capacity(sd, cpu);
|
||||
}
|
||||
|
||||
static unsigned long scale_rt_power(int cpu)
|
||||
static unsigned long scale_rt_capacity(int cpu)
|
||||
{
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
u64 total, available, age_stamp, avg;
|
||||
@@ -5650,71 +5653,71 @@ static unsigned long scale_rt_power(int cpu)
|
||||
total = sched_avg_period() + delta;
|
||||
|
||||
if (unlikely(total < avg)) {
|
||||
/* Ensures that power won't end up being negative */
|
||||
/* Ensures that capacity won't end up being negative */
|
||||
available = 0;
|
||||
} else {
|
||||
available = total - avg;
|
||||
}
|
||||
|
||||
if (unlikely((s64)total < SCHED_POWER_SCALE))
|
||||
total = SCHED_POWER_SCALE;
|
||||
if (unlikely((s64)total < SCHED_CAPACITY_SCALE))
|
||||
total = SCHED_CAPACITY_SCALE;
|
||||
|
||||
total >>= SCHED_POWER_SHIFT;
|
||||
total >>= SCHED_CAPACITY_SHIFT;
|
||||
|
||||
return div_u64(available, total);
|
||||
}
|
||||
|
||||
static void update_cpu_power(struct sched_domain *sd, int cpu)
|
||||
static void update_cpu_capacity(struct sched_domain *sd, int cpu)
|
||||
{
|
||||
unsigned long weight = sd->span_weight;
|
||||
unsigned long power = SCHED_POWER_SCALE;
|
||||
unsigned long capacity = SCHED_CAPACITY_SCALE;
|
||||
struct sched_group *sdg = sd->groups;
|
||||
|
||||
if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
|
||||
if (sched_feat(ARCH_POWER))
|
||||
power *= arch_scale_smt_power(sd, cpu);
|
||||
if ((sd->flags & SD_SHARE_CPUCAPACITY) && weight > 1) {
|
||||
if (sched_feat(ARCH_CAPACITY))
|
||||
capacity *= arch_scale_smt_capacity(sd, cpu);
|
||||
else
|
||||
power *= default_scale_smt_power(sd, cpu);
|
||||
capacity *= default_scale_smt_capacity(sd, cpu);
|
||||
|
||||
power >>= SCHED_POWER_SHIFT;
|
||||
capacity >>= SCHED_CAPACITY_SHIFT;
|
||||
}
|
||||
|
||||
sdg->sgp->power_orig = power;
|
||||
sdg->sgc->capacity_orig = capacity;
|
||||
|
||||
if (sched_feat(ARCH_POWER))
|
||||
power *= arch_scale_freq_power(sd, cpu);
|
||||
if (sched_feat(ARCH_CAPACITY))
|
||||
capacity *= arch_scale_freq_capacity(sd, cpu);
|
||||
else
|
||||
power *= default_scale_freq_power(sd, cpu);
|
||||
capacity *= default_scale_capacity(sd, cpu);
|
||||
|
||||
power >>= SCHED_POWER_SHIFT;
|
||||
capacity >>= SCHED_CAPACITY_SHIFT;
|
||||
|
||||
power *= scale_rt_power(cpu);
|
||||
power >>= SCHED_POWER_SHIFT;
|
||||
capacity *= scale_rt_capacity(cpu);
|
||||
capacity >>= SCHED_CAPACITY_SHIFT;
|
||||
|
||||
if (!power)
|
||||
power = 1;
|
||||
if (!capacity)
|
||||
capacity = 1;
|
||||
|
||||
cpu_rq(cpu)->cpu_power = power;
|
||||
sdg->sgp->power = power;
|
||||
cpu_rq(cpu)->cpu_capacity = capacity;
|
||||
sdg->sgc->capacity = capacity;
|
||||
}
|
||||
|
||||
void update_group_power(struct sched_domain *sd, int cpu)
|
||||
void update_group_capacity(struct sched_domain *sd, int cpu)
|
||||
{
|
||||
struct sched_domain *child = sd->child;
|
||||
struct sched_group *group, *sdg = sd->groups;
|
||||
unsigned long power, power_orig;
|
||||
unsigned long capacity, capacity_orig;
|
||||
unsigned long interval;
|
||||
|
||||
interval = msecs_to_jiffies(sd->balance_interval);
|
||||
interval = clamp(interval, 1UL, max_load_balance_interval);
|
||||
sdg->sgp->next_update = jiffies + interval;
|
||||
sdg->sgc->next_update = jiffies + interval;
|
||||
|
||||
if (!child) {
|
||||
update_cpu_power(sd, cpu);
|
||||
update_cpu_capacity(sd, cpu);
|
||||
return;
|
||||
}
|
||||
|
||||
power_orig = power = 0;
|
||||
capacity_orig = capacity = 0;
|
||||
|
||||
if (child->flags & SD_OVERLAP) {
|
||||
/*
|
||||
@@ -5723,31 +5726,31 @@ void update_group_power(struct sched_domain *sd, int cpu)
|
||||
*/
|
||||
|
||||
for_each_cpu(cpu, sched_group_cpus(sdg)) {
|
||||
struct sched_group_power *sgp;
|
||||
struct sched_group_capacity *sgc;
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
/*
|
||||
* build_sched_domains() -> init_sched_groups_power()
|
||||
* build_sched_domains() -> init_sched_groups_capacity()
|
||||
* gets here before we've attached the domains to the
|
||||
* runqueues.
|
||||
*
|
||||
* Use power_of(), which is set irrespective of domains
|
||||
* in update_cpu_power().
|
||||
* Use capacity_of(), which is set irrespective of domains
|
||||
* in update_cpu_capacity().
|
||||
*
|
||||
* This avoids power/power_orig from being 0 and
|
||||
* This avoids capacity/capacity_orig from being 0 and
|
||||
* causing divide-by-zero issues on boot.
|
||||
*
|
||||
* Runtime updates will correct power_orig.
|
||||
* Runtime updates will correct capacity_orig.
|
||||
*/
|
||||
if (unlikely(!rq->sd)) {
|
||||
power_orig += power_of(cpu);
|
||||
power += power_of(cpu);
|
||||
capacity_orig += capacity_of(cpu);
|
||||
capacity += capacity_of(cpu);
|
||||
continue;
|
||||
}
|
||||
|
||||
sgp = rq->sd->groups->sgp;
|
||||
power_orig += sgp->power_orig;
|
||||
power += sgp->power;
|
||||
sgc = rq->sd->groups->sgc;
|
||||
capacity_orig += sgc->capacity_orig;
|
||||
capacity += sgc->capacity;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
@@ -5757,14 +5760,14 @@ void update_group_power(struct sched_domain *sd, int cpu)
|
||||
|
||||
group = child->groups;
|
||||
do {
|
||||
power_orig += group->sgp->power_orig;
|
||||
power += group->sgp->power;
|
||||
capacity_orig += group->sgc->capacity_orig;
|
||||
capacity += group->sgc->capacity;
|
||||
group = group->next;
|
||||
} while (group != child->groups);
|
||||
}
|
||||
|
||||
sdg->sgp->power_orig = power_orig;
|
||||
sdg->sgp->power = power;
|
||||
sdg->sgc->capacity_orig = capacity_orig;
|
||||
sdg->sgc->capacity = capacity;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -5778,15 +5781,15 @@ static inline int
|
||||
fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
|
||||
{
|
||||
/*
|
||||
* Only siblings can have significantly less than SCHED_POWER_SCALE
|
||||
* Only siblings can have significantly less than SCHED_CAPACITY_SCALE
|
||||
*/
|
||||
if (!(sd->flags & SD_SHARE_CPUPOWER))
|
||||
if (!(sd->flags & SD_SHARE_CPUCAPACITY))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* If ~90% of the cpu_power is still there, we're good.
|
||||
* If ~90% of the cpu_capacity is still there, we're good.
|
||||
*/
|
||||
if (group->sgp->power * 32 > group->sgp->power_orig * 29)
|
||||
if (group->sgc->capacity * 32 > group->sgc->capacity_orig * 29)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
@@ -5823,34 +5826,35 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
|
||||
|
||||
static inline int sg_imbalanced(struct sched_group *group)
|
||||
{
|
||||
return group->sgp->imbalance;
|
||||
return group->sgc->imbalance;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the group capacity.
|
||||
* Compute the group capacity factor.
|
||||
*
|
||||
* Avoid the issue where N*frac(smt_power) >= 1 creates 'phantom' cores by
|
||||
* Avoid the issue where N*frac(smt_capacity) >= 1 creates 'phantom' cores by
|
||||
* first dividing out the smt factor and computing the actual number of cores
|
||||
* and limit power unit capacity with that.
|
||||
* and limit unit capacity with that.
|
||||
*/
|
||||
static inline int sg_capacity(struct lb_env *env, struct sched_group *group)
|
||||
static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *group)
|
||||
{
|
||||
unsigned int capacity, smt, cpus;
|
||||
unsigned int power, power_orig;
|
||||
unsigned int capacity_factor, smt, cpus;
|
||||
unsigned int capacity, capacity_orig;
|
||||
|
||||
power = group->sgp->power;
|
||||
power_orig = group->sgp->power_orig;
|
||||
capacity = group->sgc->capacity;
|
||||
capacity_orig = group->sgc->capacity_orig;
|
||||
cpus = group->group_weight;
|
||||
|
||||
/* smt := ceil(cpus / power), assumes: 1 < smt_power < 2 */
|
||||
smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, power_orig);
|
||||
capacity = cpus / smt; /* cores */
|
||||
/* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */
|
||||
smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, capacity_orig);
|
||||
capacity_factor = cpus / smt; /* cores */
|
||||
|
||||
capacity = min_t(unsigned, capacity, DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE));
|
||||
if (!capacity)
|
||||
capacity = fix_small_capacity(env->sd, group);
|
||||
capacity_factor = min_t(unsigned,
|
||||
capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE));
|
||||
if (!capacity_factor)
|
||||
capacity_factor = fix_small_capacity(env->sd, group);
|
||||
|
||||
return capacity;
|
||||
return capacity_factor;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -5890,9 +5894,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
|
||||
sgs->idle_cpus++;
|
||||
}
|
||||
|
||||
/* Adjust by relative CPU power of the group */
|
||||
sgs->group_power = group->sgp->power;
|
||||
sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_power;
|
||||
/* Adjust by relative CPU capacity of the group */
|
||||
sgs->group_capacity = group->sgc->capacity;
|
||||
sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
|
||||
|
||||
if (sgs->sum_nr_running)
|
||||
sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
|
||||
@@ -5900,10 +5904,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
|
||||
sgs->group_weight = group->group_weight;
|
||||
|
||||
sgs->group_imb = sg_imbalanced(group);
|
||||
sgs->group_capacity = sg_capacity(env, group);
|
||||
sgs->group_capacity_factor = sg_capacity_factor(env, group);
|
||||
|
||||
if (sgs->group_capacity > sgs->sum_nr_running)
|
||||
sgs->group_has_capacity = 1;
|
||||
if (sgs->group_capacity_factor > sgs->sum_nr_running)
|
||||
sgs->group_has_free_capacity = 1;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -5927,7 +5931,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
|
||||
if (sgs->avg_load <= sds->busiest_stat.avg_load)
|
||||
return false;
|
||||
|
||||
if (sgs->sum_nr_running > sgs->group_capacity)
|
||||
if (sgs->sum_nr_running > sgs->group_capacity_factor)
|
||||
return true;
|
||||
|
||||
if (sgs->group_imb)
|
||||
@@ -6007,8 +6011,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
|
||||
sgs = &sds->local_stat;
|
||||
|
||||
if (env->idle != CPU_NEWLY_IDLE ||
|
||||
time_after_eq(jiffies, sg->sgp->next_update))
|
||||
update_group_power(env->sd, env->dst_cpu);
|
||||
time_after_eq(jiffies, sg->sgc->next_update))
|
||||
update_group_capacity(env->sd, env->dst_cpu);
|
||||
}
|
||||
|
||||
update_sg_lb_stats(env, sg, load_idx, local_group, sgs);
|
||||
@@ -6018,17 +6022,17 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
|
||||
|
||||
/*
|
||||
* In case the child domain prefers tasks go to siblings
|
||||
* first, lower the sg capacity to one so that we'll try
|
||||
* first, lower the sg capacity factor to one so that we'll try
|
||||
* and move all the excess tasks away. We lower the capacity
|
||||
* of a group only if the local group has the capacity to fit
|
||||
* these excess tasks, i.e. nr_running < group_capacity. The
|
||||
* these excess tasks, i.e. nr_running < group_capacity_factor. The
|
||||
* extra check prevents the case where you always pull from the
|
||||
* heaviest group when it is already under-utilized (possible
|
||||
* with a large weight task outweighs the tasks on the system).
|
||||
*/
|
||||
if (prefer_sibling && sds->local &&
|
||||
sds->local_stat.group_has_capacity)
|
||||
sgs->group_capacity = min(sgs->group_capacity, 1U);
|
||||
sds->local_stat.group_has_free_capacity)
|
||||
sgs->group_capacity_factor = min(sgs->group_capacity_factor, 1U);
|
||||
|
||||
if (update_sd_pick_busiest(env, sds, sg, sgs)) {
|
||||
sds->busiest = sg;
|
||||
@@ -6038,7 +6042,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
|
||||
next_group:
|
||||
/* Now, start updating sd_lb_stats */
|
||||
sds->total_load += sgs->group_load;
|
||||
sds->total_pwr += sgs->group_power;
|
||||
sds->total_capacity += sgs->group_capacity;
|
||||
|
||||
sg = sg->next;
|
||||
} while (sg != env->sd->groups);
|
||||
@@ -6085,8 +6089,8 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
|
||||
return 0;
|
||||
|
||||
env->imbalance = DIV_ROUND_CLOSEST(
|
||||
sds->busiest_stat.avg_load * sds->busiest_stat.group_power,
|
||||
SCHED_POWER_SCALE);
|
||||
sds->busiest_stat.avg_load * sds->busiest_stat.group_capacity,
|
||||
SCHED_CAPACITY_SCALE);
|
||||
|
||||
return 1;
|
||||
}
|
||||
@@ -6101,7 +6105,7 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds)
|
||||
static inline
|
||||
void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
|
||||
{
|
||||
unsigned long tmp, pwr_now = 0, pwr_move = 0;
|
||||
unsigned long tmp, capa_now = 0, capa_move = 0;
|
||||
unsigned int imbn = 2;
|
||||
unsigned long scaled_busy_load_per_task;
|
||||
struct sg_lb_stats *local, *busiest;
|
||||
@@ -6115,8 +6119,8 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
|
||||
imbn = 1;
|
||||
|
||||
scaled_busy_load_per_task =
|
||||
(busiest->load_per_task * SCHED_POWER_SCALE) /
|
||||
busiest->group_power;
|
||||
(busiest->load_per_task * SCHED_CAPACITY_SCALE) /
|
||||
busiest->group_capacity;
|
||||
|
||||
if (busiest->avg_load + scaled_busy_load_per_task >=
|
||||
local->avg_load + (scaled_busy_load_per_task * imbn)) {
|
||||
@@ -6126,38 +6130,38 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
|
||||
|
||||
/*
|
||||
* OK, we don't have enough imbalance to justify moving tasks,
|
||||
* however we may be able to increase total CPU power used by
|
||||
* however we may be able to increase total CPU capacity used by
|
||||
* moving them.
|
||||
*/
|
||||
|
||||
pwr_now += busiest->group_power *
|
||||
capa_now += busiest->group_capacity *
|
||||
min(busiest->load_per_task, busiest->avg_load);
|
||||
pwr_now += local->group_power *
|
||||
capa_now += local->group_capacity *
|
||||
min(local->load_per_task, local->avg_load);
|
||||
pwr_now /= SCHED_POWER_SCALE;
|
||||
capa_now /= SCHED_CAPACITY_SCALE;
|
||||
|
||||
/* Amount of load we'd subtract */
|
||||
if (busiest->avg_load > scaled_busy_load_per_task) {
|
||||
pwr_move += busiest->group_power *
|
||||
capa_move += busiest->group_capacity *
|
||||
min(busiest->load_per_task,
|
||||
busiest->avg_load - scaled_busy_load_per_task);
|
||||
}
|
||||
|
||||
/* Amount of load we'd add */
|
||||
if (busiest->avg_load * busiest->group_power <
|
||||
busiest->load_per_task * SCHED_POWER_SCALE) {
|
||||
tmp = (busiest->avg_load * busiest->group_power) /
|
||||
local->group_power;
|
||||
if (busiest->avg_load * busiest->group_capacity <
|
||||
busiest->load_per_task * SCHED_CAPACITY_SCALE) {
|
||||
tmp = (busiest->avg_load * busiest->group_capacity) /
|
||||
local->group_capacity;
|
||||
} else {
|
||||
tmp = (busiest->load_per_task * SCHED_POWER_SCALE) /
|
||||
local->group_power;
|
||||
tmp = (busiest->load_per_task * SCHED_CAPACITY_SCALE) /
|
||||
local->group_capacity;
|
||||
}
|
||||
pwr_move += local->group_power *
|
||||
capa_move += local->group_capacity *
|
||||
min(local->load_per_task, local->avg_load + tmp);
|
||||
pwr_move /= SCHED_POWER_SCALE;
|
||||
capa_move /= SCHED_CAPACITY_SCALE;
|
||||
|
||||
/* Move if we gain throughput */
|
||||
if (pwr_move > pwr_now)
|
||||
if (capa_move > capa_now)
|
||||
env->imbalance = busiest->load_per_task;
|
||||
}
|
||||
|
||||
@@ -6187,7 +6191,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
||||
/*
|
||||
* In the presence of smp nice balancing, certain scenarios can have
|
||||
* max load less than avg load(as we skip the groups at or below
|
||||
* its cpu_power, while calculating max_load..)
|
||||
* its cpu_capacity, while calculating max_load..)
|
||||
*/
|
||||
if (busiest->avg_load <= sds->avg_load ||
|
||||
local->avg_load >= sds->avg_load) {
|
||||
@@ -6202,10 +6206,10 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
||||
* have to drop below capacity to reach cpu-load equilibrium.
|
||||
*/
|
||||
load_above_capacity =
|
||||
(busiest->sum_nr_running - busiest->group_capacity);
|
||||
(busiest->sum_nr_running - busiest->group_capacity_factor);
|
||||
|
||||
load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE);
|
||||
load_above_capacity /= busiest->group_power;
|
||||
load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_CAPACITY_SCALE);
|
||||
load_above_capacity /= busiest->group_capacity;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -6220,9 +6224,9 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
|
||||
|
||||
/* How much load to actually move to equalise the imbalance */
|
||||
env->imbalance = min(
|
||||
max_pull * busiest->group_power,
|
||||
(sds->avg_load - local->avg_load) * local->group_power
|
||||
) / SCHED_POWER_SCALE;
|
||||
max_pull * busiest->group_capacity,
|
||||
(sds->avg_load - local->avg_load) * local->group_capacity
|
||||
) / SCHED_CAPACITY_SCALE;

/*
* if *imbalance is less than the average load per runnable task
@@ -6276,7 +6280,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
if (!sds.busiest || busiest->sum_nr_running == 0)
goto out_balanced;

sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr;
sds.avg_load = (SCHED_CAPACITY_SCALE * sds.total_load)
/ sds.total_capacity;

/*
* If the busiest group is imbalanced the below checks don't
@@ -6287,8 +6292,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
goto force_balance;

/* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
if (env->idle == CPU_NEWLY_IDLE && local->group_has_capacity &&
!busiest->group_has_capacity)
if (env->idle == CPU_NEWLY_IDLE && local->group_has_free_capacity &&
!busiest->group_has_free_capacity)
goto force_balance;

/*
@@ -6342,11 +6347,11 @@ static struct rq *find_busiest_queue(struct lb_env *env,
struct sched_group *group)
{
struct rq *busiest = NULL, *rq;
unsigned long busiest_load = 0, busiest_power = 1;
unsigned long busiest_load = 0, busiest_capacity = 1;
int i;

for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
unsigned long power, capacity, wl;
unsigned long capacity, capacity_factor, wl;
enum fbq_type rt;

rq = cpu_rq(i);
@@ -6374,34 +6379,34 @@ static struct rq *find_busiest_queue(struct lb_env *env,
if (rt > env->fbq_type)
continue;

power = power_of(i);
capacity = DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE);
if (!capacity)
capacity = fix_small_capacity(env->sd, group);
capacity = capacity_of(i);
capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE);
if (!capacity_factor)
capacity_factor = fix_small_capacity(env->sd, group);

wl = weighted_cpuload(i);

/*
* When comparing with imbalance, use weighted_cpuload()
* which is not scaled with the cpu power.
* which is not scaled with the cpu capacity.
*/
if (capacity && rq->nr_running == 1 && wl > env->imbalance)
if (capacity_factor && rq->nr_running == 1 && wl > env->imbalance)
continue;

/*
* For the load comparisons with the other cpu's, consider
* the weighted_cpuload() scaled with the cpu power, so that
* the load can be moved away from the cpu that is potentially
* running at a lower capacity.
* the weighted_cpuload() scaled with the cpu capacity, so
* that the load can be moved away from the cpu that is
* potentially running at a lower capacity.
*
* Thus we're looking for max(wl_i / power_i), crosswise
* Thus we're looking for max(wl_i / capacity_i), crosswise
* multiplication to rid ourselves of the division works out
* to: wl_i * power_j > wl_j * power_i; where j is our
* previous maximum.
* to: wl_i * capacity_j > wl_j * capacity_i; where j is
* our previous maximum.
*/
if (wl * busiest_power > busiest_load * power) {
if (wl * busiest_capacity > busiest_load * capacity) {
busiest_load = wl;
busiest_power = power;
busiest_capacity = capacity;
busiest = rq;
}
}
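The comment above derives the division-free comparison; here is a standalone sketch of the same selection loop, picking the entry with the largest wl/capacity ratio by cross-multiplying (names and numbers are illustrative, not the kernel's):

#include <stdio.h>

struct cpu_sample {
    unsigned long wl;        /* weighted load of the cpu */
    unsigned long capacity;  /* capacity of the cpu */
};

/* wl_i / cap_i > wl_j / cap_j  <=>  wl_i * cap_j > wl_j * cap_i */
static int pick_busiest(const struct cpu_sample *s, int n)
{
    unsigned long busiest_load = 0, busiest_capacity = 1;
    int busiest = -1;

    for (int i = 0; i < n; i++) {
        if (s[i].wl * busiest_capacity > busiest_load * s[i].capacity) {
            busiest_load = s[i].wl;
            busiest_capacity = s[i].capacity;
            busiest = i;
        }
    }
    return busiest;
}

int main(void)
{
    struct cpu_sample s[] = { {300, 1024}, {400, 2048}, {500, 1024} };
    printf("busiest cpu index: %d\n", pick_busiest(s, 3));  /* 2 */
    return 0;
}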
@@ -6609,7 +6614,7 @@ more_balance:
* We failed to reach balance because of affinity.
*/
if (sd_parent) {
int *group_imbalance = &sd_parent->groups->sgp->imbalance;
int *group_imbalance = &sd_parent->groups->sgc->imbalance;

if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0) {
*group_imbalance = 1;
@@ -6996,7 +7001,7 @@ static inline void set_cpu_sd_state_busy(void)
goto unlock;
sd->nohz_idle = 0;

atomic_inc(&sd->groups->sgp->nr_busy_cpus);
atomic_inc(&sd->groups->sgc->nr_busy_cpus);
unlock:
rcu_read_unlock();
}
@@ -7013,7 +7018,7 @@ void set_cpu_sd_state_idle(void)
goto unlock;
sd->nohz_idle = 1;

atomic_dec(&sd->groups->sgp->nr_busy_cpus);
atomic_dec(&sd->groups->sgc->nr_busy_cpus);
unlock:
rcu_read_unlock();
}
@@ -7192,12 +7197,17 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)

rq = cpu_rq(balance_cpu);

raw_spin_lock_irq(&rq->lock);
update_rq_clock(rq);
update_idle_cpu_load(rq);
raw_spin_unlock_irq(&rq->lock);

rebalance_domains(rq, CPU_IDLE);
/*
* If time for next balance is due,
* do the balance.
*/
if (time_after_eq(jiffies, rq->next_balance)) {
raw_spin_lock_irq(&rq->lock);
update_rq_clock(rq);
update_idle_cpu_load(rq);
raw_spin_unlock_irq(&rq->lock);
rebalance_domains(rq, CPU_IDLE);
}

if (time_after(this_rq->next_balance, rq->next_balance))
this_rq->next_balance = rq->next_balance;
@@ -7212,7 +7222,7 @@ end:
* of an idle cpu is the system.
* - This rq has more than one task.
* - At any scheduler domain level, this cpu's scheduler group has multiple
* busy cpu's exceeding the group's power.
* busy cpu's exceeding the group's capacity.
* - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
* domain span are idle.
*/
@@ -7220,7 +7230,7 @@ static inline int nohz_kick_needed(struct rq *rq)
{
unsigned long now = jiffies;
struct sched_domain *sd;
struct sched_group_power *sgp;
struct sched_group_capacity *sgc;
int nr_busy, cpu = rq->cpu;

if (unlikely(rq->idle_balance))
@@ -7250,8 +7260,8 @@ static inline int nohz_kick_needed(struct rq *rq)
sd = rcu_dereference(per_cpu(sd_busy, cpu));

if (sd) {
sgp = sd->groups->sgp;
nr_busy = atomic_read(&sgp->nr_busy_cpus);
sgc = sd->groups->sgc;
nr_busy = atomic_read(&sgc->nr_busy_cpus);

if (nr_busy > 1)
goto need_kick_unlock;
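The criteria listed in the comment above boil down to a small predicate. A simplified stand-in, with invented field names and the SD_ASYM_PACKING case omitted:

#include <stdbool.h>
#include <stdio.h>

/* Invented, simplified view of the state nohz_kick_needed() inspects. */
struct rq_view {
    bool idle_balance;        /* this cpu is itself going idle */
    unsigned int nr_running;  /* tasks queued on this runqueue */
    int group_nr_busy;        /* sgc->nr_busy_cpus of this cpu's group */
};

static bool nohz_kick_needed_sketch(const struct rq_view *rq)
{
    if (rq->idle_balance)
        return false;
    if (rq->nr_running > 1)        /* more than one task here */
        return true;
    return rq->group_nr_busy > 1;  /* group already has several busy cpus */
}

int main(void)
{
    struct rq_view rq = { .idle_balance = false, .nr_running = 2, .group_nr_busy = 1 };
    printf("%d\n", nohz_kick_needed_sketch(&rq));  /* 1: kick the idle balancer */
    return 0;
}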
@@ -37,18 +37,18 @@ SCHED_FEAT(CACHE_HOT_BUDDY, true)
SCHED_FEAT(WAKEUP_PREEMPTION, true)

/*
* Use arch dependent cpu power functions
* Use arch dependent cpu capacity functions
*/
SCHED_FEAT(ARCH_POWER, true)
SCHED_FEAT(ARCH_CAPACITY, true)

SCHED_FEAT(HRTICK, false)
SCHED_FEAT(DOUBLE_TICK, false)
SCHED_FEAT(LB_BIAS, true)

/*
* Decrement CPU power based on time not spent running tasks
* Decrement CPU capacity based on time not spent running tasks
*/
SCHED_FEAT(NONTASK_POWER, true)
SCHED_FEAT(NONTASK_CAPACITY, true)

/*
* Queue remote wakeups on the target CPU and process them
@@ -12,6 +12,8 @@

#include <trace/events/power.h>

#include "sched.h"

static int __read_mostly cpu_idle_force_poll;

void cpu_idle_poll_ctrl(bool enable)
@@ -67,6 +69,10 @@ void __weak arch_cpu_idle(void)
* cpuidle_idle_call - the main idle function
*
* NOTE: no locks or semaphores should be used here
*
* On archs that support TIF_POLLING_NRFLAG, is called with polling
* set, and it returns with polling set. If it ever stops polling, it
* must clear the polling bit.
*/
static void cpuidle_idle_call(void)
{
@@ -175,10 +181,22 @@ exit_idle:

/*
* Generic idle loop implementation
*
* Called with polling cleared.
*/
static void cpu_idle_loop(void)
{
while (1) {
/*
* If the arch has a polling bit, we maintain an invariant:
*
* Our polling bit is clear if we're not scheduled (i.e. if
* rq->curr != rq->idle). This means that, if rq->idle has
* the polling bit set, then setting need_resched is
* guaranteed to cause the cpu to reschedule.
*/

__current_set_polling();
tick_nohz_idle_enter();

while (!need_resched()) {
@@ -218,6 +236,17 @@ static void cpu_idle_loop(void)
*/
preempt_set_need_resched();
tick_nohz_idle_exit();
__current_clr_polling();

/*
* We promise to call sched_ttwu_pending and reschedule
* if need_resched is set while polling is set. That
* means that clearing polling needs to be visible
* before doing these things.
*/
smp_mb__after_atomic();

sched_ttwu_pending();
schedule_preempt_disabled();
}
}
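The invariant spelled out in the comment above (a set polling bit means a plain need_resched store is enough to get the idle cpu moving, no IPI needed) can be modelled with two flags. This is only a toy illustration, not the kernel's TIF handling:

#include <stdatomic.h>
#include <stdbool.h>

/* Toy model: one "idle cpu" side, one waker side. */
static atomic_bool polling;             /* stands in for TIF_POLLING_NRFLAG */
static atomic_bool need_resched_flag;   /* stands in for TIF_NEED_RESCHED */

void idle_cpu_loop(void)
{
    atomic_store(&polling, true);            /* __current_set_polling() */
    while (!atomic_load(&need_resched_flag))
        ;                                    /* a real cpu would monitor/mwait */
    atomic_store(&polling, false);           /* __current_clr_polling() */
    /* barrier + sched_ttwu_pending() + schedule() would follow here */
}

bool wake_idle_cpu(void)
{
    atomic_store(&need_resched_flag, true);  /* just set need_resched... */
    return atomic_load(&polling);            /* ...true: no IPI is required */
}

int main(void)
{
    wake_idle_cpu();   /* pretend a task was queued first... */
    idle_cpu_loop();   /* ...so the "idle cpu" falls straight through */
    return 0;
}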
@@ -239,7 +268,6 @@ void cpu_startup_entry(enum cpuhp_state state)
*/
boot_init_stack_canary();
#endif
__current_set_polling();
arch_cpu_idle_prepare();
cpu_idle_loop();
}
@@ -918,7 +918,6 @@ static void update_curr_rt(struct rq *rq)
{
struct task_struct *curr = rq->curr;
struct sched_rt_entity *rt_se = &curr->rt;
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
u64 delta_exec;

if (curr->sched_class != &rt_sched_class)
@@ -943,7 +942,7 @@ static void update_curr_rt(struct rq *rq)
return;

for_each_sched_rt_entity(rt_se) {
rt_rq = rt_rq_of_se(rt_se);
struct rt_rq *rt_rq = rt_rq_of_se(rt_se);

if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
raw_spin_lock(&rt_rq->rt_runtime_lock);
@@ -567,7 +567,7 @@ struct rq {
struct root_domain *rd;
struct sched_domain *sd;

unsigned long cpu_power;
unsigned long cpu_capacity;

unsigned char idle_balance;
/* For active balancing */
@@ -670,6 +670,8 @@ extern int migrate_swap(struct task_struct *, struct task_struct *);

#ifdef CONFIG_SMP

extern void sched_ttwu_pending(void);

#define rcu_dereference_check_sched_domain(p) \
rcu_dereference_check((p), \
lockdep_is_held(&sched_domains_mutex))
@@ -728,15 +730,15 @@ DECLARE_PER_CPU(struct sched_domain *, sd_numa);
DECLARE_PER_CPU(struct sched_domain *, sd_busy);
DECLARE_PER_CPU(struct sched_domain *, sd_asym);

struct sched_group_power {
struct sched_group_capacity {
atomic_t ref;
/*
* CPU power of this group, SCHED_LOAD_SCALE being max power for a
* single CPU.
* CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
* for a single CPU.
*/
unsigned int power, power_orig;
unsigned int capacity, capacity_orig;
unsigned long next_update;
int imbalance; /* XXX unrelated to power but shared group state */
int imbalance; /* XXX unrelated to capacity but shared group state */
/*
* Number of busy cpus in this group.
*/
@@ -750,7 +752,7 @@ struct sched_group {
atomic_t ref;

unsigned int group_weight;
struct sched_group_power *sgp;
struct sched_group_capacity *sgc;

/*
* The CPUs this group covers.
@@ -773,7 +775,7 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
*/
static inline struct cpumask *sched_group_mask(struct sched_group *sg)
{
return to_cpumask(sg->sgp->cpumask);
return to_cpumask(sg->sgc->cpumask);
}

/**
@@ -787,6 +789,10 @@ static inline unsigned int group_first_cpu(struct sched_group *group)

extern int group_balance_cpu(struct sched_group *sg);

#else

static inline void sched_ttwu_pending(void) { }

#endif /* CONFIG_SMP */

#include "stats.h"
@@ -1167,7 +1173,7 @@ extern const struct sched_class idle_sched_class;

#ifdef CONFIG_SMP

extern void update_group_power(struct sched_domain *sd, int cpu);
extern void update_group_capacity(struct sched_domain *sd, int cpu);

extern void trigger_load_balance(struct rq *rq);
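As a rough illustration of what the renamed fields carry (values and the helper below are invented, not kernel API): capacity sums the member cpus' capacities in SCHED_CAPACITY_SCALE units, and dividing by the scale with rounding gives the capacity_factor used earlier in find_busiest_queue.

#include <stdio.h>

#define CAPACITY_SCALE 1024U   /* SCHED_CAPACITY_SCALE: one nominal cpu */

/* Invented helper: nearest whole number of "cpu's worth" of capacity. */
static unsigned int capacity_factor(unsigned int group_capacity)
{
    return (group_capacity + CAPACITY_SCALE / 2) / CAPACITY_SCALE;
}

int main(void)
{
    /* e.g. a 2-cpu group where one cpu lost ~25% to rt/irq pressure */
    unsigned int group_capacity = 1024 + 768;
    printf("capacity_factor = %u\n", capacity_factor(group_capacity)); /* 2 */
    return 0;
}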
kernel/seccomp.c
@@ -54,8 +54,7 @@
|
||||
struct seccomp_filter {
|
||||
atomic_t usage;
|
||||
struct seccomp_filter *prev;
|
||||
unsigned short len; /* Instruction count */
|
||||
struct sock_filter_int insnsi[];
|
||||
struct sk_filter *prog;
|
||||
};
|
||||
|
||||
/* Limit any path through the tree to 256KB worth of instructions. */
|
||||
@@ -104,60 +103,59 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
|
||||
u32 k = ftest->k;
|
||||
|
||||
switch (code) {
|
||||
case BPF_S_LD_W_ABS:
|
||||
case BPF_LD | BPF_W | BPF_ABS:
|
||||
ftest->code = BPF_LDX | BPF_W | BPF_ABS;
|
||||
/* 32-bit aligned and not out of bounds. */
|
||||
if (k >= sizeof(struct seccomp_data) || k & 3)
|
||||
return -EINVAL;
|
||||
continue;
|
||||
case BPF_S_LD_W_LEN:
|
||||
case BPF_LD | BPF_W | BPF_LEN:
|
||||
ftest->code = BPF_LD | BPF_IMM;
|
||||
ftest->k = sizeof(struct seccomp_data);
|
||||
continue;
|
||||
case BPF_S_LDX_W_LEN:
|
||||
case BPF_LDX | BPF_W | BPF_LEN:
|
||||
ftest->code = BPF_LDX | BPF_IMM;
|
||||
ftest->k = sizeof(struct seccomp_data);
|
||||
continue;
|
||||
/* Explicitly include allowed calls. */
|
||||
case BPF_S_RET_K:
|
||||
case BPF_S_RET_A:
|
||||
case BPF_S_ALU_ADD_K:
|
||||
case BPF_S_ALU_ADD_X:
|
||||
case BPF_S_ALU_SUB_K:
|
||||
case BPF_S_ALU_SUB_X:
|
||||
case BPF_S_ALU_MUL_K:
|
||||
case BPF_S_ALU_MUL_X:
|
||||
case BPF_S_ALU_DIV_X:
|
||||
case BPF_S_ALU_AND_K:
|
||||
case BPF_S_ALU_AND_X:
|
||||
case BPF_S_ALU_OR_K:
|
||||
case BPF_S_ALU_OR_X:
|
||||
case BPF_S_ALU_XOR_K:
|
||||
case BPF_S_ALU_XOR_X:
|
||||
case BPF_S_ALU_LSH_K:
|
||||
case BPF_S_ALU_LSH_X:
|
||||
case BPF_S_ALU_RSH_K:
|
||||
case BPF_S_ALU_RSH_X:
|
||||
case BPF_S_ALU_NEG:
|
||||
case BPF_S_LD_IMM:
|
||||
case BPF_S_LDX_IMM:
|
||||
case BPF_S_MISC_TAX:
|
||||
case BPF_S_MISC_TXA:
|
||||
case BPF_S_ALU_DIV_K:
|
||||
case BPF_S_LD_MEM:
|
||||
case BPF_S_LDX_MEM:
|
||||
case BPF_S_ST:
|
||||
case BPF_S_STX:
|
||||
case BPF_S_JMP_JA:
|
||||
case BPF_S_JMP_JEQ_K:
|
||||
case BPF_S_JMP_JEQ_X:
|
||||
case BPF_S_JMP_JGE_K:
|
||||
case BPF_S_JMP_JGE_X:
|
||||
case BPF_S_JMP_JGT_K:
|
||||
case BPF_S_JMP_JGT_X:
|
||||
case BPF_S_JMP_JSET_K:
|
||||
case BPF_S_JMP_JSET_X:
|
||||
sk_decode_filter(ftest, ftest);
|
||||
case BPF_RET | BPF_K:
|
||||
case BPF_RET | BPF_A:
|
||||
case BPF_ALU | BPF_ADD | BPF_K:
|
||||
case BPF_ALU | BPF_ADD | BPF_X:
|
||||
case BPF_ALU | BPF_SUB | BPF_K:
|
||||
case BPF_ALU | BPF_SUB | BPF_X:
|
||||
case BPF_ALU | BPF_MUL | BPF_K:
|
||||
case BPF_ALU | BPF_MUL | BPF_X:
|
||||
case BPF_ALU | BPF_DIV | BPF_K:
|
||||
case BPF_ALU | BPF_DIV | BPF_X:
|
||||
case BPF_ALU | BPF_AND | BPF_K:
|
||||
case BPF_ALU | BPF_AND | BPF_X:
|
||||
case BPF_ALU | BPF_OR | BPF_K:
|
||||
case BPF_ALU | BPF_OR | BPF_X:
|
||||
case BPF_ALU | BPF_XOR | BPF_K:
|
||||
case BPF_ALU | BPF_XOR | BPF_X:
|
||||
case BPF_ALU | BPF_LSH | BPF_K:
|
||||
case BPF_ALU | BPF_LSH | BPF_X:
|
||||
case BPF_ALU | BPF_RSH | BPF_K:
|
||||
case BPF_ALU | BPF_RSH | BPF_X:
|
||||
case BPF_ALU | BPF_NEG:
|
||||
case BPF_LD | BPF_IMM:
|
||||
case BPF_LDX | BPF_IMM:
|
||||
case BPF_MISC | BPF_TAX:
|
||||
case BPF_MISC | BPF_TXA:
|
||||
case BPF_LD | BPF_MEM:
|
||||
case BPF_LDX | BPF_MEM:
|
||||
case BPF_ST:
|
||||
case BPF_STX:
|
||||
case BPF_JMP | BPF_JA:
|
||||
case BPF_JMP | BPF_JEQ | BPF_K:
|
||||
case BPF_JMP | BPF_JEQ | BPF_X:
|
||||
case BPF_JMP | BPF_JGE | BPF_K:
|
||||
case BPF_JMP | BPF_JGE | BPF_X:
|
||||
case BPF_JMP | BPF_JGT | BPF_K:
|
||||
case BPF_JMP | BPF_JGT | BPF_X:
|
||||
case BPF_JMP | BPF_JSET | BPF_K:
|
||||
case BPF_JMP | BPF_JSET | BPF_X:
|
||||
continue;
|
||||
default:
|
||||
return -EINVAL;
|
||||
@@ -189,7 +187,8 @@ static u32 seccomp_run_filters(int syscall)
|
||||
* value always takes priority (ignoring the DATA).
|
||||
*/
|
||||
for (f = current->seccomp.filter; f; f = f->prev) {
|
||||
u32 cur_ret = sk_run_filter_int_seccomp(&sd, f->insnsi);
|
||||
u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
|
||||
|
||||
if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
|
||||
ret = cur_ret;
|
||||
}
|
||||
@@ -215,7 +214,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
|
||||
return -EINVAL;
|
||||
|
||||
for (filter = current->seccomp.filter; filter; filter = filter->prev)
|
||||
total_insns += filter->len + 4; /* include a 4 instr penalty */
|
||||
total_insns += filter->prog->len + 4; /* include a 4 instr penalty */
|
||||
if (total_insns > MAX_INSNS_PER_PATH)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -256,19 +255,25 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
|
||||
|
||||
/* Allocate a new seccomp_filter */
|
||||
ret = -ENOMEM;
|
||||
filter = kzalloc(sizeof(struct seccomp_filter) +
|
||||
sizeof(struct sock_filter_int) * new_len,
|
||||
filter = kzalloc(sizeof(struct seccomp_filter),
|
||||
GFP_KERNEL|__GFP_NOWARN);
|
||||
if (!filter)
|
||||
goto free_prog;
|
||||
|
||||
ret = sk_convert_filter(fp, fprog->len, filter->insnsi, &new_len);
|
||||
if (ret)
|
||||
filter->prog = kzalloc(sk_filter_size(new_len),
|
||||
GFP_KERNEL|__GFP_NOWARN);
|
||||
if (!filter->prog)
|
||||
goto free_filter;
|
||||
|
||||
ret = sk_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len);
|
||||
if (ret)
|
||||
goto free_filter_prog;
|
||||
kfree(fp);
|
||||
|
||||
atomic_set(&filter->usage, 1);
|
||||
filter->len = new_len;
|
||||
filter->prog->len = new_len;
|
||||
|
||||
sk_filter_select_runtime(filter->prog);
|
||||
|
||||
/*
|
||||
* If there is an existing filter, make it the prev and don't drop its
|
||||
@@ -278,6 +283,8 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
|
||||
current->seccomp.filter = filter;
|
||||
return 0;
|
||||
|
||||
free_filter_prog:
|
||||
kfree(filter->prog);
|
||||
free_filter:
|
||||
kfree(filter);
|
||||
free_prog:
|
||||
@@ -330,6 +337,7 @@ void put_seccomp_filter(struct task_struct *tsk)
|
||||
while (orig && atomic_dec_and_test(&orig->usage)) {
|
||||
struct seccomp_filter *freeme = orig;
|
||||
orig = orig->prev;
|
||||
sk_filter_free(freeme->prog);
|
||||
kfree(freeme);
|
||||
}
|
||||
}
|
||||
|
kernel/smp.c
@@ -29,6 +29,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
|
||||
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
|
||||
|
||||
static void flush_smp_call_function_queue(bool warn_cpu_offline);
|
||||
|
||||
static int
|
||||
hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
{
|
||||
@@ -51,12 +53,27 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_UP_CANCELED_FROZEN:
|
||||
/* Fall-through to the CPU_DEAD[_FROZEN] case. */
|
||||
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
free_cpumask_var(cfd->cpumask);
|
||||
free_percpu(cfd->csd);
|
||||
break;
|
||||
|
||||
case CPU_DYING:
|
||||
case CPU_DYING_FROZEN:
|
||||
/*
|
||||
* The IPIs for the smp-call-function callbacks queued by other
|
||||
* CPUs might arrive late, either due to hardware latencies or
|
||||
* because this CPU disabled interrupts (inside stop-machine)
|
||||
* before the IPIs were sent. So flush out any pending callbacks
|
||||
* explicitly (without waiting for the IPIs to arrive), to
|
||||
* ensure that the outgoing CPU doesn't go offline with work
|
||||
* still pending.
|
||||
*/
|
||||
flush_smp_call_function_queue(false);
|
||||
break;
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -177,23 +194,47 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Invoked by arch to handle an IPI for call function single. Must be
|
||||
* called from the arch with interrupts disabled.
|
||||
/**
|
||||
* generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
|
||||
*
|
||||
* Invoked by arch to handle an IPI for call function single.
|
||||
* Must be called with interrupts disabled.
|
||||
*/
|
||||
void generic_smp_call_function_single_interrupt(void)
|
||||
{
|
||||
flush_smp_call_function_queue(true);
|
||||
}
|
||||
|
||||
/**
|
||||
* flush_smp_call_function_queue - Flush pending smp-call-function callbacks
|
||||
*
|
||||
* @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
|
||||
* offline CPU. Skip this check if set to 'false'.
|
||||
*
|
||||
* Flush any pending smp-call-function callbacks queued on this CPU. This is
|
||||
* invoked by the generic IPI handler, as well as by a CPU about to go offline,
|
||||
* to ensure that all pending IPI callbacks are run before it goes completely
|
||||
* offline.
|
||||
*
|
||||
* Loop through the call_single_queue and run all the queued callbacks.
|
||||
* Must be called with interrupts disabled.
|
||||
*/
|
||||
static void flush_smp_call_function_queue(bool warn_cpu_offline)
|
||||
{
|
||||
struct llist_head *head;
|
||||
struct llist_node *entry;
|
||||
struct call_single_data *csd, *csd_next;
|
||||
static bool warned;
|
||||
|
||||
entry = llist_del_all(&__get_cpu_var(call_single_queue));
|
||||
WARN_ON(!irqs_disabled());
|
||||
|
||||
head = &__get_cpu_var(call_single_queue);
|
||||
entry = llist_del_all(head);
|
||||
entry = llist_reverse_order(entry);
|
||||
|
||||
/*
|
||||
* Shouldn't receive this interrupt on a cpu that is not yet online.
|
||||
*/
|
||||
if (unlikely(!cpu_online(smp_processor_id()) && !warned)) {
|
||||
/* There shouldn't be any pending callbacks on an offline CPU. */
|
||||
if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
|
||||
!warned && !llist_empty(head))) {
|
||||
warned = true;
|
||||
WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
|
||||
|
||||
|
@@ -136,7 +136,6 @@ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
|
||||
/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
|
||||
static int maxolduid = 65535;
|
||||
static int minolduid;
|
||||
static int min_percpu_pagelist_fract = 8;
|
||||
|
||||
static int ngroups_max = NGROUPS_MAX;
|
||||
static const int cap_last_cap = CAP_LAST_CAP;
|
||||
@@ -152,10 +151,6 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
|
||||
#ifdef CONFIG_SPARC
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SPARC64
|
||||
extern int sysctl_tsb_ratio;
|
||||
#endif
|
||||
|
||||
#ifdef __hppa__
|
||||
extern int pwrsw_enabled;
|
||||
#endif
|
||||
@@ -865,6 +860,17 @@ static struct ctl_table kern_table[] = {
|
||||
.extra1 = &zero,
|
||||
.extra2 = &one,
|
||||
},
|
||||
#ifdef CONFIG_SMP
|
||||
{
|
||||
.procname = "softlockup_all_cpu_backtrace",
|
||||
.data = &sysctl_softlockup_all_cpu_backtrace,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &one,
|
||||
},
|
||||
#endif /* CONFIG_SMP */
|
||||
{
|
||||
.procname = "nmi_watchdog",
|
||||
.data = &watchdog_user_enabled,
|
||||
@@ -1321,7 +1327,7 @@ static struct ctl_table vm_table[] = {
|
||||
.maxlen = sizeof(percpu_pagelist_fraction),
|
||||
.mode = 0644,
|
||||
.proc_handler = percpu_pagelist_fraction_sysctl_handler,
|
||||
.extra1 = &min_percpu_pagelist_fract,
|
||||
.extra1 = &zero,
|
||||
},
|
||||
#ifdef CONFIG_MMU
|
||||
{
|
||||
@@ -2568,11 +2574,11 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
|
||||
bool first = 1;
|
||||
size_t left = *lenp;
|
||||
unsigned long bitmap_len = table->maxlen;
|
||||
unsigned long *bitmap = (unsigned long *) table->data;
|
||||
unsigned long *bitmap = *(unsigned long **) table->data;
|
||||
unsigned long *tmp_bitmap = NULL;
|
||||
char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
|
||||
|
||||
if (!bitmap_len || !left || (*ppos && !write)) {
|
||||
if (!bitmap || !bitmap_len || !left || (*ppos && !write)) {
|
||||
*lenp = 0;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -535,6 +535,36 @@ config MMIOTRACE_TEST
|
||||
|
||||
Say N, unless you absolutely know what you are doing.
|
||||
|
||||
config TRACEPOINT_BENCHMARK
|
||||
bool "Add tracepoint that benchmarks tracepoints"
|
||||
help
|
||||
This option creates the tracepoint "benchmark:benchmark_event".
|
||||
When the tracepoint is enabled, it kicks off a kernel thread that
|
||||
goes into an infinite loop (calling cond_sched() to let other tasks
|
||||
run), and calls the tracepoint. Each iteration will record the time
|
||||
it took to write to the tracepoint and the next iteration that
|
||||
data will be passed to the tracepoint itself. That is, the tracepoint
|
||||
will report the time it took to do the previous tracepoint.
|
||||
The string written to the tracepoint is a static string of 128 bytes
|
||||
to keep the time the same. The initial string is simply a write of
|
||||
"START". The second string records the cold cache time of the first
|
||||
write which is not added to the rest of the calculations.
|
||||
|
||||
As it is a tight loop, it benchmarks as hot cache. That's fine because
|
||||
we care most about hot paths that are probably in cache already.
|
||||
|
||||
An example of the output:
|
||||
|
||||
START
|
||||
first=3672 [COLD CACHED]
|
||||
last=632 first=3672 max=632 min=632 avg=316 std=446 std^2=199712
|
||||
last=278 first=3672 max=632 min=278 avg=303 std=316 std^2=100337
|
||||
last=277 first=3672 max=632 min=277 avg=296 std=258 std^2=67064
|
||||
last=273 first=3672 max=632 min=273 avg=292 std=224 std^2=50411
|
||||
last=273 first=3672 max=632 min=273 avg=288 std=200 std^2=40389
|
||||
last=281 first=3672 max=632 min=273 avg=287 std=183 std^2=33666
|
||||
|
||||
|
||||
config RING_BUFFER_BENCHMARK
|
||||
tristate "Ring buffer benchmark stress tester"
|
||||
depends on RING_BUFFER
|
||||
|
@@ -17,6 +17,7 @@ ifdef CONFIG_TRACING_BRANCHES
|
||||
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
|
||||
endif
|
||||
|
||||
CFLAGS_trace_benchmark.o := -I$(src)
|
||||
CFLAGS_trace_events_filter.o := -I$(src)
|
||||
|
||||
obj-$(CONFIG_TRACE_CLOCK) += trace_clock.o
|
||||
@@ -62,4 +63,6 @@ endif
|
||||
obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
|
||||
obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o
|
||||
|
||||
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
|
||||
|
||||
libftrace-y := ftrace.o
|
||||
|
@@ -62,7 +62,7 @@
|
||||
#define FTRACE_HASH_DEFAULT_BITS 10
|
||||
#define FTRACE_HASH_MAX_BITS 12
|
||||
|
||||
#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
|
||||
#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL)
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
#define INIT_REGEX_LOCK(opsname) \
|
||||
@@ -103,7 +103,6 @@ static int ftrace_disabled __read_mostly;
|
||||
|
||||
static DEFINE_MUTEX(ftrace_lock);
|
||||
|
||||
static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
|
||||
static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
|
||||
static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
|
||||
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
|
||||
@@ -171,23 +170,6 @@ int ftrace_nr_registered_ops(void)
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static void
|
||||
ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
|
||||
struct ftrace_ops *op, struct pt_regs *regs)
|
||||
{
|
||||
int bit;
|
||||
|
||||
bit = trace_test_and_set_recursion(TRACE_GLOBAL_START, TRACE_GLOBAL_MAX);
|
||||
if (bit < 0)
|
||||
return;
|
||||
|
||||
do_for_each_ftrace_op(op, ftrace_global_list) {
|
||||
op->func(ip, parent_ip, op, regs);
|
||||
} while_for_each_ftrace_op(op);
|
||||
|
||||
trace_clear_recursion(bit);
|
||||
}
|
||||
|
||||
static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
|
||||
struct ftrace_ops *op, struct pt_regs *regs)
|
||||
{
|
||||
@@ -237,43 +219,6 @@ static int control_ops_alloc(struct ftrace_ops *ops)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void update_global_ops(void)
|
||||
{
|
||||
ftrace_func_t func = ftrace_global_list_func;
|
||||
void *private = NULL;
|
||||
|
||||
/* The list has its own recursion protection. */
|
||||
global_ops.flags |= FTRACE_OPS_FL_RECURSION_SAFE;
|
||||
|
||||
/*
|
||||
* If there's only one function registered, then call that
|
||||
* function directly. Otherwise, we need to iterate over the
|
||||
* registered callers.
|
||||
*/
|
||||
if (ftrace_global_list == &ftrace_list_end ||
|
||||
ftrace_global_list->next == &ftrace_list_end) {
|
||||
func = ftrace_global_list->func;
|
||||
private = ftrace_global_list->private;
|
||||
/*
|
||||
* As we are calling the function directly.
|
||||
* If it does not have recursion protection,
|
||||
* the function_trace_op needs to be updated
|
||||
* accordingly.
|
||||
*/
|
||||
if (!(ftrace_global_list->flags & FTRACE_OPS_FL_RECURSION_SAFE))
|
||||
global_ops.flags &= ~FTRACE_OPS_FL_RECURSION_SAFE;
|
||||
}
|
||||
|
||||
/* If we filter on pids, update to use the pid function */
|
||||
if (!list_empty(&ftrace_pids)) {
|
||||
set_ftrace_pid_function(func);
|
||||
func = ftrace_pid_func;
|
||||
}
|
||||
|
||||
global_ops.func = func;
|
||||
global_ops.private = private;
|
||||
}
|
||||
|
||||
static void ftrace_sync(struct work_struct *work)
|
||||
{
|
||||
/*
|
||||
@@ -301,8 +246,6 @@ static void update_ftrace_function(void)
|
||||
{
|
||||
ftrace_func_t func;
|
||||
|
||||
update_global_ops();
|
||||
|
||||
/*
|
||||
* If we are at the end of the list and this ops is
|
||||
* recursion safe and not dynamic and the arch supports passing ops,
|
||||
@@ -314,10 +257,7 @@ static void update_ftrace_function(void)
|
||||
(ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) &&
|
||||
!FTRACE_FORCE_LIST_FUNC)) {
|
||||
/* Set the ftrace_ops that the arch callback uses */
|
||||
if (ftrace_ops_list == &global_ops)
|
||||
set_function_trace_op = ftrace_global_list;
|
||||
else
|
||||
set_function_trace_op = ftrace_ops_list;
|
||||
set_function_trace_op = ftrace_ops_list;
|
||||
func = ftrace_ops_list->func;
|
||||
} else {
|
||||
/* Just use the default ftrace_ops */
|
||||
@@ -373,6 +313,11 @@ static void update_ftrace_function(void)
|
||||
ftrace_trace_function = func;
|
||||
}
|
||||
|
||||
int using_ftrace_ops_list_func(void)
|
||||
{
|
||||
return ftrace_trace_function == ftrace_ops_list_func;
|
||||
}
|
||||
|
||||
static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
|
||||
{
|
||||
ops->next = *list;
|
||||
@@ -434,16 +379,9 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
|
||||
if (ops->flags & FTRACE_OPS_FL_DELETED)
|
||||
return -EINVAL;
|
||||
|
||||
if (FTRACE_WARN_ON(ops == &global_ops))
|
||||
return -EINVAL;
|
||||
|
||||
if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
|
||||
return -EBUSY;
|
||||
|
||||
/* We don't support both control and global flags set. */
|
||||
if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
#ifndef CONFIG_DYNAMIC_FTRACE_WITH_REGS
|
||||
/*
|
||||
* If the ftrace_ops specifies SAVE_REGS, then it only can be used
|
||||
@@ -461,10 +399,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
|
||||
if (!core_kernel_data((unsigned long)ops))
|
||||
ops->flags |= FTRACE_OPS_FL_DYNAMIC;
|
||||
|
||||
if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
|
||||
add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops);
|
||||
ops->flags |= FTRACE_OPS_FL_ENABLED;
|
||||
} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
|
||||
if (ops->flags & FTRACE_OPS_FL_CONTROL) {
|
||||
if (control_ops_alloc(ops))
|
||||
return -ENOMEM;
|
||||
add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
|
||||
@@ -484,15 +419,7 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
|
||||
if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)))
|
||||
return -EBUSY;
|
||||
|
||||
if (FTRACE_WARN_ON(ops == &global_ops))
|
||||
return -EINVAL;
|
||||
|
||||
if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
|
||||
ret = remove_ftrace_list_ops(&ftrace_global_list,
|
||||
&global_ops, ops);
|
||||
if (!ret)
|
||||
ops->flags &= ~FTRACE_OPS_FL_ENABLED;
|
||||
} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
|
||||
if (ops->flags & FTRACE_OPS_FL_CONTROL) {
|
||||
ret = remove_ftrace_list_ops(&ftrace_control_list,
|
||||
&control_ops, ops);
|
||||
} else
|
||||
@@ -895,7 +822,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip,
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
stat = &__get_cpu_var(ftrace_profile_stats);
|
||||
stat = this_cpu_ptr(&ftrace_profile_stats);
|
||||
if (!stat->hash || !ftrace_profile_enabled)
|
||||
goto out;
|
||||
|
||||
@@ -926,7 +853,7 @@ static void profile_graph_return(struct ftrace_graph_ret *trace)
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
stat = &__get_cpu_var(ftrace_profile_stats);
|
||||
stat = this_cpu_ptr(&ftrace_profile_stats);
|
||||
if (!stat->hash || !ftrace_profile_enabled)
|
||||
goto out;
|
||||
|
||||
@@ -1178,7 +1105,7 @@ struct ftrace_page {
|
||||
static struct ftrace_page *ftrace_pages_start;
|
||||
static struct ftrace_page *ftrace_pages;
|
||||
|
||||
static bool ftrace_hash_empty(struct ftrace_hash *hash)
|
||||
static bool __always_inline ftrace_hash_empty(struct ftrace_hash *hash)
|
||||
{
|
||||
return !hash || !hash->count;
|
||||
}
|
||||
@@ -1625,7 +1552,14 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
|
||||
in_other_hash = !!ftrace_lookup_ip(other_hash, rec->ip);
|
||||
|
||||
/*
|
||||
* If filter_hash is set, we want to match all functions
|
||||
* that are in the hash but not in the other hash.
|
||||
*
|
||||
* If filter_hash is not set, then we are decrementing.
|
||||
* That means we match anything that is in the hash
|
||||
* and also in the other_hash. That is, we need to turn
|
||||
* off functions in the other hash because they are disabled
|
||||
* by this hash.
|
||||
*/
|
||||
if (filter_hash && in_hash && !in_other_hash)
|
||||
match = 1;
|
||||
@@ -1767,19 +1701,15 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
|
||||
/*
|
||||
* If this record is being updated from a nop, then
|
||||
* return UPDATE_MAKE_CALL.
|
||||
* Otherwise, if the EN flag is set, then return
|
||||
* UPDATE_MODIFY_CALL_REGS to tell the caller to convert
|
||||
* from the non-save regs, to a save regs function.
|
||||
* Otherwise,
|
||||
* return UPDATE_MODIFY_CALL to tell the caller to convert
|
||||
* from the save regs, to a non-save regs function.
|
||||
* from the save regs, to a non-save regs function or
|
||||
* vice versa.
|
||||
*/
|
||||
if (flag & FTRACE_FL_ENABLED)
|
||||
return FTRACE_UPDATE_MAKE_CALL;
|
||||
else if (rec->flags & FTRACE_FL_REGS_EN)
|
||||
return FTRACE_UPDATE_MODIFY_CALL_REGS;
|
||||
else
|
||||
return FTRACE_UPDATE_MODIFY_CALL;
|
||||
|
||||
return FTRACE_UPDATE_MODIFY_CALL;
|
||||
}
|
||||
|
||||
if (update) {
|
||||
@@ -1821,6 +1751,42 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable)
|
||||
return ftrace_check_record(rec, enable, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* ftrace_get_addr_new - Get the call address to set to
|
||||
* @rec: The ftrace record descriptor
|
||||
*
|
||||
* If the record has the FTRACE_FL_REGS set, that means that it
|
||||
* wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
|
||||
* is not not set, then it wants to convert to the normal callback.
|
||||
*
|
||||
* Returns the address of the trampoline to set to
|
||||
*/
|
||||
unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec)
|
||||
{
|
||||
if (rec->flags & FTRACE_FL_REGS)
|
||||
return (unsigned long)FTRACE_REGS_ADDR;
|
||||
else
|
||||
return (unsigned long)FTRACE_ADDR;
|
||||
}
|
||||
|
||||
/**
|
||||
* ftrace_get_addr_curr - Get the call address that is already there
|
||||
* @rec: The ftrace record descriptor
|
||||
*
|
||||
* The FTRACE_FL_REGS_EN is set when the record already points to
|
||||
* a function that saves all the regs. Basically the '_EN' version
|
||||
* represents the current state of the function.
|
||||
*
|
||||
* Returns the address of the trampoline that is currently being called
|
||||
*/
|
||||
unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
|
||||
{
|
||||
if (rec->flags & FTRACE_FL_REGS_EN)
|
||||
return (unsigned long)FTRACE_REGS_ADDR;
|
||||
else
|
||||
return (unsigned long)FTRACE_ADDR;
|
||||
}
|
||||
|
||||
static int
|
||||
__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
|
||||
{
|
||||
@@ -1828,12 +1794,12 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
|
||||
unsigned long ftrace_addr;
|
||||
int ret;
|
||||
|
||||
ret = ftrace_update_record(rec, enable);
|
||||
ftrace_addr = ftrace_get_addr_new(rec);
|
||||
|
||||
if (rec->flags & FTRACE_FL_REGS)
|
||||
ftrace_addr = (unsigned long)FTRACE_REGS_ADDR;
|
||||
else
|
||||
ftrace_addr = (unsigned long)FTRACE_ADDR;
|
||||
/* This needs to be done before we call ftrace_update_record */
|
||||
ftrace_old_addr = ftrace_get_addr_curr(rec);
|
||||
|
||||
ret = ftrace_update_record(rec, enable);
|
||||
|
||||
switch (ret) {
|
||||
case FTRACE_UPDATE_IGNORE:
|
||||
@@ -1845,13 +1811,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
|
||||
case FTRACE_UPDATE_MAKE_NOP:
|
||||
return ftrace_make_nop(NULL, rec, ftrace_addr);
|
||||
|
||||
case FTRACE_UPDATE_MODIFY_CALL_REGS:
|
||||
case FTRACE_UPDATE_MODIFY_CALL:
|
||||
if (rec->flags & FTRACE_FL_REGS)
|
||||
ftrace_old_addr = (unsigned long)FTRACE_ADDR;
|
||||
else
|
||||
ftrace_old_addr = (unsigned long)FTRACE_REGS_ADDR;
|
||||
|
||||
return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
|
||||
}
|
||||
|
||||
@@ -2115,7 +2075,6 @@ static void ftrace_startup_enable(int command)
|
||||
|
||||
static int ftrace_startup(struct ftrace_ops *ops, int command)
|
||||
{
|
||||
bool hash_enable = true;
|
||||
int ret;
|
||||
|
||||
if (unlikely(ftrace_disabled))
|
||||
@@ -2128,18 +2087,9 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
|
||||
ftrace_start_up++;
|
||||
command |= FTRACE_UPDATE_CALLS;
|
||||
|
||||
/* ops marked global share the filter hashes */
|
||||
if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
|
||||
ops = &global_ops;
|
||||
/* Don't update hash if global is already set */
|
||||
if (global_start_up)
|
||||
hash_enable = false;
|
||||
global_start_up++;
|
||||
}
|
||||
|
||||
ops->flags |= FTRACE_OPS_FL_ENABLED;
|
||||
if (hash_enable)
|
||||
ftrace_hash_rec_enable(ops, 1);
|
||||
|
||||
ftrace_hash_rec_enable(ops, 1);
|
||||
|
||||
ftrace_startup_enable(command);
|
||||
|
||||
@@ -2148,7 +2098,6 @@ static int ftrace_startup(struct ftrace_ops *ops, int command)
|
||||
|
||||
static int ftrace_shutdown(struct ftrace_ops *ops, int command)
|
||||
{
|
||||
bool hash_disable = true;
|
||||
int ret;
|
||||
|
||||
if (unlikely(ftrace_disabled))
|
||||
@@ -2166,21 +2115,9 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
|
||||
*/
|
||||
WARN_ON_ONCE(ftrace_start_up < 0);
|
||||
|
||||
if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
|
||||
ops = &global_ops;
|
||||
global_start_up--;
|
||||
WARN_ON_ONCE(global_start_up < 0);
|
||||
/* Don't update hash if global still has users */
|
||||
if (global_start_up) {
|
||||
WARN_ON_ONCE(!ftrace_start_up);
|
||||
hash_disable = false;
|
||||
}
|
||||
}
|
||||
ftrace_hash_rec_disable(ops, 1);
|
||||
|
||||
if (hash_disable)
|
||||
ftrace_hash_rec_disable(ops, 1);
|
||||
|
||||
if (ops != &global_ops || !global_start_up)
|
||||
if (!global_start_up)
|
||||
ops->flags &= ~FTRACE_OPS_FL_ENABLED;
|
||||
|
||||
command |= FTRACE_UPDATE_CALLS;
|
||||
@@ -3524,10 +3461,6 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
|
||||
struct ftrace_hash *hash;
|
||||
int ret;
|
||||
|
||||
/* All global ops uses the global ops filters */
|
||||
if (ops->flags & FTRACE_OPS_FL_GLOBAL)
|
||||
ops = &global_ops;
|
||||
|
||||
if (unlikely(ftrace_disabled))
|
||||
return -ENODEV;
|
||||
|
||||
@@ -3639,8 +3572,7 @@ int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ftrace_set_notrace);
|
||||
/**
|
||||
* ftrace_set_filter - set a function to filter on in ftrace
|
||||
* @ops - the ops to set the filter with
|
||||
* ftrace_set_global_filter - set a function to filter on with global tracers
|
||||
* @buf - the string that holds the function filter text.
|
||||
* @len - the length of the string.
|
||||
* @reset - non zero to reset all filters before applying this filter.
|
||||
@@ -3655,8 +3587,7 @@ void ftrace_set_global_filter(unsigned char *buf, int len, int reset)
|
||||
EXPORT_SYMBOL_GPL(ftrace_set_global_filter);
|
||||
|
||||
/**
|
||||
* ftrace_set_notrace - set a function to not trace in ftrace
|
||||
* @ops - the ops to set the notrace filter with
|
||||
* ftrace_set_global_notrace - set a function to not trace with global tracers
|
||||
* @buf - the string that holds the function notrace text.
|
||||
* @len - the length of the string.
|
||||
* @reset - non zero to reset all filters before applying this filter.
|
||||
@@ -4443,6 +4374,34 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
|
||||
|
||||
#endif /* CONFIG_DYNAMIC_FTRACE */
|
||||
|
||||
__init void ftrace_init_global_array_ops(struct trace_array *tr)
|
||||
{
|
||||
tr->ops = &global_ops;
|
||||
tr->ops->private = tr;
|
||||
}
|
||||
|
||||
void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func)
|
||||
{
|
||||
/* If we filter on pids, update to use the pid function */
|
||||
if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
|
||||
if (WARN_ON(tr->ops->func != ftrace_stub))
|
||||
printk("ftrace ops had %pS for function\n",
|
||||
tr->ops->func);
|
||||
/* Only the top level instance does pid tracing */
|
||||
if (!list_empty(&ftrace_pids)) {
|
||||
set_ftrace_pid_function(func);
|
||||
func = ftrace_pid_func;
|
||||
}
|
||||
}
|
||||
tr->ops->func = func;
|
||||
tr->ops->private = tr;
|
||||
}
|
||||
|
||||
void ftrace_reset_array_ops(struct trace_array *tr)
|
||||
{
|
||||
tr->ops->func = ftrace_stub;
|
||||
}
|
||||
|
||||
static void
|
||||
ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
|
||||
struct ftrace_ops *op, struct pt_regs *regs)
|
||||
@@ -4501,9 +4460,16 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
|
||||
*/
|
||||
preempt_disable_notrace();
|
||||
do_for_each_ftrace_op(op, ftrace_ops_list) {
|
||||
if (ftrace_ops_test(op, ip, regs))
|
||||
if (ftrace_ops_test(op, ip, regs)) {
|
||||
if (WARN_ON(!op->func)) {
|
||||
function_trace_stop = 1;
|
||||
printk("op=%p %pS\n", op, op);
|
||||
goto out;
|
||||
}
|
||||
op->func(ip, parent_ip, op, regs);
|
||||
}
|
||||
} while_for_each_ftrace_op(op);
|
||||
out:
|
||||
preempt_enable_notrace();
|
||||
trace_clear_recursion(bit);
|
||||
}
|
||||
@@ -4908,7 +4874,6 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
|
||||
static int ftrace_graph_active;
|
||||
static struct notifier_block ftrace_suspend_notifier;
|
||||
|
||||
int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
|
||||
{
|
||||
@@ -5054,13 +5019,6 @@ ftrace_suspend_notifier_call(struct notifier_block *bl, unsigned long state,
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
/* Just a place holder for function graph */
|
||||
static struct ftrace_ops fgraph_ops __read_mostly = {
|
||||
.func = ftrace_stub,
|
||||
.flags = FTRACE_OPS_FL_STUB | FTRACE_OPS_FL_GLOBAL |
|
||||
FTRACE_OPS_FL_RECURSION_SAFE,
|
||||
};
|
||||
|
||||
static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
|
||||
{
|
||||
if (!ftrace_ops_test(&global_ops, trace->func, NULL))
|
||||
@@ -5085,6 +5043,10 @@ static void update_function_graph_func(void)
|
||||
ftrace_graph_entry = ftrace_graph_entry_test;
|
||||
}
|
||||
|
||||
static struct notifier_block ftrace_suspend_notifier = {
|
||||
.notifier_call = ftrace_suspend_notifier_call,
|
||||
};
|
||||
|
||||
int register_ftrace_graph(trace_func_graph_ret_t retfunc,
|
||||
trace_func_graph_ent_t entryfunc)
|
||||
{
|
||||
@@ -5098,7 +5060,6 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
|
||||
register_pm_notifier(&ftrace_suspend_notifier);
|
||||
|
||||
ftrace_graph_active++;
|
||||
@@ -5120,7 +5081,10 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
|
||||
ftrace_graph_entry = ftrace_graph_entry_test;
|
||||
update_function_graph_func();
|
||||
|
||||
ret = ftrace_startup(&fgraph_ops, FTRACE_START_FUNC_RET);
|
||||
/* Function graph doesn't use the .func field of global_ops */
|
||||
global_ops.flags |= FTRACE_OPS_FL_STUB;
|
||||
|
||||
ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET);
|
||||
|
||||
out:
|
||||
mutex_unlock(&ftrace_lock);
|
||||
@@ -5138,7 +5102,8 @@ void unregister_ftrace_graph(void)
|
||||
ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
|
||||
ftrace_graph_entry = ftrace_graph_entry_stub;
|
||||
__ftrace_graph_entry = ftrace_graph_entry_stub;
|
||||
ftrace_shutdown(&fgraph_ops, FTRACE_STOP_FUNC_RET);
|
||||
ftrace_shutdown(&global_ops, FTRACE_STOP_FUNC_RET);
|
||||
global_ops.flags &= ~FTRACE_OPS_FL_STUB;
|
||||
unregister_pm_notifier(&ftrace_suspend_notifier);
|
||||
unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
|
||||
|
||||
|
@@ -543,7 +543,7 @@ static void rb_wake_up_waiters(struct irq_work *work)
|
||||
* as data is added to any of the @buffer's cpu buffers. Otherwise
|
||||
* it will wait for data to be added to a specific cpu buffer.
|
||||
*/
|
||||
void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
|
||||
int ring_buffer_wait(struct ring_buffer *buffer, int cpu)
|
||||
{
|
||||
struct ring_buffer_per_cpu *cpu_buffer;
|
||||
DEFINE_WAIT(wait);
|
||||
@@ -557,6 +557,8 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
|
||||
if (cpu == RING_BUFFER_ALL_CPUS)
|
||||
work = &buffer->irq_work;
|
||||
else {
|
||||
if (!cpumask_test_cpu(cpu, buffer->cpumask))
|
||||
return -ENODEV;
|
||||
cpu_buffer = buffer->buffers[cpu];
|
||||
work = &cpu_buffer->irq_work;
|
||||
}
|
||||
@@ -591,6 +593,7 @@ void ring_buffer_wait(struct ring_buffer *buffer, int cpu)
|
||||
schedule();
|
||||
|
||||
finish_wait(&work->waiters, &wait);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -275,7 +275,7 @@ int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(call_filter_check_discard);
|
||||
|
||||
cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
|
||||
static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
|
||||
{
|
||||
u64 ts;
|
||||
|
||||
@@ -599,7 +599,7 @@ static int alloc_snapshot(struct trace_array *tr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void free_snapshot(struct trace_array *tr)
|
||||
static void free_snapshot(struct trace_array *tr)
|
||||
{
|
||||
/*
|
||||
* We don't free the ring buffer. instead, resize it because
|
||||
@@ -963,27 +963,9 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* ftrace_max_lock is used to protect the swapping of buffers
|
||||
* when taking a max snapshot. The buffers themselves are
|
||||
* protected by per_cpu spinlocks. But the action of the swap
|
||||
* needs its own lock.
|
||||
*
|
||||
* This is defined as a arch_spinlock_t in order to help
|
||||
* with performance when lockdep debugging is enabled.
|
||||
*
|
||||
* It is also used in other places outside the update_max_tr
|
||||
* so it needs to be defined outside of the
|
||||
* CONFIG_TRACER_MAX_TRACE.
|
||||
*/
|
||||
static arch_spinlock_t ftrace_max_lock =
|
||||
(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
|
||||
|
||||
unsigned long __read_mostly tracing_thresh;
|
||||
|
||||
#ifdef CONFIG_TRACER_MAX_TRACE
|
||||
unsigned long __read_mostly tracing_max_latency;
|
||||
|
||||
/*
|
||||
* Copy the new maximum trace into the separate maximum-trace
|
||||
* structure. (this way the maximum trace is permanently saved,
|
||||
@@ -1000,7 +982,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
|
||||
max_buf->cpu = cpu;
|
||||
max_buf->time_start = data->preempt_timestamp;
|
||||
|
||||
max_data->saved_latency = tracing_max_latency;
|
||||
max_data->saved_latency = tr->max_latency;
|
||||
max_data->critical_start = data->critical_start;
|
||||
max_data->critical_end = data->critical_end;
|
||||
|
||||
@@ -1048,14 +1030,14 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
|
||||
return;
|
||||
}
|
||||
|
||||
arch_spin_lock(&ftrace_max_lock);
|
||||
arch_spin_lock(&tr->max_lock);
|
||||
|
||||
buf = tr->trace_buffer.buffer;
|
||||
tr->trace_buffer.buffer = tr->max_buffer.buffer;
|
||||
tr->max_buffer.buffer = buf;
|
||||
|
||||
__update_max_tr(tr, tsk, cpu);
|
||||
arch_spin_unlock(&ftrace_max_lock);
|
||||
arch_spin_unlock(&tr->max_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1081,7 +1063,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
|
||||
return;
|
||||
}
|
||||
|
||||
arch_spin_lock(&ftrace_max_lock);
|
||||
arch_spin_lock(&tr->max_lock);
|
||||
|
||||
ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
|
||||
|
||||
@@ -1099,17 +1081,17 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
|
||||
WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
|
||||
|
||||
__update_max_tr(tr, tsk, cpu);
|
||||
arch_spin_unlock(&ftrace_max_lock);
|
||||
arch_spin_unlock(&tr->max_lock);
|
||||
}
|
||||
#endif /* CONFIG_TRACER_MAX_TRACE */
|
||||
|
||||
static void default_wait_pipe(struct trace_iterator *iter)
|
||||
static int wait_on_pipe(struct trace_iterator *iter)
|
||||
{
|
||||
/* Iterators are static, they should be filled or empty */
|
||||
if (trace_buffer_iter(iter, iter->cpu_file))
|
||||
return;
|
||||
return 0;
|
||||
|
||||
ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
|
||||
return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FTRACE_STARTUP_TEST
|
||||
@@ -1220,8 +1202,6 @@ int register_tracer(struct tracer *type)
|
||||
else
|
||||
if (!type->flags->opts)
|
||||
type->flags->opts = dummy_tracer_opt;
|
||||
if (!type->wait_pipe)
|
||||
type->wait_pipe = default_wait_pipe;
|
||||
|
||||
ret = run_tracer_selftest(type);
|
||||
if (ret < 0)
|
||||
@@ -1305,22 +1285,71 @@ void tracing_reset_all_online_cpus(void)
|
||||
}
|
||||
}
|
||||
|
||||
#define SAVED_CMDLINES 128
|
||||
#define SAVED_CMDLINES_DEFAULT 128
|
||||
#define NO_CMDLINE_MAP UINT_MAX
|
||||
static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
|
||||
static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
|
||||
static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
|
||||
static int cmdline_idx;
|
||||
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
|
||||
struct saved_cmdlines_buffer {
|
||||
unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
|
||||
unsigned *map_cmdline_to_pid;
|
||||
unsigned cmdline_num;
|
||||
int cmdline_idx;
|
||||
char *saved_cmdlines;
|
||||
};
|
||||
static struct saved_cmdlines_buffer *savedcmd;
|
||||
|
||||
/* temporary disable recording */
|
||||
static atomic_t trace_record_cmdline_disabled __read_mostly;
|
||||
|
||||
static void trace_init_cmdlines(void)
|
||||
static inline char *get_saved_cmdlines(int idx)
|
||||
{
|
||||
memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline));
|
||||
memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid));
|
||||
cmdline_idx = 0;
|
||||
return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
|
||||
}
|
||||
|
||||
static inline void set_cmdline(int idx, const char *cmdline)
|
||||
{
|
||||
memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
|
||||
}
|
||||
|
||||
static int allocate_cmdlines_buffer(unsigned int val,
|
||||
struct saved_cmdlines_buffer *s)
|
||||
{
|
||||
s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
|
||||
GFP_KERNEL);
|
||||
if (!s->map_cmdline_to_pid)
|
||||
return -ENOMEM;
|
||||
|
||||
s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
|
||||
if (!s->saved_cmdlines) {
|
||||
kfree(s->map_cmdline_to_pid);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
s->cmdline_idx = 0;
|
||||
s->cmdline_num = val;
|
||||
memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
|
||||
sizeof(s->map_pid_to_cmdline));
|
||||
memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
|
||||
val * sizeof(*s->map_cmdline_to_pid));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int trace_create_savedcmd(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
|
||||
if (!savedcmd)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
|
||||
if (ret < 0) {
|
||||
kfree(savedcmd);
|
||||
savedcmd = NULL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int is_tracing_stopped(void)
|
||||
@@ -1353,7 +1382,7 @@ void tracing_start(void)
|
||||
}
|
||||
|
||||
/* Prevent the buffers from switching */
|
||||
arch_spin_lock(&ftrace_max_lock);
|
||||
arch_spin_lock(&global_trace.max_lock);
|
||||
|
||||
buffer = global_trace.trace_buffer.buffer;
|
||||
if (buffer)
|
||||
@@ -1365,9 +1394,8 @@ void tracing_start(void)
|
||||
ring_buffer_record_enable(buffer);
|
||||
#endif
|
||||
|
||||
arch_spin_unlock(&ftrace_max_lock);
|
||||
arch_spin_unlock(&global_trace.max_lock);
|
||||
|
||||
ftrace_start();
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
|
||||
}
|
||||
@@ -1414,13 +1442,12 @@ void tracing_stop(void)
|
||||
struct ring_buffer *buffer;
|
||||
unsigned long flags;
|
||||
|
||||
ftrace_stop();
|
||||
raw_spin_lock_irqsave(&global_trace.start_lock, flags);
|
||||
if (global_trace.stop_count++)
|
||||
goto out;
|
||||
|
||||
/* Prevent the buffers from switching */
|
||||
arch_spin_lock(&ftrace_max_lock);
|
||||
arch_spin_lock(&global_trace.max_lock);
|
||||
|
||||
buffer = global_trace.trace_buffer.buffer;
|
||||
if (buffer)
|
||||
@@ -1432,7 +1459,7 @@ void tracing_stop(void)
|
||||
ring_buffer_record_disable(buffer);
|
||||
#endif
|
||||
|
||||
arch_spin_unlock(&ftrace_max_lock);
|
||||
arch_spin_unlock(&global_trace.max_lock);
|
||||
|
||||
out:
|
||||
raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
|
||||
@@ -1461,12 +1488,12 @@ static void tracing_stop_tr(struct trace_array *tr)
|
||||
|
||||
void trace_stop_cmdline_recording(void);
|
||||
|
||||
static void trace_save_cmdline(struct task_struct *tsk)
|
||||
static int trace_save_cmdline(struct task_struct *tsk)
|
||||
{
|
||||
unsigned pid, idx;
|
||||
|
||||
if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
|
||||
return;
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* It's not the end of the world if we don't get
|
||||
@@ -1475,11 +1502,11 @@ static void trace_save_cmdline(struct task_struct *tsk)
|
||||
* so if we miss here, then better luck next time.
|
||||
*/
|
||||
if (!arch_spin_trylock(&trace_cmdline_lock))
|
||||
return;
|
||||
return 0;
|
||||
|
||||
idx = map_pid_to_cmdline[tsk->pid];
|
||||
idx = savedcmd->map_pid_to_cmdline[tsk->pid];
|
||||
if (idx == NO_CMDLINE_MAP) {
|
||||
idx = (cmdline_idx + 1) % SAVED_CMDLINES;
|
||||
idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
|
||||
|
||||
/*
|
||||
* Check whether the cmdline buffer at idx has a pid
|
||||
@@ -1487,22 +1514,24 @@ static void trace_save_cmdline(struct task_struct *tsk)
|
||||
* need to clear the map_pid_to_cmdline. Otherwise we
|
||||
* would read the new comm for the old pid.
|
||||
*/
|
||||
pid = map_cmdline_to_pid[idx];
|
||||
pid = savedcmd->map_cmdline_to_pid[idx];
|
||||
if (pid != NO_CMDLINE_MAP)
|
||||
map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
|
||||
savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
|
||||
|
||||
map_cmdline_to_pid[idx] = tsk->pid;
|
||||
map_pid_to_cmdline[tsk->pid] = idx;
|
||||
savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
|
||||
savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
|
||||
|
||||
cmdline_idx = idx;
|
||||
savedcmd->cmdline_idx = idx;
|
||||
}
|
||||
|
||||
memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
|
||||
set_cmdline(idx, tsk->comm);
|
||||
|
||||
arch_spin_unlock(&trace_cmdline_lock);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
void trace_find_cmdline(int pid, char comm[])
|
||||
static void __trace_find_cmdline(int pid, char comm[])
|
||||
{
|
||||
unsigned map;
|
||||
|
||||
@@ -1521,13 +1550,19 @@ void trace_find_cmdline(int pid, char comm[])
|
||||
return;
|
||||
}
|
||||
|
||||
preempt_disable();
|
||||
arch_spin_lock(&trace_cmdline_lock);
|
||||
map = map_pid_to_cmdline[pid];
|
||||
map = savedcmd->map_pid_to_cmdline[pid];
|
||||
if (map != NO_CMDLINE_MAP)
|
||||
strcpy(comm, saved_cmdlines[map]);
|
||||
strcpy(comm, get_saved_cmdlines(map));
|
||||
else
|
||||
strcpy(comm, "<...>");
|
||||
}
|
||||
|
||||
void trace_find_cmdline(int pid, char comm[])
|
||||
{
|
||||
preempt_disable();
|
||||
arch_spin_lock(&trace_cmdline_lock);
|
||||
|
||||
__trace_find_cmdline(pid, comm);
|
||||
|
||||
arch_spin_unlock(&trace_cmdline_lock);
|
||||
preempt_enable();
|
||||
@@ -1541,9 +1576,8 @@ void tracing_record_cmdline(struct task_struct *tsk)
|
||||
if (!__this_cpu_read(trace_cmdline_save))
|
||||
return;
|
||||
|
||||
__this_cpu_write(trace_cmdline_save, false);
|
||||
|
||||
trace_save_cmdline(tsk);
|
||||
if (trace_save_cmdline(tsk))
|
||||
__this_cpu_write(trace_cmdline_save, false);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -1746,7 +1780,7 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
|
||||
*/
|
||||
barrier();
|
||||
if (use_stack == 1) {
|
||||
trace.entries = &__get_cpu_var(ftrace_stack).calls[0];
|
||||
trace.entries = this_cpu_ptr(ftrace_stack.calls);
|
||||
trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
|
||||
|
||||
if (regs)
|
||||
@@ -1995,7 +2029,21 @@ void trace_printk_init_buffers(void)
|
||||
if (alloc_percpu_trace_buffer())
|
||||
return;
|
||||
|
||||
pr_info("ftrace: Allocated trace_printk buffers\n");
/* trace_printk() is for debug use only. Don't use it in production. */

pr_warning("\n**********************************************************\n");
pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
pr_warning("** **\n");
pr_warning("** trace_printk() being used. Allocating extra memory. **\n");
pr_warning("** **\n");
pr_warning("** This means that this is a DEBUG kernel and it is **\n");
pr_warning("** unsafe for production use. **\n");
pr_warning("** **\n");
pr_warning("** If you see this message and you are not debugging **\n");
pr_warning("** the kernel, report this immediately to your vendor! **\n");
pr_warning("** **\n");
pr_warning("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
pr_warning("**********************************************************\n");
|
||||
|
||||
/* Expand the buffers to set size */
|
||||
tracing_update_buffers();
|
||||
@@ -3333,7 +3381,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
|
||||
mutex_lock(&tracing_cpumask_update_lock);
|
||||
|
||||
local_irq_disable();
|
||||
arch_spin_lock(&ftrace_max_lock);
|
||||
arch_spin_lock(&tr->max_lock);
|
||||
for_each_tracing_cpu(cpu) {
|
||||
/*
|
||||
* Increase/decrease the disabled counter if we are
|
||||
@@ -3350,7 +3398,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
|
||||
ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
|
||||
}
|
||||
}
|
||||
arch_spin_unlock(&ftrace_max_lock);
|
||||
arch_spin_unlock(&tr->max_lock);
|
||||
local_irq_enable();
|
||||
|
||||
cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
|
||||
@@ -3592,6 +3640,7 @@ static const char readme_msg[] =
|
||||
" trace_options\t\t- Set format or modify how tracing happens\n"
|
||||
"\t\t\t Disable an option by adding a suffix 'no' to the\n"
|
||||
"\t\t\t option name\n"
|
||||
" saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
"\n available_filter_functions - list of functions that can be filtered on\n"
|
||||
" set_ftrace_filter\t- echo function name in here to only trace these\n"
|
||||
@@ -3705,55 +3754,153 @@ static const struct file_operations tracing_readme_fops = {
|
||||
.llseek = generic_file_llseek,
|
||||
};
|
||||
|
||||
static ssize_t
|
||||
tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
|
||||
size_t cnt, loff_t *ppos)
|
||||
static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
|
||||
{
|
||||
char *buf_comm;
|
||||
char *file_buf;
|
||||
char *buf;
|
||||
int len = 0;
|
||||
int pid;
|
||||
int i;
|
||||
unsigned int *ptr = v;
|
||||
|
||||
file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
|
||||
if (!file_buf)
|
||||
return -ENOMEM;
|
||||
if (*pos || m->count)
|
||||
ptr++;
|
||||
|
||||
buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
|
||||
if (!buf_comm) {
|
||||
kfree(file_buf);
|
||||
return -ENOMEM;
|
||||
}
|
||||
(*pos)++;
|
||||
|
||||
buf = file_buf;
|
||||
|
||||
for (i = 0; i < SAVED_CMDLINES; i++) {
|
||||
int r;
|
||||
|
||||
pid = map_cmdline_to_pid[i];
|
||||
if (pid == -1 || pid == NO_CMDLINE_MAP)
|
||||
for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
|
||||
ptr++) {
|
||||
if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
|
||||
continue;
|
||||
|
||||
trace_find_cmdline(pid, buf_comm);
|
||||
r = sprintf(buf, "%d %s\n", pid, buf_comm);
|
||||
buf += r;
|
||||
len += r;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
len = simple_read_from_buffer(ubuf, cnt, ppos,
|
||||
file_buf, len);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
kfree(file_buf);
|
||||
kfree(buf_comm);
|
||||
static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
|
||||
{
|
||||
void *v;
|
||||
loff_t l = 0;
|
||||
|
||||
return len;
|
||||
preempt_disable();
|
||||
arch_spin_lock(&trace_cmdline_lock);
|
||||
|
||||
v = &savedcmd->map_cmdline_to_pid[0];
|
||||
while (l <= *pos) {
|
||||
v = saved_cmdlines_next(m, v, &l);
|
||||
if (!v)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
static void saved_cmdlines_stop(struct seq_file *m, void *v)
|
||||
{
|
||||
arch_spin_unlock(&trace_cmdline_lock);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static int saved_cmdlines_show(struct seq_file *m, void *v)
|
||||
{
|
||||
char buf[TASK_COMM_LEN];
|
||||
unsigned int *pid = v;
|
||||
|
||||
__trace_find_cmdline(*pid, buf);
|
||||
seq_printf(m, "%d %s\n", *pid, buf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
|
||||
.start = saved_cmdlines_start,
|
||||
.next = saved_cmdlines_next,
|
||||
.stop = saved_cmdlines_stop,
|
||||
.show = saved_cmdlines_show,
|
||||
};
|
||||
|
||||
static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
if (tracing_disabled)
|
||||
return -ENODEV;
|
||||
|
||||
return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
|
||||
}
|
||||
|
||||
static const struct file_operations tracing_saved_cmdlines_fops = {
|
||||
.open = tracing_open_generic,
|
||||
.read = tracing_saved_cmdlines_read,
|
||||
.llseek = generic_file_llseek,
|
||||
.open = tracing_saved_cmdlines_open,
|
||||
.read = seq_read,
|
||||
.llseek = seq_lseek,
|
||||
.release = seq_release,
|
||||
};
|
||||
|
||||
static ssize_t
|
||||
tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
|
||||
size_t cnt, loff_t *ppos)
|
||||
{
|
||||
char buf[64];
|
||||
int r;
|
||||
|
||||
arch_spin_lock(&trace_cmdline_lock);
|
||||
r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
|
||||
arch_spin_unlock(&trace_cmdline_lock);
|
||||
|
||||
return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
|
||||
}
|
||||
|
||||
static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
|
||||
{
|
||||
kfree(s->saved_cmdlines);
|
||||
kfree(s->map_cmdline_to_pid);
|
||||
kfree(s);
|
||||
}
|
||||
|
||||
static int tracing_resize_saved_cmdlines(unsigned int val)
|
||||
{
|
||||
struct saved_cmdlines_buffer *s, *savedcmd_temp;
|
||||
|
||||
s = kmalloc(sizeof(*s), GFP_KERNEL);
|
||||
if (!s)
|
||||
return -ENOMEM;
|
||||
|
||||
if (allocate_cmdlines_buffer(val, s) < 0) {
|
||||
kfree(s);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
arch_spin_lock(&trace_cmdline_lock);
|
||||
savedcmd_temp = savedcmd;
|
||||
savedcmd = s;
|
||||
arch_spin_unlock(&trace_cmdline_lock);
|
||||
free_saved_cmdlines_buffer(savedcmd_temp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
|
||||
size_t cnt, loff_t *ppos)
|
||||
{
|
||||
unsigned long val;
|
||||
int ret;
|
||||
|
||||
ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* must have at least 1 entry or less than PID_MAX_DEFAULT */
|
||||
if (!val || val > PID_MAX_DEFAULT)
|
||||
return -EINVAL;
|
||||
|
||||
ret = tracing_resize_saved_cmdlines((unsigned int)val);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
*ppos += cnt;
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static const struct file_operations tracing_saved_cmdlines_size_fops = {
|
||||
.open = tracing_open_generic,
|
||||
.read = tracing_saved_cmdlines_size_read,
|
||||
.write = tracing_saved_cmdlines_size_write,
|
||||
};
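The saved_cmdlines and saved_cmdlines_size files wired up above are plain text interfaces in the tracing debugfs directory, so they can be exercised without any tracing library. A minimal user-space sketch follows; it assumes debugfs is mounted at /sys/kernel/debug and that the program runs as root (both assumptions, not part of this patch).

#include <stdio.h>

/* Path is an assumption: debugfs mounted at /sys/kernel/debug. */
#define TRACE_DIR "/sys/kernel/debug/tracing"

int main(void)
{
	char line[256];
	FILE *f;

	/* Read the current number of saved cmdline slots. */
	f = fopen(TRACE_DIR "/saved_cmdlines_size", "r");
	if (!f) {
		perror("saved_cmdlines_size");
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		printf("current size: %s", line);
	fclose(f);

	/* Grow the pid->comm cache; the kernel rejects 0 or > PID_MAX_DEFAULT. */
	f = fopen(TRACE_DIR "/saved_cmdlines_size", "w");
	if (!f) {
		perror("saved_cmdlines_size (write)");
		return 1;
	}
	fprintf(f, "1024\n");
	fclose(f);

	/* Dump the cached pid/comm pairs, one "PID COMM" line per entry. */
	f = fopen(TRACE_DIR "/saved_cmdlines", "r");
	if (!f) {
		perror("saved_cmdlines");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);

	return 0;
}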
|
||||
|
||||
static ssize_t
|
||||
@@ -4225,29 +4372,11 @@ tracing_poll_pipe(struct file *filp, poll_table *poll_table)
|
||||
return trace_poll(iter, filp, poll_table);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a make-shift waitqueue.
|
||||
* A tracer might use this callback on some rare cases:
|
||||
*
|
||||
* 1) the current tracer might hold the runqueue lock when it wakes up
|
||||
* a reader, hence a deadlock (sched, function, and function graph tracers)
|
||||
* 2) the function tracers, trace all functions, we don't want
|
||||
* the overhead of calling wake_up and friends
|
||||
* (and tracing them too)
|
||||
*
|
||||
* Anyway, this is really very primitive wakeup.
|
||||
*/
|
||||
void poll_wait_pipe(struct trace_iterator *iter)
|
||||
{
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
/* sleep for 100 msecs, and try again. */
|
||||
schedule_timeout(HZ / 10);
|
||||
}
|
||||
|
||||
/* Must be called with trace_types_lock mutex held. */
|
||||
static int tracing_wait_pipe(struct file *filp)
|
||||
{
|
||||
struct trace_iterator *iter = filp->private_data;
|
||||
int ret;
|
||||
|
||||
while (trace_empty(iter)) {
|
||||
|
||||
@@ -4255,15 +4384,6 @@ static int tracing_wait_pipe(struct file *filp)
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
mutex_unlock(&iter->mutex);
|
||||
|
||||
iter->trace->wait_pipe(iter);
|
||||
|
||||
mutex_lock(&iter->mutex);
|
||||
|
||||
if (signal_pending(current))
|
||||
return -EINTR;
|
||||
|
||||
/*
|
||||
* We block until we read something and tracing is disabled.
|
||||
* We still block if tracing is disabled, but we have never
|
||||
@@ -4275,6 +4395,18 @@ static int tracing_wait_pipe(struct file *filp)
|
||||
*/
|
||||
if (!tracing_is_on() && iter->pos)
|
||||
break;
|
||||
|
||||
mutex_unlock(&iter->mutex);
|
||||
|
||||
ret = wait_on_pipe(iter);
|
||||
|
||||
mutex_lock(&iter->mutex);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (signal_pending(current))
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
return 1;
|
||||
@@ -5197,8 +5329,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
|
||||
goto out_unlock;
|
||||
}
|
||||
mutex_unlock(&trace_types_lock);
|
||||
iter->trace->wait_pipe(iter);
|
||||
ret = wait_on_pipe(iter);
|
||||
mutex_lock(&trace_types_lock);
|
||||
if (ret) {
|
||||
size = ret;
|
||||
goto out_unlock;
|
||||
}
|
||||
if (signal_pending(current)) {
|
||||
size = -EINTR;
|
||||
goto out_unlock;
|
||||
@@ -5408,8 +5544,10 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
|
||||
goto out;
|
||||
}
|
||||
mutex_unlock(&trace_types_lock);
|
||||
iter->trace->wait_pipe(iter);
|
||||
ret = wait_on_pipe(iter);
|
||||
mutex_lock(&trace_types_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
if (signal_pending(current)) {
|
||||
ret = -EINTR;
|
||||
goto out;
|
||||
@@ -6102,6 +6240,28 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void free_trace_buffer(struct trace_buffer *buf)
|
||||
{
|
||||
if (buf->buffer) {
|
||||
ring_buffer_free(buf->buffer);
|
||||
buf->buffer = NULL;
|
||||
free_percpu(buf->data);
|
||||
buf->data = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void free_trace_buffers(struct trace_array *tr)
|
||||
{
|
||||
if (!tr)
|
||||
return;
|
||||
|
||||
free_trace_buffer(&tr->trace_buffer);
|
||||
|
||||
#ifdef CONFIG_TRACER_MAX_TRACE
|
||||
free_trace_buffer(&tr->max_buffer);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int new_instance_create(const char *name)
|
||||
{
|
||||
struct trace_array *tr;
|
||||
@@ -6131,6 +6291,8 @@ static int new_instance_create(const char *name)
|
||||
|
||||
raw_spin_lock_init(&tr->start_lock);
|
||||
|
||||
tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
|
||||
|
||||
tr->current_trace = &nop_trace;
|
||||
|
||||
INIT_LIST_HEAD(&tr->systems);
|
||||
@@ -6158,8 +6320,7 @@ static int new_instance_create(const char *name)
|
||||
return 0;
|
||||
|
||||
out_free_tr:
|
||||
if (tr->trace_buffer.buffer)
|
||||
ring_buffer_free(tr->trace_buffer.buffer);
|
||||
free_trace_buffers(tr);
|
||||
free_cpumask_var(tr->tracing_cpumask);
|
||||
kfree(tr->name);
|
||||
kfree(tr);
|
||||
@@ -6199,8 +6360,7 @@ static int instance_delete(const char *name)
|
||||
event_trace_del_tracer(tr);
|
||||
ftrace_destroy_function_files(tr);
|
||||
debugfs_remove_recursive(tr->dir);
|
||||
free_percpu(tr->trace_buffer.data);
|
||||
ring_buffer_free(tr->trace_buffer.buffer);
|
||||
free_trace_buffers(tr);
|
||||
|
||||
kfree(tr->name);
|
||||
kfree(tr);
|
||||
@@ -6328,6 +6488,11 @@ init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
|
||||
trace_create_file("tracing_on", 0644, d_tracer,
|
||||
tr, &rb_simple_fops);
|
||||
|
||||
#ifdef CONFIG_TRACER_MAX_TRACE
|
||||
trace_create_file("tracing_max_latency", 0644, d_tracer,
|
||||
&tr->max_latency, &tracing_max_lat_fops);
|
||||
#endif
|
||||
|
||||
if (ftrace_create_function_files(tr, d_tracer))
|
||||
WARN(1, "Could not allocate function filter files");
|
||||
|
||||
@@ -6353,11 +6518,6 @@ static __init int tracer_init_debugfs(void)
|
||||
|
||||
init_tracer_debugfs(&global_trace, d_tracer);
|
||||
|
||||
#ifdef CONFIG_TRACER_MAX_TRACE
|
||||
trace_create_file("tracing_max_latency", 0644, d_tracer,
|
||||
&tracing_max_latency, &tracing_max_lat_fops);
|
||||
#endif
|
||||
|
||||
trace_create_file("tracing_thresh", 0644, d_tracer,
|
||||
&tracing_thresh, &tracing_max_lat_fops);
|
||||
|
||||
@@ -6367,6 +6527,9 @@ static __init int tracer_init_debugfs(void)
|
||||
trace_create_file("saved_cmdlines", 0444, d_tracer,
|
||||
NULL, &tracing_saved_cmdlines_fops);
|
||||
|
||||
trace_create_file("saved_cmdlines_size", 0644, d_tracer,
|
||||
NULL, &tracing_saved_cmdlines_size_fops);
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
|
||||
&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
|
||||
@@ -6603,18 +6766,19 @@ __init static int tracer_alloc_buffers(void)
|
||||
if (!temp_buffer)
|
||||
goto out_free_cpumask;
|
||||
|
||||
if (trace_create_savedcmd() < 0)
|
||||
goto out_free_temp_buffer;
|
||||
|
||||
/* TODO: make the number of buffers hot pluggable with CPUS */
|
||||
if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
|
||||
printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
|
||||
WARN_ON(1);
|
||||
goto out_free_temp_buffer;
|
||||
goto out_free_savedcmd;
|
||||
}
|
||||
|
||||
if (global_trace.buffer_disabled)
|
||||
tracing_off();
|
||||
|
||||
trace_init_cmdlines();
|
||||
|
||||
if (trace_boot_clock) {
|
||||
ret = tracing_set_clock(&global_trace, trace_boot_clock);
|
||||
if (ret < 0)
|
||||
@@ -6629,6 +6793,10 @@ __init static int tracer_alloc_buffers(void)
|
||||
*/
|
||||
global_trace.current_trace = &nop_trace;
|
||||
|
||||
global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
|
||||
|
||||
ftrace_init_global_array_ops(&global_trace);
|
||||
|
||||
register_tracer(&nop_trace);
|
||||
|
||||
/* All seems OK, enable tracing */
|
||||
@@ -6656,13 +6824,11 @@ __init static int tracer_alloc_buffers(void)
|
||||
|
||||
return 0;
|
||||
|
||||
out_free_savedcmd:
|
||||
free_saved_cmdlines_buffer(savedcmd);
|
||||
out_free_temp_buffer:
|
||||
ring_buffer_free(temp_buffer);
|
||||
out_free_cpumask:
|
||||
free_percpu(global_trace.trace_buffer.data);
|
||||
#ifdef CONFIG_TRACER_MAX_TRACE
|
||||
free_percpu(global_trace.max_buffer.data);
|
||||
#endif
|
||||
free_cpumask_var(global_trace.tracing_cpumask);
|
||||
out_free_buffer_mask:
|
||||
free_cpumask_var(tracing_buffer_mask);
|
||||
|
@@ -190,7 +190,22 @@ struct trace_array {
|
||||
*/
|
||||
struct trace_buffer max_buffer;
|
||||
bool allocated_snapshot;
|
||||
unsigned long max_latency;
|
||||
#endif
|
||||
/*
|
||||
* max_lock is used to protect the swapping of buffers
|
||||
* when taking a max snapshot. The buffers themselves are
|
||||
* protected by per_cpu spinlocks. But the action of the swap
|
||||
* needs its own lock.
|
||||
*
|
||||
* This is defined as a arch_spinlock_t in order to help
|
||||
* with performance when lockdep debugging is enabled.
|
||||
*
|
||||
* It is also used in other places outside the update_max_tr
|
||||
* so it needs to be defined outside of the
|
||||
* CONFIG_TRACER_MAX_TRACE.
|
||||
*/
|
||||
arch_spinlock_t max_lock;
|
||||
int buffer_disabled;
|
||||
#ifdef CONFIG_FTRACE_SYSCALLS
|
||||
int sys_refcount_enter;
|
||||
@@ -237,6 +252,9 @@ static inline struct trace_array *top_trace_array(void)
|
||||
{
|
||||
struct trace_array *tr;
|
||||
|
||||
if (list_empty(&ftrace_trace_arrays))
|
||||
return NULL;
|
||||
|
||||
tr = list_entry(ftrace_trace_arrays.prev,
|
||||
typeof(*tr), list);
|
||||
WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL));
|
||||
@@ -323,7 +341,6 @@ struct tracer_flags {
|
||||
* @stop: called when tracing is paused (echo 0 > tracing_enabled)
|
||||
* @open: called when the trace file is opened
|
||||
* @pipe_open: called when the trace_pipe file is opened
|
||||
* @wait_pipe: override how the user waits for traces on trace_pipe
|
||||
* @close: called when the trace file is released
|
||||
* @pipe_close: called when the trace_pipe file is released
|
||||
* @read: override the default read callback on trace_pipe
|
||||
@@ -342,7 +359,6 @@ struct tracer {
|
||||
void (*stop)(struct trace_array *tr);
|
||||
void (*open)(struct trace_iterator *iter);
|
||||
void (*pipe_open)(struct trace_iterator *iter);
|
||||
void (*wait_pipe)(struct trace_iterator *iter);
|
||||
void (*close)(struct trace_iterator *iter);
|
||||
void (*pipe_close)(struct trace_iterator *iter);
|
||||
ssize_t (*read)(struct trace_iterator *iter,
|
||||
@@ -416,13 +432,7 @@ enum {
|
||||
TRACE_FTRACE_IRQ_BIT,
|
||||
TRACE_FTRACE_SIRQ_BIT,
|
||||
|
||||
/* GLOBAL_BITs must be greater than FTRACE_BITs */
|
||||
TRACE_GLOBAL_BIT,
|
||||
TRACE_GLOBAL_NMI_BIT,
|
||||
TRACE_GLOBAL_IRQ_BIT,
|
||||
TRACE_GLOBAL_SIRQ_BIT,
|
||||
|
||||
/* INTERNAL_BITs must be greater than GLOBAL_BITs */
|
||||
/* INTERNAL_BITs must be greater than FTRACE_BITs */
|
||||
TRACE_INTERNAL_BIT,
|
||||
TRACE_INTERNAL_NMI_BIT,
|
||||
TRACE_INTERNAL_IRQ_BIT,
|
||||
@@ -449,9 +459,6 @@ enum {
|
||||
#define TRACE_FTRACE_START TRACE_FTRACE_BIT
|
||||
#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
|
||||
|
||||
#define TRACE_GLOBAL_START TRACE_GLOBAL_BIT
|
||||
#define TRACE_GLOBAL_MAX ((1 << (TRACE_GLOBAL_START + TRACE_CONTEXT_BITS)) - 1)
|
||||
|
||||
#define TRACE_LIST_START TRACE_INTERNAL_BIT
|
||||
#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
|
||||
|
||||
@@ -560,8 +567,6 @@ void trace_init_global_iter(struct trace_iterator *iter);
|
||||
|
||||
void tracing_iter_reset(struct trace_iterator *iter, int cpu);
|
||||
|
||||
void poll_wait_pipe(struct trace_iterator *iter);
|
||||
|
||||
void tracing_sched_switch_trace(struct trace_array *tr,
|
||||
struct task_struct *prev,
|
||||
struct task_struct *next,
|
||||
@@ -608,8 +613,6 @@ extern unsigned long nsecs_to_usecs(unsigned long nsecs);
|
||||
extern unsigned long tracing_thresh;
|
||||
|
||||
#ifdef CONFIG_TRACER_MAX_TRACE
|
||||
extern unsigned long tracing_max_latency;
|
||||
|
||||
void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
|
||||
void update_max_tr_single(struct trace_array *tr,
|
||||
struct task_struct *tsk, int cpu);
|
||||
@@ -724,6 +727,8 @@ extern unsigned long trace_flags;
|
||||
#define TRACE_GRAPH_PRINT_PROC 0x8
|
||||
#define TRACE_GRAPH_PRINT_DURATION 0x10
|
||||
#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
|
||||
#define TRACE_GRAPH_PRINT_IRQS 0x40
|
||||
#define TRACE_GRAPH_PRINT_TAIL 0x80
|
||||
#define TRACE_GRAPH_PRINT_FILL_SHIFT 28
|
||||
#define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
|
||||
|
||||
@@ -823,6 +828,10 @@ extern int ftrace_is_dead(void);
|
||||
int ftrace_create_function_files(struct trace_array *tr,
|
||||
struct dentry *parent);
|
||||
void ftrace_destroy_function_files(struct trace_array *tr);
|
||||
void ftrace_init_global_array_ops(struct trace_array *tr);
|
||||
void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func);
|
||||
void ftrace_reset_array_ops(struct trace_array *tr);
|
||||
int using_ftrace_ops_list_func(void);
|
||||
#else
|
||||
static inline int ftrace_trace_task(struct task_struct *task)
|
||||
{
|
||||
@@ -836,6 +845,11 @@ ftrace_create_function_files(struct trace_array *tr,
|
||||
return 0;
|
||||
}
|
||||
static inline void ftrace_destroy_function_files(struct trace_array *tr) { }
|
||||
static inline __init void
|
||||
ftrace_init_global_array_ops(struct trace_array *tr) { }
|
||||
static inline void ftrace_reset_array_ops(struct trace_array *tr) { }
|
||||
/* ftrace_func_t type is not defined, use macro instead of static inline */
|
||||
#define ftrace_init_array_ops(tr, func) do { } while (0)
|
||||
#endif /* CONFIG_FUNCTION_TRACER */
|
||||
|
||||
#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
|
||||
|
kernel/trace/trace_benchmark.c (new file, 198 lines)
@@ -0,0 +1,198 @@
|
||||
#include <linux/delay.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/trace_clock.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "trace_benchmark.h"
|
||||
|
||||
static struct task_struct *bm_event_thread;
|
||||
|
||||
static char bm_str[BENCHMARK_EVENT_STRLEN] = "START";
|
||||
|
||||
static u64 bm_total;
|
||||
static u64 bm_totalsq;
|
||||
static u64 bm_last;
|
||||
static u64 bm_max;
|
||||
static u64 bm_min;
|
||||
static u64 bm_first;
|
||||
static u64 bm_cnt;
|
||||
static u64 bm_stddev;
|
||||
static unsigned int bm_avg;
|
||||
static unsigned int bm_std;
|
||||
|
||||
/*
 * This gets called in a loop recording the time it took to write
 * the tracepoint. What it writes is the time statistics of the last
 * tracepoint write. As there is nothing to write the first time
 * it simply writes "START". As the first write is cold cache and
 * the rest is hot, we save off that time in bm_first and it is
 * reported as "first", which is shown in the second write to the
 * tracepoint. The "first" field is written within the statistics from
 * then on but never changes.
 */
|
||||
static void trace_do_benchmark(void)
|
||||
{
|
||||
u64 start;
|
||||
u64 stop;
|
||||
u64 delta;
|
||||
u64 stddev;
|
||||
u64 seed;
|
||||
u64 last_seed;
|
||||
unsigned int avg;
|
||||
unsigned int std = 0;
|
||||
|
||||
/* Only run if the tracepoint is actually active */
|
||||
if (!trace_benchmark_event_enabled())
|
||||
return;
|
||||
|
||||
local_irq_disable();
|
||||
start = trace_clock_local();
|
||||
trace_benchmark_event(bm_str);
|
||||
stop = trace_clock_local();
|
||||
local_irq_enable();
|
||||
|
||||
bm_cnt++;
|
||||
|
||||
delta = stop - start;
|
||||
|
||||
/*
|
||||
* The first read is cold cached, keep it separate from the
|
||||
* other calculations.
|
||||
*/
|
||||
if (bm_cnt == 1) {
|
||||
bm_first = delta;
|
||||
scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
|
||||
"first=%llu [COLD CACHED]", bm_first);
|
||||
return;
|
||||
}
|
||||
|
||||
bm_last = delta;
|
||||
|
||||
if (delta > bm_max)
|
||||
bm_max = delta;
|
||||
if (!bm_min || delta < bm_min)
|
||||
bm_min = delta;
|
||||
|
||||
/*
|
||||
* When bm_cnt is greater than UINT_MAX, it breaks the statistics
|
||||
* accounting. Freeze the statistics when that happens.
|
||||
* We should have enough data for the avg and stddev anyway.
|
||||
*/
|
||||
if (bm_cnt > UINT_MAX) {
|
||||
scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
|
||||
"last=%llu first=%llu max=%llu min=%llu ** avg=%u std=%d std^2=%lld",
|
||||
bm_last, bm_first, bm_max, bm_min, bm_avg, bm_std, bm_stddev);
|
||||
return;
|
||||
}
|
||||
|
||||
bm_total += delta;
|
||||
bm_totalsq += delta * delta;
|
||||
|
||||
|
||||
if (bm_cnt > 1) {
|
||||
/*
|
||||
* Apply Welford's method to calculate standard deviation:
|
||||
* s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2)
|
||||
*/
|
||||
stddev = (u64)bm_cnt * bm_totalsq - bm_total * bm_total;
|
||||
do_div(stddev, (u32)bm_cnt);
|
||||
do_div(stddev, (u32)bm_cnt - 1);
|
||||
} else
|
||||
stddev = 0;
|
||||
|
||||
delta = bm_total;
|
||||
do_div(delta, bm_cnt);
|
||||
avg = delta;
|
||||
|
||||
if (stddev > 0) {
|
||||
int i = 0;
|
||||
/*
 * stddev is the square of standard deviation but
 * we want the actual number. Use the average
 * as our seed to find the std.
 *
 * The next try is:
 *  x = (x + N/x) / 2
 *
 * Where N is the squared number to find the square
 * root of.
 */
|
||||
seed = avg;
|
||||
do {
|
||||
last_seed = seed;
|
||||
seed = stddev;
|
||||
if (!last_seed)
|
||||
break;
|
||||
do_div(seed, last_seed);
|
||||
seed += last_seed;
|
||||
do_div(seed, 2);
|
||||
} while (i++ < 10 && last_seed != seed);
|
||||
|
||||
std = seed;
|
||||
}
|
||||
|
||||
scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
|
||||
"last=%llu first=%llu max=%llu min=%llu avg=%u std=%d std^2=%lld",
|
||||
bm_last, bm_first, bm_max, bm_min, avg, std, stddev);
|
||||
|
||||
bm_std = std;
|
||||
bm_avg = avg;
|
||||
bm_stddev = stddev;
|
||||
}
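The statistics above use the computational form of the variance, (n * sum(x_i^2) - (sum x_i)^2) / (n * (n - 1)), followed by the integer Newton iteration x = (x + N/x) / 2 to approximate the square root, seeded with the average. Below is a self-contained user-space sketch of the same arithmetic, using plain 64-bit division in place of do_div(); the sample values are made up purely for illustration.

#include <stdio.h>
#include <stdint.h>

/*
 * Mirror of the avg/std computation in trace_do_benchmark(), user-space
 * version: variance by the computational formula, square root by a
 * capped Newton iteration seeded with the average.
 */
static void bench_stats(const uint64_t *samples, unsigned int n)
{
	uint64_t total = 0, totalsq = 0, var, seed, last_seed;
	unsigned int avg, std = 0, i;

	for (i = 0; i < n; i++) {
		total += samples[i];
		totalsq += samples[i] * samples[i];
	}

	avg = total / n;
	var = n > 1 ? (n * totalsq - total * total) / (n * (n - 1)) : 0;

	if (var > 0) {
		seed = avg ? avg : 1;
		for (i = 0; i < 10; i++) {
			last_seed = seed;
			seed = (last_seed + var / last_seed) / 2;
			if (seed == last_seed)
				break;
		}
		std = seed;
	}

	printf("avg=%u std=%u std^2=%llu\n",
	       avg, std, (unsigned long long)var);
}

int main(void)
{
	/* Made-up per-write latencies, just to exercise the math. */
	uint64_t samples[] = { 120, 135, 128, 143, 122, 131 };

	bench_stats(samples, sizeof(samples) / sizeof(samples[0]));
	return 0;
}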
|
||||
|
||||
static int benchmark_event_kthread(void *arg)
|
||||
{
|
||||
/* sleep a bit to make sure the tracepoint gets activated */
|
||||
msleep(100);
|
||||
|
||||
while (!kthread_should_stop()) {
|
||||
|
||||
trace_do_benchmark();
|
||||
|
||||
/*
|
||||
* We don't go to sleep, but let others
|
||||
* run as well.
|
||||
*/
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* When the benchmark tracepoint is enabled, it calls this
|
||||
* function and the thread that calls the tracepoint is created.
|
||||
*/
|
||||
void trace_benchmark_reg(void)
|
||||
{
|
||||
bm_event_thread = kthread_run(benchmark_event_kthread,
|
||||
NULL, "event_benchmark");
|
||||
WARN_ON(!bm_event_thread);
|
||||
}
|
||||
|
||||
/*
|
||||
* When the benchmark tracepoint is disabled, it calls this
|
||||
* function and the thread that calls the tracepoint is deleted
|
||||
* and all the numbers are reset.
|
||||
*/
|
||||
void trace_benchmark_unreg(void)
|
||||
{
|
||||
if (!bm_event_thread)
|
||||
return;
|
||||
|
||||
kthread_stop(bm_event_thread);
|
||||
|
||||
strcpy(bm_str, "START");
|
||||
bm_total = 0;
|
||||
bm_totalsq = 0;
|
||||
bm_last = 0;
|
||||
bm_max = 0;
|
||||
bm_min = 0;
|
||||
bm_cnt = 0;
|
||||
/* These don't need to be reset but reset them anyway */
|
||||
bm_first = 0;
|
||||
bm_std = 0;
|
||||
bm_avg = 0;
|
||||
bm_stddev = 0;
|
||||
}
|
kernel/trace/trace_benchmark.h (new file, 41 lines)
@@ -0,0 +1,41 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM benchmark

#if !defined(_TRACE_BENCHMARK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_BENCHMARK_H

#include <linux/tracepoint.h>

extern void trace_benchmark_reg(void);
extern void trace_benchmark_unreg(void);

#define BENCHMARK_EVENT_STRLEN 128

TRACE_EVENT_FN(benchmark_event,

	TP_PROTO(const char *str),

	TP_ARGS(str),

	TP_STRUCT__entry(
		__array( char, str, BENCHMARK_EVENT_STRLEN )
	),

	TP_fast_assign(
		memcpy(__entry->str, str, BENCHMARK_EVENT_STRLEN);
	),

	TP_printk("%s", __entry->str),

	trace_benchmark_reg, trace_benchmark_unreg
);

#endif /* _TRACE_BENCHMARK_H */

#undef TRACE_INCLUDE_FILE
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_benchmark

/* This part must be outside protection */
#include <trace/define_trace.h>
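Because the event is declared with TRACE_EVENT_FN(), enabling it through the tracing filesystem invokes trace_benchmark_reg() (which starts the benchmark kthread) and disabling it invokes trace_benchmark_unreg() (which stops the kthread and resets the numbers). A user-space sketch of that round trip follows; the mount point /sys/kernel/debug and root privileges are assumptions, not part of this patch.

#include <stdio.h>

/* Paths assume debugfs is mounted at /sys/kernel/debug (an assumption). */
#define EV "/sys/kernel/debug/tracing/events/benchmark/benchmark_event/enable"
#define PIPE "/sys/kernel/debug/tracing/trace_pipe"

static int write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	char line[512];
	FILE *fp;
	int i;

	/* Enabling the event calls trace_benchmark_reg() -> starts the kthread. */
	if (write_str(EV, "1")) {
		perror(EV);
		return 1;
	}

	/* Read a few benchmark records as they are produced. */
	fp = fopen(PIPE, "r");
	if (fp) {
		for (i = 0; i < 5 && fgets(line, sizeof(line), fp); i++)
			fputs(line, stdout);
		fclose(fp);
	}

	/* Disabling calls trace_benchmark_unreg() -> stops the kthread, resets stats. */
	write_str(EV, "0");
	return 0;
}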
@@ -248,8 +248,8 @@ void perf_trace_del(struct perf_event *p_event, int flags)
|
||||
tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
|
||||
}
|
||||
|
||||
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
|
||||
struct pt_regs *regs, int *rctxp)
|
||||
void *perf_trace_buf_prepare(int size, unsigned short type,
|
||||
struct pt_regs *regs, int *rctxp)
|
||||
{
|
||||
struct trace_entry *entry;
|
||||
unsigned long flags;
|
||||
@@ -281,6 +281,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
|
||||
return raw_data;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
|
||||
NOKPROBE_SYMBOL(perf_trace_buf_prepare);
|
||||
|
||||
#ifdef CONFIG_FUNCTION_TRACER
|
||||
static void
|
||||
|
@@ -574,6 +574,9 @@ int trace_set_clr_event(const char *system, const char *event, int set)
|
||||
{
|
||||
struct trace_array *tr = top_trace_array();
|
||||
|
||||
if (!tr)
|
||||
return -ENODEV;
|
||||
|
||||
return __ftrace_set_clr_event(tr, NULL, system, event, set);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(trace_set_clr_event);
|
||||
@@ -2065,6 +2068,9 @@ event_enable_func(struct ftrace_hash *hash,
|
||||
bool enable;
|
||||
int ret;
|
||||
|
||||
if (!tr)
|
||||
return -ENODEV;
|
||||
|
||||
/* hash funcs only work with set_ftrace_filter */
|
||||
if (!enabled || !param)
|
||||
return -EINVAL;
|
||||
@@ -2396,6 +2402,9 @@ static __init int event_trace_enable(void)
|
||||
char *token;
|
||||
int ret;
|
||||
|
||||
if (!tr)
|
||||
return -ENODEV;
|
||||
|
||||
for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
|
||||
|
||||
call = *iter;
|
||||
@@ -2442,6 +2451,8 @@ static __init int event_trace_init(void)
|
||||
int ret;
|
||||
|
||||
tr = top_trace_array();
|
||||
if (!tr)
|
||||
return -ENODEV;
|
||||
|
||||
d_tracer = tracing_init_dentry();
|
||||
if (!d_tracer)
|
||||
@@ -2535,6 +2546,8 @@ static __init void event_trace_self_tests(void)
|
||||
int ret;
|
||||
|
||||
tr = top_trace_array();
|
||||
if (!tr)
|
||||
return;
|
||||
|
||||
pr_info("Running tests on trace events:\n");
|
||||
|
||||
|
@@ -26,8 +26,6 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
|
||||
static void
|
||||
function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
|
||||
struct ftrace_ops *op, struct pt_regs *pt_regs);
|
||||
static struct ftrace_ops trace_ops;
|
||||
static struct ftrace_ops trace_stack_ops;
|
||||
static struct tracer_flags func_flags;
|
||||
|
||||
/* Our option */
|
||||
@@ -83,28 +81,24 @@ void ftrace_destroy_function_files(struct trace_array *tr)
|
||||
|
||||
static int function_trace_init(struct trace_array *tr)
|
||||
{
|
||||
struct ftrace_ops *ops;
|
||||
ftrace_func_t func;
|
||||
|
||||
if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
|
||||
/* There's only one global tr */
|
||||
if (!trace_ops.private) {
|
||||
trace_ops.private = tr;
|
||||
trace_stack_ops.private = tr;
|
||||
}
|
||||
|
||||
if (func_flags.val & TRACE_FUNC_OPT_STACK)
|
||||
ops = &trace_stack_ops;
|
||||
else
|
||||
ops = &trace_ops;
|
||||
tr->ops = ops;
|
||||
} else if (!tr->ops) {
|
||||
/*
|
||||
* Instance trace_arrays get their ops allocated
|
||||
* at instance creation. Unless it failed
|
||||
* the allocation.
|
||||
*/
|
||||
/*
|
||||
* Instance trace_arrays get their ops allocated
|
||||
* at instance creation. Unless it failed
|
||||
* the allocation.
|
||||
*/
|
||||
if (!tr->ops)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Currently only the global instance can do stack tracing */
|
||||
if (tr->flags & TRACE_ARRAY_FL_GLOBAL &&
|
||||
func_flags.val & TRACE_FUNC_OPT_STACK)
|
||||
func = function_stack_trace_call;
|
||||
else
|
||||
func = function_trace_call;
|
||||
|
||||
ftrace_init_array_ops(tr, func);
|
||||
|
||||
tr->trace_buffer.cpu = get_cpu();
|
||||
put_cpu();
|
||||
@@ -118,6 +112,7 @@ static void function_trace_reset(struct trace_array *tr)
|
||||
{
|
||||
tracing_stop_function_trace(tr);
|
||||
tracing_stop_cmdline_record();
|
||||
ftrace_reset_array_ops(tr);
|
||||
}
|
||||
|
||||
static void function_trace_start(struct trace_array *tr)
|
||||
@@ -199,18 +194,6 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static struct ftrace_ops trace_ops __read_mostly =
|
||||
{
|
||||
.func = function_trace_call,
|
||||
.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
|
||||
};
|
||||
|
||||
static struct ftrace_ops trace_stack_ops __read_mostly =
|
||||
{
|
||||
.func = function_stack_trace_call,
|
||||
.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
|
||||
};
|
||||
|
||||
static struct tracer_opt func_opts[] = {
|
||||
#ifdef CONFIG_STACKTRACE
|
||||
{ TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) },
|
||||
@@ -248,10 +231,10 @@ func_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
|
||||
unregister_ftrace_function(tr->ops);
|
||||
|
||||
if (set) {
|
||||
tr->ops = &trace_stack_ops;
|
||||
tr->ops->func = function_stack_trace_call;
|
||||
register_ftrace_function(tr->ops);
|
||||
} else {
|
||||
tr->ops = &trace_ops;
|
||||
tr->ops->func = function_trace_call;
|
||||
register_ftrace_function(tr->ops);
|
||||
}
|
||||
|
||||
@@ -269,7 +252,6 @@ static struct tracer function_trace __tracer_data =
|
||||
.init = function_trace_init,
|
||||
.reset = function_trace_reset,
|
||||
.start = function_trace_start,
|
||||
.wait_pipe = poll_wait_pipe,
|
||||
.flags = &func_flags,
|
||||
.set_flag = func_set_flag,
|
||||
.allow_instances = true,
|
||||
|
@@ -38,15 +38,6 @@ struct fgraph_data {
|
||||
|
||||
#define TRACE_GRAPH_INDENT 2
|
||||
|
||||
/* Flag options */
|
||||
#define TRACE_GRAPH_PRINT_OVERRUN 0x1
|
||||
#define TRACE_GRAPH_PRINT_CPU 0x2
|
||||
#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
|
||||
#define TRACE_GRAPH_PRINT_PROC 0x8
|
||||
#define TRACE_GRAPH_PRINT_DURATION 0x10
|
||||
#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
|
||||
#define TRACE_GRAPH_PRINT_IRQS 0x40
|
||||
|
||||
static unsigned int max_depth;
|
||||
|
||||
static struct tracer_opt trace_opts[] = {
|
||||
@@ -64,11 +55,13 @@ static struct tracer_opt trace_opts[] = {
|
||||
{ TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
|
||||
/* Display interrupts */
|
||||
{ TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) },
|
||||
/* Display function name after trailing } */
|
||||
{ TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
|
||||
{ } /* Empty entry */
|
||||
};
|
||||
|
||||
static struct tracer_flags tracer_flags = {
|
||||
/* Don't display overruns and proc by default */
|
||||
/* Don't display overruns, proc, or tail by default */
|
||||
.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
|
||||
TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS,
|
||||
.opts = trace_opts
|
||||
@@ -1176,9 +1169,10 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
|
||||
* If the return function does not have a matching entry,
|
||||
* then the entry was lost. Instead of just printing
|
||||
* the '}' and letting the user guess what function this
|
||||
* belongs to, write out the function name.
|
||||
* belongs to, write out the function name. Always do
|
||||
* that if the funcgraph-tail option is enabled.
|
||||
*/
|
||||
if (func_match) {
|
||||
if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) {
|
||||
ret = trace_seq_puts(s, "}\n");
|
||||
if (!ret)
|
||||
return TRACE_TYPE_PARTIAL_LINE;
|
||||
@@ -1505,7 +1499,6 @@ static struct tracer graph_trace __tracer_data = {
|
||||
.pipe_open = graph_trace_open,
|
||||
.close = graph_trace_close,
|
||||
.pipe_close = graph_trace_close,
|
||||
.wait_pipe = poll_wait_pipe,
|
||||
.init = graph_trace_init,
|
||||
.reset = graph_trace_reset,
|
||||
.print_line = print_graph_function,
|
||||
|
@@ -151,12 +151,6 @@ irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip,
|
||||
|
||||
atomic_dec(&data->disabled);
|
||||
}
|
||||
|
||||
static struct ftrace_ops trace_ops __read_mostly =
|
||||
{
|
||||
.func = irqsoff_tracer_call,
|
||||
.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
|
||||
};
|
||||
#endif /* CONFIG_FUNCTION_TRACER */
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
@@ -176,7 +170,7 @@ irqsoff_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
|
||||
for_each_possible_cpu(cpu)
|
||||
per_cpu(tracing_cpu, cpu) = 0;
|
||||
|
||||
tracing_max_latency = 0;
|
||||
tr->max_latency = 0;
|
||||
tracing_reset_online_cpus(&irqsoff_trace->trace_buffer);
|
||||
|
||||
return start_irqsoff_tracer(irqsoff_trace, set);
|
||||
@@ -303,13 +297,13 @@ static void irqsoff_print_header(struct seq_file *s)
|
||||
/*
|
||||
* Should this new latency be reported/recorded?
|
||||
*/
|
||||
static int report_latency(cycle_t delta)
|
||||
static int report_latency(struct trace_array *tr, cycle_t delta)
|
||||
{
|
||||
if (tracing_thresh) {
|
||||
if (delta < tracing_thresh)
|
||||
return 0;
|
||||
} else {
|
||||
if (delta <= tracing_max_latency)
|
||||
if (delta <= tr->max_latency)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
@@ -333,13 +327,13 @@ check_critical_timing(struct trace_array *tr,
|
||||
|
||||
pc = preempt_count();
|
||||
|
||||
if (!report_latency(delta))
|
||||
if (!report_latency(tr, delta))
|
||||
goto out;
|
||||
|
||||
raw_spin_lock_irqsave(&max_trace_lock, flags);
|
||||
|
||||
/* check if we are still the max latency */
|
||||
if (!report_latency(delta))
|
||||
if (!report_latency(tr, delta))
|
||||
goto out_unlock;
|
||||
|
||||
__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
|
||||
@@ -352,7 +346,7 @@ check_critical_timing(struct trace_array *tr,
|
||||
data->critical_end = parent_ip;
|
||||
|
||||
if (likely(!is_tracing_stopped())) {
|
||||
tracing_max_latency = delta;
|
||||
tr->max_latency = delta;
|
||||
update_max_tr_single(tr, current, cpu);
|
||||
}
|
||||
|
||||
@@ -531,7 +525,7 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
|
||||
}
|
||||
#endif /* CONFIG_PREEMPT_TRACER */
|
||||
|
||||
static int register_irqsoff_function(int graph, int set)
|
||||
static int register_irqsoff_function(struct trace_array *tr, int graph, int set)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@@ -543,7 +537,7 @@ static int register_irqsoff_function(int graph, int set)
|
||||
ret = register_ftrace_graph(&irqsoff_graph_return,
|
||||
&irqsoff_graph_entry);
|
||||
else
|
||||
ret = register_ftrace_function(&trace_ops);
|
||||
ret = register_ftrace_function(tr->ops);
|
||||
|
||||
if (!ret)
|
||||
function_enabled = true;
|
||||
@@ -551,7 +545,7 @@ static int register_irqsoff_function(int graph, int set)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void unregister_irqsoff_function(int graph)
|
||||
static void unregister_irqsoff_function(struct trace_array *tr, int graph)
|
||||
{
|
||||
if (!function_enabled)
|
||||
return;
|
||||
@@ -559,17 +553,17 @@ static void unregister_irqsoff_function(int graph)
|
||||
if (graph)
|
||||
unregister_ftrace_graph();
|
||||
else
|
||||
unregister_ftrace_function(&trace_ops);
|
||||
unregister_ftrace_function(tr->ops);
|
||||
|
||||
function_enabled = false;
|
||||
}
|
||||
|
||||
static void irqsoff_function_set(int set)
|
||||
static void irqsoff_function_set(struct trace_array *tr, int set)
|
||||
{
|
||||
if (set)
|
||||
register_irqsoff_function(is_graph(), 1);
|
||||
register_irqsoff_function(tr, is_graph(), 1);
|
||||
else
|
||||
unregister_irqsoff_function(is_graph());
|
||||
unregister_irqsoff_function(tr, is_graph());
|
||||
}
|
||||
|
||||
static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set)
|
||||
@@ -577,7 +571,7 @@ static int irqsoff_flag_changed(struct trace_array *tr, u32 mask, int set)
|
||||
struct tracer *tracer = tr->current_trace;
|
||||
|
||||
if (mask & TRACE_ITER_FUNCTION)
|
||||
irqsoff_function_set(set);
|
||||
irqsoff_function_set(tr, set);
|
||||
|
||||
return trace_keep_overwrite(tracer, mask, set);
|
||||
}
|
||||
@@ -586,7 +580,7 @@ static int start_irqsoff_tracer(struct trace_array *tr, int graph)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = register_irqsoff_function(graph, 0);
|
||||
ret = register_irqsoff_function(tr, graph, 0);
|
||||
|
||||
if (!ret && tracing_is_enabled())
|
||||
tracer_enabled = 1;
|
||||
@@ -600,25 +594,37 @@ static void stop_irqsoff_tracer(struct trace_array *tr, int graph)
|
||||
{
|
||||
tracer_enabled = 0;
|
||||
|
||||
unregister_irqsoff_function(graph);
|
||||
unregister_irqsoff_function(tr, graph);
|
||||
}
|
||||
|
||||
static void __irqsoff_tracer_init(struct trace_array *tr)
|
||||
static bool irqsoff_busy;
|
||||
|
||||
static int __irqsoff_tracer_init(struct trace_array *tr)
|
||||
{
|
||||
if (irqsoff_busy)
|
||||
return -EBUSY;
|
||||
|
||||
save_flags = trace_flags;
|
||||
|
||||
/* non overwrite screws up the latency tracers */
|
||||
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
|
||||
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
|
||||
|
||||
tracing_max_latency = 0;
|
||||
tr->max_latency = 0;
|
||||
irqsoff_trace = tr;
|
||||
/* make sure that the tracer is visible */
|
||||
smp_wmb();
|
||||
tracing_reset_online_cpus(&tr->trace_buffer);
|
||||
|
||||
if (start_irqsoff_tracer(tr, is_graph()))
|
||||
ftrace_init_array_ops(tr, irqsoff_tracer_call);
|
||||
|
||||
/* Only toplevel instance supports graph tracing */
|
||||
if (start_irqsoff_tracer(tr, (tr->flags & TRACE_ARRAY_FL_GLOBAL &&
|
||||
is_graph())))
|
||||
printk(KERN_ERR "failed to start irqsoff tracer\n");
|
||||
|
||||
irqsoff_busy = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void irqsoff_tracer_reset(struct trace_array *tr)
|
||||
@@ -630,6 +636,9 @@ static void irqsoff_tracer_reset(struct trace_array *tr)
|
||||
|
||||
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
|
||||
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
|
||||
ftrace_reset_array_ops(tr);
|
||||
|
||||
irqsoff_busy = false;
|
||||
}
|
||||
|
||||
static void irqsoff_tracer_start(struct trace_array *tr)
|
||||
@@ -647,8 +656,7 @@ static int irqsoff_tracer_init(struct trace_array *tr)
|
||||
{
|
||||
trace_type = TRACER_IRQS_OFF;
|
||||
|
||||
__irqsoff_tracer_init(tr);
|
||||
return 0;
|
||||
return __irqsoff_tracer_init(tr);
|
||||
}
|
||||
static struct tracer irqsoff_tracer __read_mostly =
|
||||
{
|
||||
@@ -668,6 +676,7 @@ static struct tracer irqsoff_tracer __read_mostly =
|
||||
#endif
|
||||
.open = irqsoff_trace_open,
|
||||
.close = irqsoff_trace_close,
|
||||
.allow_instances = true,
|
||||
.use_max_tr = true,
|
||||
};
|
||||
# define register_irqsoff(trace) register_tracer(&trace)
|
||||
@@ -680,8 +689,7 @@ static int preemptoff_tracer_init(struct trace_array *tr)
|
||||
{
|
||||
trace_type = TRACER_PREEMPT_OFF;
|
||||
|
||||
__irqsoff_tracer_init(tr);
|
||||
return 0;
|
||||
return __irqsoff_tracer_init(tr);
|
||||
}
|
||||
|
||||
static struct tracer preemptoff_tracer __read_mostly =
|
||||
@@ -702,6 +710,7 @@ static struct tracer preemptoff_tracer __read_mostly =
|
||||
#endif
|
||||
.open = irqsoff_trace_open,
|
||||
.close = irqsoff_trace_close,
|
||||
.allow_instances = true,
|
||||
.use_max_tr = true,
|
||||
};
|
||||
# define register_preemptoff(trace) register_tracer(&trace)
|
||||
@@ -716,8 +725,7 @@ static int preemptirqsoff_tracer_init(struct trace_array *tr)
|
||||
{
|
||||
trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF;
|
||||
|
||||
__irqsoff_tracer_init(tr);
|
||||
return 0;
|
||||
return __irqsoff_tracer_init(tr);
|
||||
}
|
||||
|
||||
static struct tracer preemptirqsoff_tracer __read_mostly =
|
||||
@@ -738,6 +746,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
|
||||
#endif
|
||||
.open = irqsoff_trace_open,
|
||||
.close = irqsoff_trace_close,
|
||||
.allow_instances = true,
|
||||
.use_max_tr = true,
|
||||
};
|
||||
|
||||
|
@@ -40,27 +40,27 @@ struct trace_kprobe {
|
||||
(sizeof(struct probe_arg) * (n)))
|
||||
|
||||
|
||||
static __kprobes bool trace_kprobe_is_return(struct trace_kprobe *tk)
|
||||
static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
|
||||
{
|
||||
return tk->rp.handler != NULL;
|
||||
}
|
||||
|
||||
static __kprobes const char *trace_kprobe_symbol(struct trace_kprobe *tk)
|
||||
static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk)
|
||||
{
|
||||
return tk->symbol ? tk->symbol : "unknown";
|
||||
}
|
||||
|
||||
static __kprobes unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
|
||||
static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk)
|
||||
{
|
||||
return tk->rp.kp.offset;
|
||||
}
|
||||
|
||||
static __kprobes bool trace_kprobe_has_gone(struct trace_kprobe *tk)
|
||||
static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
|
||||
{
|
||||
return !!(kprobe_gone(&tk->rp.kp));
|
||||
}
|
||||
|
||||
static __kprobes bool trace_kprobe_within_module(struct trace_kprobe *tk,
|
||||
static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
|
||||
struct module *mod)
|
||||
{
|
||||
int len = strlen(mod->name);
|
||||
@@ -68,7 +68,7 @@ static __kprobes bool trace_kprobe_within_module(struct trace_kprobe *tk,
|
||||
return strncmp(mod->name, name, len) == 0 && name[len] == ':';
|
||||
}
|
||||
|
||||
static __kprobes bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
|
||||
static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk)
|
||||
{
|
||||
return !!strchr(trace_kprobe_symbol(tk), ':');
|
||||
}
|
||||
@@ -132,19 +132,21 @@ struct symbol_cache *alloc_symbol_cache(const char *sym, long offset)
|
||||
* Kprobes-specific fetch functions
|
||||
*/
|
||||
#define DEFINE_FETCH_stack(type) \
|
||||
static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
|
||||
static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \
|
||||
void *offset, void *dest) \
|
||||
{ \
|
||||
*(type *)dest = (type)regs_get_kernel_stack_nth(regs, \
|
||||
(unsigned int)((unsigned long)offset)); \
|
||||
}
|
||||
} \
|
||||
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type));
|
||||
|
||||
DEFINE_BASIC_FETCH_FUNCS(stack)
|
||||
/* No string on the stack entry */
|
||||
#define fetch_stack_string NULL
|
||||
#define fetch_stack_string_size NULL
|
||||
|
||||
#define DEFINE_FETCH_memory(type) \
|
||||
static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
|
||||
static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \
|
||||
void *addr, void *dest) \
|
||||
{ \
|
||||
type retval; \
|
||||
@@ -152,14 +154,16 @@ static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
|
||||
*(type *)dest = 0; \
|
||||
else \
|
||||
*(type *)dest = retval; \
|
||||
}
|
||||
} \
|
||||
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type));
|
||||
|
||||
DEFINE_BASIC_FETCH_FUNCS(memory)
|
||||
/*
|
||||
* Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
|
||||
* length and relative data location.
|
||||
*/
|
||||
static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
|
||||
void *addr, void *dest)
|
||||
static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
|
||||
void *addr, void *dest)
|
||||
{
|
||||
long ret;
|
||||
int maxlen = get_rloc_len(*(u32 *)dest);
|
||||
@@ -193,10 +197,11 @@ static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
|
||||
get_rloc_offs(*(u32 *)dest));
|
||||
}
|
||||
}
|
||||
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string));
|
||||
|
||||
/* Return the length of the string -- including the null terminating byte */
|
||||
static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
|
||||
void *addr, void *dest)
|
||||
static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
|
||||
void *addr, void *dest)
|
||||
{
|
||||
mm_segment_t old_fs;
|
||||
int ret, len = 0;
|
||||
@@ -219,17 +224,19 @@ static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
|
||||
else
|
||||
*(u32 *)dest = len;
|
||||
}
|
||||
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size));
|
||||
|
||||
#define DEFINE_FETCH_symbol(type) \
|
||||
__kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, \
|
||||
void *data, void *dest) \
|
||||
void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\
|
||||
{ \
|
||||
struct symbol_cache *sc = data; \
|
||||
if (sc->addr) \
|
||||
fetch_memory_##type(regs, (void *)sc->addr, dest); \
|
||||
else \
|
||||
*(type *)dest = 0; \
|
||||
}
|
||||
} \
|
||||
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type));
|
||||
|
||||
DEFINE_BASIC_FETCH_FUNCS(symbol)
|
||||
DEFINE_FETCH_symbol(string)
|
||||
DEFINE_FETCH_symbol(string_size)
|
||||
@@ -907,7 +914,7 @@ static const struct file_operations kprobe_profile_ops = {
|
||||
};
|
||||
|
||||
/* Kprobe handler */
|
||||
static __kprobes void
|
||||
static nokprobe_inline void
|
||||
__kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
|
||||
struct ftrace_event_file *ftrace_file)
|
||||
{
|
||||
@@ -943,7 +950,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
|
||||
entry, irq_flags, pc, regs);
|
||||
}
|
||||
|
||||
static __kprobes void
|
||||
static void
|
||||
kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
|
||||
{
|
||||
struct event_file_link *link;
|
||||
@@ -951,9 +958,10 @@ kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
|
||||
list_for_each_entry_rcu(link, &tk->tp.files, list)
|
||||
__kprobe_trace_func(tk, regs, link->file);
|
||||
}
|
||||
NOKPROBE_SYMBOL(kprobe_trace_func);
|
||||
|
||||
/* Kretprobe handler */
|
||||
static __kprobes void
|
||||
static nokprobe_inline void
|
||||
__kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
|
||||
struct pt_regs *regs,
|
||||
struct ftrace_event_file *ftrace_file)
|
||||
@@ -991,7 +999,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
|
||||
entry, irq_flags, pc, regs);
|
||||
}
|
||||
|
||||
static __kprobes void
|
||||
static void
|
||||
kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
@@ -1000,6 +1008,7 @@ kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
|
||||
list_for_each_entry_rcu(link, &tk->tp.files, list)
|
||||
__kretprobe_trace_func(tk, ri, regs, link->file);
|
||||
}
|
||||
NOKPROBE_SYMBOL(kretprobe_trace_func);
|
||||
|
||||
/* Event entry printers */
|
||||
static enum print_line_t
|
||||
@@ -1131,7 +1140,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
|
||||
/* Kprobe profile handler */
|
||||
static __kprobes void
|
||||
static void
|
||||
kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
|
||||
{
|
||||
struct ftrace_event_call *call = &tk->tp.call;
|
||||
@@ -1158,9 +1167,10 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
|
||||
store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
|
||||
perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
|
||||
}
|
||||
NOKPROBE_SYMBOL(kprobe_perf_func);
|
||||
|
||||
/* Kretprobe profile handler */
|
||||
static __kprobes void
|
||||
static void
|
||||
kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
@@ -1188,6 +1198,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
|
||||
store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
|
||||
perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL);
|
||||
}
|
||||
NOKPROBE_SYMBOL(kretprobe_perf_func);
|
||||
#endif /* CONFIG_PERF_EVENTS */
|
||||
|
||||
/*
|
||||
@@ -1196,9 +1207,8 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
|
||||
* kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe
|
||||
* lockless, but we can't race with this __init function.
|
||||
*/
|
||||
static __kprobes
|
||||
int kprobe_register(struct ftrace_event_call *event,
|
||||
enum trace_reg type, void *data)
|
||||
static int kprobe_register(struct ftrace_event_call *event,
|
||||
enum trace_reg type, void *data)
|
||||
{
|
||||
struct trace_kprobe *tk = (struct trace_kprobe *)event->data;
|
||||
struct ftrace_event_file *file = data;
|
||||
@@ -1224,8 +1234,7 @@ int kprobe_register(struct ftrace_event_call *event,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __kprobes
|
||||
int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
|
||||
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
|
||||
{
|
||||
struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
|
||||
|
||||
@@ -1239,9 +1248,10 @@ int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
|
||||
#endif
|
||||
return 0;	/* We don't tweak the kernel, so just return 0 */
|
||||
}
|
||||
NOKPROBE_SYMBOL(kprobe_dispatcher);
|
||||
|
||||
static __kprobes
|
||||
int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
|
||||
static int
|
||||
kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
|
||||
{
|
||||
struct trace_kprobe *tk = container_of(ri->rp, struct trace_kprobe, rp);
|
||||
|
||||
@@ -1255,6 +1265,7 @@ int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
|
||||
#endif
|
||||
return 0;	/* We don't tweak the kernel, so just return 0 */
|
||||
}
|
||||
NOKPROBE_SYMBOL(kretprobe_dispatcher);
|
||||
|
||||
static struct trace_event_functions kretprobe_funcs = {
|
||||
.trace = print_kretprobe_event
|
||||
@@ -1377,6 +1388,9 @@ static __init int kprobe_trace_self_tests_init(void)
|
||||
struct trace_kprobe *tk;
|
||||
struct ftrace_event_file *file;
|
||||
|
||||
if (tracing_is_disabled())
|
||||
return -ENODEV;
|
||||
|
||||
target = kprobe_trace_selftest_target;
|
||||
|
||||
pr_info("Testing kprobe tracing: ");
|
||||
|
@@ -91,7 +91,6 @@ struct tracer nop_trace __read_mostly =
|
||||
.name = "nop",
|
||||
.init = nop_trace_init,
|
||||
.reset = nop_trace_reset,
|
||||
.wait_pipe = poll_wait_pipe,
|
||||
#ifdef CONFIG_FTRACE_SELFTEST
|
||||
.selftest = trace_selftest_startup_nop,
|
||||
#endif
|
||||
|
@@ -125,6 +125,34 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(trace_seq_printf);
|
||||
|
||||
/**
 * trace_seq_bitmask - put a list of longs as a bitmask print output
 * @s: trace sequence descriptor
 * @maskp: points to an array of unsigned longs that represent a bitmask
 * @nmaskbits: The number of bits that are valid in @maskp
 *
 * Returns 0 if the output would overflow the buffer's free
 * space, 1 otherwise.
 *
 * Writes an ASCII representation of a bitmask into @s.
 */
|
||||
int
|
||||
trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp,
|
||||
int nmaskbits)
|
||||
{
|
||||
int len = (PAGE_SIZE - 1) - s->len;
|
||||
int ret;
|
||||
|
||||
if (s->full || !len)
|
||||
return 0;
|
||||
|
||||
ret = bitmap_scnprintf(s->buffer, len, maskp, nmaskbits);
|
||||
s->len += ret;
|
||||
|
||||
return 1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(trace_seq_bitmask);
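For a sense of what the new helper emits: the buffer ends up holding the mask as comma-separated hex words, most significant word first, in the familiar /proc/irq/N/smp_affinity style. The snippet below is only a rough user-space illustration of that output shape; it is not the kernel's bitmap_scnprintf() and makes no claim about its exact width handling.

#include <stdio.h>
#include <stdint.h>

/*
 * Print 32-bit words of a mask in hex, most significant word first,
 * comma separated -- an approximation of the bitmask output style.
 */
static void print_mask(const uint32_t *words, int nwords)
{
	int i;

	for (i = nwords - 1; i >= 0; i--)
		printf("%08x%c", words[i], i ? ',' : '\n');
}

int main(void)
{
	uint32_t cpus[2] = { 0x0000000f, 0x0 };	/* CPUs 0-3 set */

	print_mask(cpus, 2);	/* prints "00000000,0000000f" */
	return 0;
}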
|
||||
|
||||
/**
|
||||
* trace_seq_vprintf - sequence printing of trace information
|
||||
* @s: trace sequence descriptor
|
||||
@@ -398,6 +426,19 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
|
||||
EXPORT_SYMBOL(ftrace_print_symbols_seq_u64);
|
||||
#endif
|
||||
|
||||
const char *
|
||||
ftrace_print_bitmask_seq(struct trace_seq *p, void *bitmask_ptr,
|
||||
unsigned int bitmask_size)
|
||||
{
|
||||
const char *ret = p->buffer + p->len;
|
||||
|
||||
trace_seq_bitmask(p, bitmask_ptr, bitmask_size * 8);
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ftrace_print_bitmask_seq);
|
||||
|
||||
const char *
|
||||
ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
|
||||
{
|
||||
|
@@ -37,13 +37,13 @@ const char *reserved_field_names[] = {
|
||||
|
||||
/* Printing in basic type function template */
|
||||
#define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt) \
|
||||
__kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
|
||||
const char *name, \
|
||||
void *data, void *ent) \
|
||||
int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \
|
||||
void *data, void *ent) \
|
||||
{ \
|
||||
return trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \
|
||||
} \
|
||||
const char PRINT_TYPE_FMT_NAME(type)[] = fmt;
|
||||
const char PRINT_TYPE_FMT_NAME(type)[] = fmt; \
|
||||
NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type));
|
||||
|
||||
DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x")
|
||||
DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x")
|
||||
@@ -55,9 +55,8 @@ DEFINE_BASIC_PRINT_TYPE_FUNC(s32, "%d")
|
||||
DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld")
|
||||
|
||||
/* Print type function for string type */
|
||||
__kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
|
||||
const char *name,
|
||||
void *data, void *ent)
|
||||
int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name,
|
||||
void *data, void *ent)
|
||||
{
|
||||
int len = *(u32 *)data >> 16;
|
||||
|
||||
@@ -67,6 +66,7 @@ __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s,
|
||||
return trace_seq_printf(s, " %s=\"%s\"", name,
|
||||
(const char *)get_loc_data(data, ent));
|
||||
}
|
||||
NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string));
|
||||
|
||||
const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
|
||||
|
||||
@@ -81,23 +81,24 @@ const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\"";
|
||||
|
||||
/* Data fetch function templates */
|
||||
#define DEFINE_FETCH_reg(type) \
|
||||
__kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \
|
||||
void *offset, void *dest) \
|
||||
void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest) \
|
||||
{ \
|
||||
*(type *)dest = (type)regs_get_register(regs, \
|
||||
(unsigned int)((unsigned long)offset)); \
|
||||
}
|
||||
} \
|
||||
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type));
|
||||
DEFINE_BASIC_FETCH_FUNCS(reg)
|
||||
/* No string on the register */
|
||||
#define fetch_reg_string NULL
|
||||
#define fetch_reg_string_size NULL
|
||||
|
||||
#define DEFINE_FETCH_retval(type) \
|
||||
__kprobes void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \
|
||||
void *dummy, void *dest) \
|
||||
void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \
|
||||
void *dummy, void *dest) \
|
||||
{ \
|
||||
*(type *)dest = (type)regs_return_value(regs); \
|
||||
}
|
||||
} \
|
||||
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type));
|
||||
DEFINE_BASIC_FETCH_FUNCS(retval)
|
||||
/* No string on the retval */
|
||||
#define fetch_retval_string NULL
|
||||
@@ -112,8 +113,8 @@ struct deref_fetch_param {
|
||||
};
|
||||
|
||||
#define DEFINE_FETCH_deref(type) \
|
||||
__kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \
|
||||
void *data, void *dest) \
|
||||
void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \
|
||||
void *data, void *dest) \
|
||||
{ \
|
||||
struct deref_fetch_param *dprm = data; \
|
||||
unsigned long addr; \
|
||||
@@ -123,12 +124,13 @@ __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \
|
||||
dprm->fetch(regs, (void *)addr, dest); \
|
||||
} else \
|
||||
*(type *)dest = 0; \
|
||||
}
|
||||
} \
|
||||
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type));
|
||||
DEFINE_BASIC_FETCH_FUNCS(deref)
|
||||
DEFINE_FETCH_deref(string)
|
||||
|
||||
__kprobes void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
|
||||
void *data, void *dest)
|
||||
void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
|
||||
void *data, void *dest)
|
||||
{
|
||||
struct deref_fetch_param *dprm = data;
|
||||
unsigned long addr;
|
||||
@@ -140,16 +142,18 @@ __kprobes void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs,
|
||||
} else
|
||||
*(string_size *)dest = 0;
|
||||
}
|
||||
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size));
|
||||
|
||||
static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
|
||||
static void update_deref_fetch_param(struct deref_fetch_param *data)
|
||||
{
|
||||
if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
|
||||
update_deref_fetch_param(data->orig.data);
|
||||
else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
|
||||
update_symbol_cache(data->orig.data);
|
||||
}
|
||||
NOKPROBE_SYMBOL(update_deref_fetch_param);
|
||||
|
||||
static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
|
||||
static void free_deref_fetch_param(struct deref_fetch_param *data)
|
||||
{
|
||||
if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
|
||||
free_deref_fetch_param(data->orig.data);
|
||||
@@ -157,6 +161,7 @@ static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
|
||||
free_symbol_cache(data->orig.data);
|
||||
kfree(data);
|
||||
}
|
||||
NOKPROBE_SYMBOL(free_deref_fetch_param);
|
||||
|
||||
/* Bitfield fetch function */
|
||||
struct bitfield_fetch_param {
|
||||
@@ -166,8 +171,8 @@ struct bitfield_fetch_param {
|
||||
};
|
||||
|
||||
#define DEFINE_FETCH_bitfield(type) \
|
||||
__kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \
|
||||
void *data, void *dest) \
|
||||
void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \
|
||||
void *data, void *dest) \
|
||||
{ \
|
||||
struct bitfield_fetch_param *bprm = data; \
|
||||
type buf = 0; \
|
||||
@@ -177,13 +182,13 @@ __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \
|
||||
buf >>= bprm->low_shift; \
|
||||
} \
|
||||
*(type *)dest = buf; \
|
||||
}
|
||||
|
||||
} \
|
||||
NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type));
|
||||
DEFINE_BASIC_FETCH_FUNCS(bitfield)
|
||||
#define fetch_bitfield_string NULL
|
||||
#define fetch_bitfield_string_size NULL
|
||||
|
||||
static __kprobes void
|
||||
static void
|
||||
update_bitfield_fetch_param(struct bitfield_fetch_param *data)
|
||||
{
|
||||
/*
|
||||
@@ -196,7 +201,7 @@ update_bitfield_fetch_param(struct bitfield_fetch_param *data)
|
||||
update_symbol_cache(data->orig.data);
|
||||
}
|
||||
|
||||
static __kprobes void
|
||||
static void
|
||||
free_bitfield_fetch_param(struct bitfield_fetch_param *data)
|
||||
{
|
||||
/*
|
||||
@@ -255,17 +260,17 @@ fail:
|
||||
}
|
||||
|
||||
/* Special function : only accept unsigned long */
|
||||
static __kprobes void fetch_kernel_stack_address(struct pt_regs *regs,
|
||||
void *dummy, void *dest)
|
||||
static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest)
|
||||
{
|
||||
*(unsigned long *)dest = kernel_stack_pointer(regs);
|
||||
}
|
||||
NOKPROBE_SYMBOL(fetch_kernel_stack_address);
|
||||
|
||||
static __kprobes void fetch_user_stack_address(struct pt_regs *regs,
|
||||
void *dummy, void *dest)
|
||||
static void fetch_user_stack_address(struct pt_regs *regs, void *dummy, void *dest)
|
||||
{
|
||||
*(unsigned long *)dest = user_stack_pointer(regs);
|
||||
}
|
||||
NOKPROBE_SYMBOL(fetch_user_stack_address);
|
||||
|
||||
static fetch_func_t get_fetch_size_function(const struct fetch_type *type,
|
||||
fetch_func_t orig_fn,
|
||||
|
@@ -81,13 +81,13 @@
|
||||
*/
|
||||
#define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs))
|
||||
|
||||
static inline void *get_rloc_data(u32 *dl)
|
||||
static nokprobe_inline void *get_rloc_data(u32 *dl)
|
||||
{
|
||||
return (u8 *)dl + get_rloc_offs(*dl);
|
||||
}
|
||||
|
||||
/* For data_loc conversion */
|
||||
static inline void *get_loc_data(u32 *dl, void *ent)
|
||||
static nokprobe_inline void *get_loc_data(u32 *dl, void *ent)
|
||||
{
|
||||
return (u8 *)ent + get_rloc_offs(*dl);
|
||||
}
|
||||
@@ -136,9 +136,8 @@ typedef u32 string_size;
|
||||
|
||||
/* Printing in basic type function template */
|
||||
#define DECLARE_BASIC_PRINT_TYPE_FUNC(type) \
|
||||
__kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \
|
||||
const char *name, \
|
||||
void *data, void *ent); \
|
||||
int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \
|
||||
void *data, void *ent); \
|
||||
extern const char PRINT_TYPE_FMT_NAME(type)[]
|
||||
|
||||
DECLARE_BASIC_PRINT_TYPE_FUNC(u8);
|
||||
@@ -303,7 +302,7 @@ static inline bool trace_probe_is_registered(struct trace_probe *tp)
|
||||
return !!(tp->flags & TP_FLAG_REGISTERED);
|
||||
}
|
||||
|
||||
static inline __kprobes void call_fetch(struct fetch_param *fprm,
|
||||
static nokprobe_inline void call_fetch(struct fetch_param *fprm,
|
||||
struct pt_regs *regs, void *dest)
|
||||
{
|
||||
return fprm->fn(regs, fprm->data, dest);
|
||||
@@ -351,7 +350,7 @@ extern ssize_t traceprobe_probes_write(struct file *file,
|
||||
extern int traceprobe_command(const char *buf, int (*createfn)(int, char**));
|
||||
|
||||
/* Sum up total data length for dynamic arraies (strings) */
|
||||
static inline __kprobes int
|
||||
static nokprobe_inline int
|
||||
__get_data_size(struct trace_probe *tp, struct pt_regs *regs)
|
||||
{
|
||||
int i, ret = 0;
|
||||
@@ -367,7 +366,7 @@ __get_data_size(struct trace_probe *tp, struct pt_regs *regs)
|
||||
}
|
||||
|
||||
/* Store the value of each argument */
|
||||
static inline __kprobes void
|
||||
static nokprobe_inline void
|
||||
store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
|
||||
u8 *data, int maxlen)
|
||||
{
|
||||
|
@@ -130,15 +130,9 @@ wakeup_tracer_call(unsigned long ip, unsigned long parent_ip,
|
||||
atomic_dec(&data->disabled);
|
||||
preempt_enable_notrace();
|
||||
}
|
||||
|
||||
static struct ftrace_ops trace_ops __read_mostly =
|
||||
{
|
||||
.func = wakeup_tracer_call,
|
||||
.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
|
||||
};
|
||||
#endif /* CONFIG_FUNCTION_TRACER */
|
||||
|
||||
static int register_wakeup_function(int graph, int set)
|
||||
static int register_wakeup_function(struct trace_array *tr, int graph, int set)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@@ -150,7 +144,7 @@ static int register_wakeup_function(int graph, int set)
|
||||
ret = register_ftrace_graph(&wakeup_graph_return,
|
||||
&wakeup_graph_entry);
|
||||
else
|
||||
ret = register_ftrace_function(&trace_ops);
|
||||
ret = register_ftrace_function(tr->ops);
|
||||
|
||||
if (!ret)
|
||||
function_enabled = true;
|
||||
@@ -158,7 +152,7 @@ static int register_wakeup_function(int graph, int set)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void unregister_wakeup_function(int graph)
|
||||
static void unregister_wakeup_function(struct trace_array *tr, int graph)
|
||||
{
|
||||
if (!function_enabled)
|
||||
return;
|
||||
@@ -166,17 +160,17 @@ static void unregister_wakeup_function(int graph)
|
||||
if (graph)
|
||||
unregister_ftrace_graph();
|
||||
else
|
||||
unregister_ftrace_function(&trace_ops);
|
||||
unregister_ftrace_function(tr->ops);
|
||||
|
||||
function_enabled = false;
|
||||
}
|
||||
|
||||
static void wakeup_function_set(int set)
|
||||
static void wakeup_function_set(struct trace_array *tr, int set)
|
||||
{
|
||||
if (set)
|
||||
register_wakeup_function(is_graph(), 1);
|
||||
register_wakeup_function(tr, is_graph(), 1);
|
||||
else
|
||||
unregister_wakeup_function(is_graph());
|
||||
unregister_wakeup_function(tr, is_graph());
|
||||
}
|
||||
|
||||
static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
|
||||
@@ -184,16 +178,16 @@ static int wakeup_flag_changed(struct trace_array *tr, u32 mask, int set)
|
||||
struct tracer *tracer = tr->current_trace;
|
||||
|
||||
if (mask & TRACE_ITER_FUNCTION)
|
||||
wakeup_function_set(set);
|
||||
wakeup_function_set(tr, set);
|
||||
|
||||
return trace_keep_overwrite(tracer, mask, set);
|
||||
}
|
||||
|
||||
static int start_func_tracer(int graph)
|
||||
static int start_func_tracer(struct trace_array *tr, int graph)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = register_wakeup_function(graph, 0);
|
||||
ret = register_wakeup_function(tr, graph, 0);
|
||||
|
||||
if (!ret && tracing_is_enabled())
|
||||
tracer_enabled = 1;
|
||||
@@ -203,11 +197,11 @@ static int start_func_tracer(int graph)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void stop_func_tracer(int graph)
|
||||
static void stop_func_tracer(struct trace_array *tr, int graph)
|
||||
{
|
||||
tracer_enabled = 0;
|
||||
|
||||
unregister_wakeup_function(graph);
|
||||
unregister_wakeup_function(tr, graph);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
@@ -221,12 +215,12 @@ wakeup_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
|
||||
if (!(is_graph() ^ set))
|
||||
return 0;
|
||||
|
||||
stop_func_tracer(!set);
|
||||
stop_func_tracer(tr, !set);
|
||||
|
||||
wakeup_reset(wakeup_trace);
|
||||
tracing_max_latency = 0;
|
||||
tr->max_latency = 0;
|
||||
|
||||
return start_func_tracer(set);
|
||||
return start_func_tracer(tr, set);
|
||||
}
|
||||
|
||||
static int wakeup_graph_entry(struct ftrace_graph_ent *trace)
|
||||
@@ -350,13 +344,13 @@ static void wakeup_print_header(struct seq_file *s)
|
||||
/*
|
||||
* Should this new latency be reported/recorded?
|
||||
*/
|
||||
static int report_latency(cycle_t delta)
|
||||
static int report_latency(struct trace_array *tr, cycle_t delta)
|
||||
{
|
||||
if (tracing_thresh) {
|
||||
if (delta < tracing_thresh)
|
||||
return 0;
|
||||
} else {
|
||||
if (delta <= tracing_max_latency)
|
||||
if (delta <= tr->max_latency)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
@@ -424,11 +418,11 @@ probe_wakeup_sched_switch(void *ignore,
|
||||
T1 = ftrace_now(cpu);
|
||||
delta = T1-T0;
|
||||
|
||||
if (!report_latency(delta))
|
||||
if (!report_latency(wakeup_trace, delta))
|
||||
goto out_unlock;
|
||||
|
||||
if (likely(!is_tracing_stopped())) {
|
||||
tracing_max_latency = delta;
|
||||
wakeup_trace->max_latency = delta;
|
||||
update_max_tr(wakeup_trace, wakeup_task, wakeup_cpu);
|
||||
}
|
||||
|
||||
@@ -587,7 +581,7 @@ static void start_wakeup_tracer(struct trace_array *tr)
|
||||
*/
|
||||
smp_wmb();
|
||||
|
||||
if (start_func_tracer(is_graph()))
|
||||
if (start_func_tracer(tr, is_graph()))
|
||||
printk(KERN_ERR "failed to start wakeup tracer\n");
|
||||
|
||||
return;
|
||||
@@ -600,13 +594,15 @@ fail_deprobe:
|
||||
static void stop_wakeup_tracer(struct trace_array *tr)
|
||||
{
|
||||
tracer_enabled = 0;
|
||||
stop_func_tracer(is_graph());
|
||||
stop_func_tracer(tr, is_graph());
|
||||
unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
|
||||
unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
|
||||
unregister_trace_sched_wakeup(probe_wakeup, NULL);
|
||||
unregister_trace_sched_migrate_task(probe_wakeup_migrate_task, NULL);
|
||||
}
|
||||
|
||||
static bool wakeup_busy;
|
||||
|
||||
static int __wakeup_tracer_init(struct trace_array *tr)
|
||||
{
|
||||
save_flags = trace_flags;
|
||||
@@ -615,14 +611,20 @@ static int __wakeup_tracer_init(struct trace_array *tr)
|
||||
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, 1);
|
||||
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, 1);
|
||||
|
||||
tracing_max_latency = 0;
|
||||
tr->max_latency = 0;
|
||||
wakeup_trace = tr;
|
||||
ftrace_init_array_ops(tr, wakeup_tracer_call);
|
||||
start_wakeup_tracer(tr);
|
||||
|
||||
wakeup_busy = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int wakeup_tracer_init(struct trace_array *tr)
|
||||
{
|
||||
if (wakeup_busy)
|
||||
return -EBUSY;
|
||||
|
||||
wakeup_dl = 0;
|
||||
wakeup_rt = 0;
|
||||
return __wakeup_tracer_init(tr);
|
||||
@@ -630,6 +632,9 @@ static int wakeup_tracer_init(struct trace_array *tr)
|
||||
|
||||
static int wakeup_rt_tracer_init(struct trace_array *tr)
|
||||
{
|
||||
if (wakeup_busy)
|
||||
return -EBUSY;
|
||||
|
||||
wakeup_dl = 0;
|
||||
wakeup_rt = 1;
|
||||
return __wakeup_tracer_init(tr);
|
||||
@@ -637,6 +642,9 @@ static int wakeup_rt_tracer_init(struct trace_array *tr)
|
||||
|
||||
static int wakeup_dl_tracer_init(struct trace_array *tr)
|
||||
{
|
||||
if (wakeup_busy)
|
||||
return -EBUSY;
|
||||
|
||||
wakeup_dl = 1;
|
||||
wakeup_rt = 0;
|
||||
return __wakeup_tracer_init(tr);
|
||||
@@ -653,6 +661,8 @@ static void wakeup_tracer_reset(struct trace_array *tr)
|
||||
|
||||
set_tracer_flag(tr, TRACE_ITER_LATENCY_FMT, lat_flag);
|
||||
set_tracer_flag(tr, TRACE_ITER_OVERWRITE, overwrite_flag);
|
||||
ftrace_reset_array_ops(tr);
|
||||
wakeup_busy = false;
|
||||
}
|
||||
|
||||
static void wakeup_tracer_start(struct trace_array *tr)
|
||||
@@ -684,6 +694,7 @@ static struct tracer wakeup_tracer __read_mostly =
|
||||
#endif
|
||||
.open = wakeup_trace_open,
|
||||
.close = wakeup_trace_close,
|
||||
.allow_instances = true,
|
||||
.use_max_tr = true,
|
||||
};
|
||||
|
||||
@@ -694,7 +705,6 @@ static struct tracer wakeup_rt_tracer __read_mostly =
|
||||
.reset = wakeup_tracer_reset,
|
||||
.start = wakeup_tracer_start,
|
||||
.stop = wakeup_tracer_stop,
|
||||
.wait_pipe = poll_wait_pipe,
|
||||
.print_max = true,
|
||||
.print_header = wakeup_print_header,
|
||||
.print_line = wakeup_print_line,
|
||||
@@ -706,6 +716,7 @@ static struct tracer wakeup_rt_tracer __read_mostly =
|
||||
#endif
|
||||
.open = wakeup_trace_open,
|
||||
.close = wakeup_trace_close,
|
||||
.allow_instances = true,
|
||||
.use_max_tr = true,
|
||||
};
|
||||
|
||||
@@ -716,7 +727,6 @@ static struct tracer wakeup_dl_tracer __read_mostly =
|
||||
.reset = wakeup_tracer_reset,
|
||||
.start = wakeup_tracer_start,
|
||||
.stop = wakeup_tracer_stop,
|
||||
.wait_pipe = poll_wait_pipe,
|
||||
.print_max = true,
|
||||
.print_header = wakeup_print_header,
|
||||
.print_line = wakeup_print_line,
|
||||
|
@@ -65,7 +65,7 @@ static int trace_test_buffer(struct trace_buffer *buf, unsigned long *count)
|
||||
|
||||
/* Don't allow flipping of max traces now */
|
||||
local_irq_save(flags);
|
||||
arch_spin_lock(&ftrace_max_lock);
|
||||
arch_spin_lock(&buf->tr->max_lock);
|
||||
|
||||
cnt = ring_buffer_entries(buf->buffer);
|
||||
|
||||
@@ -83,7 +83,7 @@ static int trace_test_buffer(struct trace_buffer *buf, unsigned long *count)
|
||||
break;
|
||||
}
|
||||
tracing_on();
|
||||
arch_spin_unlock(&ftrace_max_lock);
|
||||
arch_spin_unlock(&buf->tr->max_lock);
|
||||
local_irq_restore(flags);
|
||||
|
||||
if (count)
|
||||
@@ -161,11 +161,6 @@ static struct ftrace_ops test_probe3 = {
|
||||
.flags = FTRACE_OPS_FL_RECURSION_SAFE,
|
||||
};
|
||||
|
||||
static struct ftrace_ops test_global = {
|
||||
.func = trace_selftest_test_global_func,
|
||||
.flags = FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_RECURSION_SAFE,
|
||||
};
|
||||
|
||||
static void print_counts(void)
|
||||
{
|
||||
printk("(%d %d %d %d %d) ",
|
||||
@@ -185,7 +180,7 @@ static void reset_counts(void)
|
||||
trace_selftest_test_dyn_cnt = 0;
|
||||
}
|
||||
|
||||
static int trace_selftest_ops(int cnt)
|
||||
static int trace_selftest_ops(struct trace_array *tr, int cnt)
|
||||
{
|
||||
int save_ftrace_enabled = ftrace_enabled;
|
||||
struct ftrace_ops *dyn_ops;
|
||||
@@ -220,7 +215,11 @@ static int trace_selftest_ops(int cnt)
|
||||
register_ftrace_function(&test_probe1);
|
||||
register_ftrace_function(&test_probe2);
|
||||
register_ftrace_function(&test_probe3);
|
||||
register_ftrace_function(&test_global);
|
||||
/* First time we are running with main function */
|
||||
if (cnt > 1) {
|
||||
ftrace_init_array_ops(tr, trace_selftest_test_global_func);
|
||||
register_ftrace_function(tr->ops);
|
||||
}
|
||||
|
||||
DYN_FTRACE_TEST_NAME();
|
||||
|
||||
@@ -232,8 +231,10 @@ static int trace_selftest_ops(int cnt)
|
||||
goto out;
|
||||
if (trace_selftest_test_probe3_cnt != 1)
|
||||
goto out;
|
||||
if (trace_selftest_test_global_cnt == 0)
|
||||
goto out;
|
||||
if (cnt > 1) {
|
||||
if (trace_selftest_test_global_cnt == 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
DYN_FTRACE_TEST_NAME2();
|
||||
|
||||
@@ -269,8 +270,10 @@ static int trace_selftest_ops(int cnt)
|
||||
goto out_free;
|
||||
if (trace_selftest_test_probe3_cnt != 3)
|
||||
goto out_free;
|
||||
if (trace_selftest_test_global_cnt == 0)
|
||||
goto out;
|
||||
if (cnt > 1) {
|
||||
if (trace_selftest_test_global_cnt == 0)
|
||||
goto out;
|
||||
}
|
||||
if (trace_selftest_test_dyn_cnt == 0)
|
||||
goto out_free;
|
||||
|
||||
@@ -295,7 +298,9 @@ static int trace_selftest_ops(int cnt)
|
||||
unregister_ftrace_function(&test_probe1);
|
||||
unregister_ftrace_function(&test_probe2);
|
||||
unregister_ftrace_function(&test_probe3);
|
||||
unregister_ftrace_function(&test_global);
|
||||
if (cnt > 1)
|
||||
unregister_ftrace_function(tr->ops);
|
||||
ftrace_reset_array_ops(tr);
|
||||
|
||||
/* Make sure everything is off */
|
||||
reset_counts();
|
||||
@@ -315,9 +320,9 @@ static int trace_selftest_ops(int cnt)
|
||||
}
|
||||
|
||||
/* Test dynamic code modification and ftrace filters */
|
||||
int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
|
||||
struct trace_array *tr,
|
||||
int (*func)(void))
|
||||
static int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
|
||||
struct trace_array *tr,
|
||||
int (*func)(void))
|
||||
{
|
||||
int save_ftrace_enabled = ftrace_enabled;
|
||||
unsigned long count;
|
||||
@@ -388,7 +393,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
|
||||
}
|
||||
|
||||
/* Test the ops with global tracing running */
|
||||
ret = trace_selftest_ops(1);
|
||||
ret = trace_selftest_ops(tr, 1);
|
||||
trace->reset(tr);
|
||||
|
||||
out:
|
||||
@@ -399,7 +404,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
|
||||
|
||||
/* Test the ops with global tracing off */
|
||||
if (!ret)
|
||||
ret = trace_selftest_ops(2);
|
||||
ret = trace_selftest_ops(tr, 2);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -802,7 +807,7 @@ out:
|
||||
int
|
||||
trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
|
||||
{
|
||||
unsigned long save_max = tracing_max_latency;
|
||||
unsigned long save_max = tr->max_latency;
|
||||
unsigned long count;
|
||||
int ret;
|
||||
|
||||
@@ -814,7 +819,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
|
||||
}
|
||||
|
||||
/* reset the max latency */
|
||||
tracing_max_latency = 0;
|
||||
tr->max_latency = 0;
|
||||
/* disable interrupts for a bit */
|
||||
local_irq_disable();
|
||||
udelay(100);
|
||||
@@ -841,7 +846,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
tracing_max_latency = save_max;
|
||||
tr->max_latency = save_max;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -851,7 +856,7 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr)
|
||||
int
|
||||
trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
|
||||
{
|
||||
unsigned long save_max = tracing_max_latency;
|
||||
unsigned long save_max = tr->max_latency;
|
||||
unsigned long count;
|
||||
int ret;
|
||||
|
||||
@@ -876,7 +881,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
|
||||
}
|
||||
|
||||
/* reset the max latency */
|
||||
tracing_max_latency = 0;
|
||||
tr->max_latency = 0;
|
||||
/* disable preemption for a bit */
|
||||
preempt_disable();
|
||||
udelay(100);
|
||||
@@ -903,7 +908,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
|
||||
ret = -1;
|
||||
}
|
||||
|
||||
tracing_max_latency = save_max;
|
||||
tr->max_latency = save_max;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -913,7 +918,7 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr)
|
||||
int
|
||||
trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *tr)
|
||||
{
|
||||
unsigned long save_max = tracing_max_latency;
|
||||
unsigned long save_max = tr->max_latency;
|
||||
unsigned long count;
|
||||
int ret;
|
||||
|
||||
@@ -938,7 +943,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
|
||||
}
|
||||
|
||||
/* reset the max latency */
|
||||
tracing_max_latency = 0;
|
||||
tr->max_latency = 0;
|
||||
|
||||
/* disable preemption and interrupts for a bit */
|
||||
preempt_disable();
|
||||
@@ -973,7 +978,7 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array *
|
||||
}
|
||||
|
||||
/* do the test by disabling interrupts first this time */
|
||||
tracing_max_latency = 0;
|
||||
tr->max_latency = 0;
|
||||
tracing_start();
|
||||
trace->start(tr);
|
||||
|
||||
@@ -1004,7 +1009,7 @@ out:
|
||||
tracing_start();
|
||||
out_no_start:
|
||||
trace->reset(tr);
|
||||
tracing_max_latency = save_max;
|
||||
tr->max_latency = save_max;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1057,7 +1062,7 @@ static int trace_wakeup_test_thread(void *data)
|
||||
int
|
||||
trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
|
||||
{
|
||||
unsigned long save_max = tracing_max_latency;
|
||||
unsigned long save_max = tr->max_latency;
|
||||
struct task_struct *p;
|
||||
struct completion is_ready;
|
||||
unsigned long count;
|
||||
@@ -1083,7 +1088,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
|
||||
}
|
||||
|
||||
/* reset the max latency */
|
||||
tracing_max_latency = 0;
|
||||
tr->max_latency = 0;
|
||||
|
||||
while (p->on_rq) {
|
||||
/*
|
||||
@@ -1113,7 +1118,7 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr)
|
||||
trace->reset(tr);
|
||||
tracing_start();
|
||||
|
||||
tracing_max_latency = save_max;
|
||||
tr->max_latency = save_max;
|
||||
|
||||
/* kill the thread */
|
||||
kthread_stop(p);
|
||||
|
@@ -51,11 +51,33 @@ static DEFINE_MUTEX(stack_sysctl_mutex);
|
||||
int stack_tracer_enabled;
|
||||
static int last_stack_tracer_enabled;
|
||||
|
||||
static inline void print_max_stack(void)
|
||||
{
|
||||
long i;
|
||||
int size;
|
||||
|
||||
pr_emerg(" Depth Size Location (%d entries)\n"
|
||||
" ----- ---- --------\n",
|
||||
max_stack_trace.nr_entries - 1);
|
||||
|
||||
for (i = 0; i < max_stack_trace.nr_entries; i++) {
|
||||
if (stack_dump_trace[i] == ULONG_MAX)
|
||||
break;
|
||||
if (i+1 == max_stack_trace.nr_entries ||
|
||||
stack_dump_trace[i+1] == ULONG_MAX)
|
||||
size = stack_dump_index[i];
|
||||
else
|
||||
size = stack_dump_index[i] - stack_dump_index[i+1];
|
||||
|
||||
pr_emerg("%3ld) %8d %5d %pS\n", i, stack_dump_index[i],
|
||||
size, (void *)stack_dump_trace[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
check_stack(unsigned long ip, unsigned long *stack)
|
||||
{
|
||||
unsigned long this_size, flags;
|
||||
unsigned long *p, *top, *start;
|
||||
unsigned long this_size, flags; unsigned long *p, *top, *start;
|
||||
static int tracer_frame;
|
||||
int frame_size = ACCESS_ONCE(tracer_frame);
|
||||
int i;
|
||||
@@ -85,8 +107,12 @@ check_stack(unsigned long ip, unsigned long *stack)
|
||||
|
||||
max_stack_size = this_size;
|
||||
|
||||
max_stack_trace.nr_entries = 0;
|
||||
max_stack_trace.skip = 3;
|
||||
max_stack_trace.nr_entries = 0;
|
||||
|
||||
if (using_ftrace_ops_list_func())
|
||||
max_stack_trace.skip = 4;
|
||||
else
|
||||
max_stack_trace.skip = 3;
|
||||
|
||||
save_stack_trace(&max_stack_trace);
|
||||
|
||||
@@ -145,8 +171,12 @@ check_stack(unsigned long ip, unsigned long *stack)
|
||||
i++;
|
||||
}
|
||||
|
||||
BUG_ON(current != &init_task &&
|
||||
*(end_of_stack(current)) != STACK_END_MAGIC);
|
||||
if ((current != &init_task &&
|
||||
*(end_of_stack(current)) != STACK_END_MAGIC)) {
|
||||
print_max_stack();
|
||||
BUG();
|
||||
}
|
||||
|
||||
out:
|
||||
arch_spin_unlock(&max_stack_lock);
|
||||
local_irq_restore(flags);
|
||||
|
@@ -108,8 +108,8 @@ static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n)
|
||||
* Uprobes-specific fetch functions
|
||||
*/
|
||||
#define DEFINE_FETCH_stack(type) \
|
||||
static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\
|
||||
void *offset, void *dest) \
|
||||
static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \
|
||||
void *offset, void *dest) \
|
||||
{ \
|
||||
*(type *)dest = (type)get_user_stack_nth(regs, \
|
||||
((unsigned long)offset)); \
|
||||
@@ -120,8 +120,8 @@ DEFINE_BASIC_FETCH_FUNCS(stack)
|
||||
#define fetch_stack_string_size NULL
|
||||
|
||||
#define DEFINE_FETCH_memory(type) \
|
||||
static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\
|
||||
void *addr, void *dest) \
|
||||
static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \
|
||||
void *addr, void *dest) \
|
||||
{ \
|
||||
type retval; \
|
||||
void __user *vaddr = (void __force __user *) addr; \
|
||||
@@ -136,8 +136,8 @@ DEFINE_BASIC_FETCH_FUNCS(memory)
|
||||
* Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
|
||||
* length and relative data location.
|
||||
*/
|
||||
static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
|
||||
void *addr, void *dest)
|
||||
static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
|
||||
void *addr, void *dest)
|
||||
{
|
||||
long ret;
|
||||
u32 rloc = *(u32 *)dest;
|
||||
@@ -158,8 +158,8 @@ static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
|
||||
}
|
||||
}
|
||||
|
||||
static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
|
||||
void *addr, void *dest)
|
||||
static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs,
|
||||
void *addr, void *dest)
|
||||
{
|
||||
int len;
|
||||
void __user *vaddr = (void __force __user *) addr;
|
||||
@@ -184,8 +184,8 @@ static unsigned long translate_user_vaddr(void *file_offset)
|
||||
}
|
||||
|
||||
#define DEFINE_FETCH_file_offset(type) \
|
||||
static __kprobes void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs,\
|
||||
void *offset, void *dest) \
|
||||
static void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs, \
|
||||
void *offset, void *dest)\
|
||||
{ \
|
||||
void *vaddr = (void *)translate_user_vaddr(offset); \
|
||||
\
|
||||
@@ -893,6 +893,9 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
|
||||
int ret;
|
||||
|
||||
if (file) {
|
||||
if (tu->tp.flags & TP_FLAG_PROFILE)
|
||||
return -EINTR;
|
||||
|
||||
link = kmalloc(sizeof(*link), GFP_KERNEL);
|
||||
if (!link)
|
||||
return -ENOMEM;
|
||||
@@ -901,29 +904,40 @@ probe_event_enable(struct trace_uprobe *tu, struct ftrace_event_file *file,
|
||||
list_add_tail_rcu(&link->list, &tu->tp.files);
|
||||
|
||||
tu->tp.flags |= TP_FLAG_TRACE;
|
||||
} else
|
||||
tu->tp.flags |= TP_FLAG_PROFILE;
|
||||
} else {
|
||||
if (tu->tp.flags & TP_FLAG_TRACE)
|
||||
return -EINTR;
|
||||
|
||||
ret = uprobe_buffer_enable();
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
tu->tp.flags |= TP_FLAG_PROFILE;
|
||||
}
|
||||
|
||||
WARN_ON(!uprobe_filter_is_empty(&tu->filter));
|
||||
|
||||
if (enabled)
|
||||
return 0;
|
||||
|
||||
ret = uprobe_buffer_enable();
|
||||
if (ret)
|
||||
goto err_flags;
|
||||
|
||||
tu->consumer.filter = filter;
|
||||
ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
|
||||
if (ret) {
|
||||
if (file) {
|
||||
list_del(&link->list);
|
||||
kfree(link);
|
||||
tu->tp.flags &= ~TP_FLAG_TRACE;
|
||||
} else
|
||||
tu->tp.flags &= ~TP_FLAG_PROFILE;
|
||||
}
|
||||
if (ret)
|
||||
goto err_buffer;
|
||||
|
||||
return 0;
|
||||
|
||||
err_buffer:
|
||||
uprobe_buffer_disable();
|
||||
|
||||
err_flags:
|
||||
if (file) {
|
||||
list_del(&link->list);
|
||||
kfree(link);
|
||||
tu->tp.flags &= ~TP_FLAG_TRACE;
|
||||
} else {
|
||||
tu->tp.flags &= ~TP_FLAG_PROFILE;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1009,9 +1023,32 @@ uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
|
||||
return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
|
||||
}
|
||||
|
||||
static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
|
||||
{
|
||||
bool done;
|
||||
|
||||
write_lock(&tu->filter.rwlock);
|
||||
if (event->hw.tp_target) {
|
||||
list_del(&event->hw.tp_list);
|
||||
done = tu->filter.nr_systemwide ||
|
||||
(event->hw.tp_target->flags & PF_EXITING) ||
|
||||
uprobe_filter_event(tu, event);
|
||||
} else {
|
||||
tu->filter.nr_systemwide--;
|
||||
done = tu->filter.nr_systemwide;
|
||||
}
|
||||
write_unlock(&tu->filter.rwlock);
|
||||
|
||||
if (!done)
|
||||
return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
|
||||
{
|
||||
bool done;
|
||||
int err;
|
||||
|
||||
write_lock(&tu->filter.rwlock);
|
||||
if (event->hw.tp_target) {
|
||||
@@ -1033,32 +1070,13 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
|
||||
}
|
||||
write_unlock(&tu->filter.rwlock);
|
||||
|
||||
if (!done)
|
||||
uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
|
||||
{
|
||||
bool done;
|
||||
|
||||
write_lock(&tu->filter.rwlock);
|
||||
if (event->hw.tp_target) {
|
||||
list_del(&event->hw.tp_list);
|
||||
done = tu->filter.nr_systemwide ||
|
||||
(event->hw.tp_target->flags & PF_EXITING) ||
|
||||
uprobe_filter_event(tu, event);
|
||||
} else {
|
||||
tu->filter.nr_systemwide--;
|
||||
done = tu->filter.nr_systemwide;
|
||||
err = 0;
|
||||
if (!done) {
|
||||
err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
|
||||
if (err)
|
||||
uprobe_perf_close(tu, event);
|
||||
}
|
||||
write_unlock(&tu->filter.rwlock);
|
||||
|
||||
if (!done)
|
||||
uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
|
||||
|
||||
return 0;
|
||||
return err;
|
||||
}
|
||||
|
||||
static bool uprobe_perf_filter(struct uprobe_consumer *uc,
|
||||
@@ -1197,12 +1215,6 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
|
||||
|
||||
current->utask->vaddr = (unsigned long) &udd;
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
if ((tu->tp.flags & TP_FLAG_TRACE) == 0 &&
|
||||
!uprobe_perf_filter(&tu->consumer, 0, current->mm))
|
||||
return UPROBE_HANDLER_REMOVE;
|
||||
#endif
|
||||
|
||||
if (WARN_ON_ONCE(!uprobe_cpu_buffer))
|
||||
return 0;
|
||||
|
||||
|
@@ -492,33 +492,29 @@ static int sys_tracepoint_refcount;
|
||||
|
||||
void syscall_regfunc(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct task_struct *g, *t;
|
||||
struct task_struct *p, *t;
|
||||
|
||||
if (!sys_tracepoint_refcount) {
|
||||
read_lock_irqsave(&tasklist_lock, flags);
|
||||
do_each_thread(g, t) {
|
||||
/* Skip kernel threads. */
|
||||
if (t->mm)
|
||||
set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
|
||||
} while_each_thread(g, t);
|
||||
read_unlock_irqrestore(&tasklist_lock, flags);
|
||||
read_lock(&tasklist_lock);
|
||||
for_each_process_thread(p, t) {
|
||||
set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
|
||||
}
|
||||
read_unlock(&tasklist_lock);
|
||||
}
|
||||
sys_tracepoint_refcount++;
|
||||
}
|
||||
|
||||
void syscall_unregfunc(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct task_struct *g, *t;
|
||||
struct task_struct *p, *t;
|
||||
|
||||
sys_tracepoint_refcount--;
|
||||
if (!sys_tracepoint_refcount) {
|
||||
read_lock_irqsave(&tasklist_lock, flags);
|
||||
do_each_thread(g, t) {
|
||||
read_lock(&tasklist_lock);
|
||||
for_each_process_thread(p, t) {
|
||||
clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
|
||||
} while_each_thread(g, t);
|
||||
read_unlock_irqrestore(&tasklist_lock, flags);
|
||||
}
|
||||
read_unlock(&tasklist_lock);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@@ -31,6 +31,12 @@
|
||||
|
||||
int watchdog_user_enabled = 1;
|
||||
int __read_mostly watchdog_thresh = 10;
|
||||
#ifdef CONFIG_SMP
|
||||
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
|
||||
#else
|
||||
#define sysctl_softlockup_all_cpu_backtrace 0
|
||||
#endif
|
||||
|
||||
static int __read_mostly watchdog_running;
|
||||
static u64 __read_mostly sample_period;
|
||||
|
||||
@@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
|
||||
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
|
||||
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
|
||||
#endif
|
||||
static unsigned long soft_lockup_nmi_warn;
|
||||
|
||||
/* boot commands */
|
||||
/*
|
||||
@@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str)
|
||||
}
|
||||
__setup("nosoftlockup", nosoftlockup_setup);
|
||||
/* */
|
||||
#ifdef CONFIG_SMP
|
||||
static int __init softlockup_all_cpu_backtrace_setup(char *str)
|
||||
{
|
||||
sysctl_softlockup_all_cpu_backtrace =
|
||||
!!simple_strtol(str, NULL, 0);
|
||||
return 1;
|
||||
}
|
||||
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Hard-lockup warnings should be triggered after just a few seconds. Soft-
|
||||
@@ -271,6 +287,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
||||
unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
|
||||
struct pt_regs *regs = get_irq_regs();
|
||||
int duration;
|
||||
int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace;
|
||||
|
||||
/* kick the hardlockup detector */
|
||||
watchdog_interrupt_count();
|
||||
@@ -317,6 +334,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
||||
if (__this_cpu_read(soft_watchdog_warn) == true)
|
||||
return HRTIMER_RESTART;
|
||||
|
||||
if (softlockup_all_cpu_backtrace) {
|
||||
/* Prevent multiple soft-lockup reports if one cpu is already
|
||||
* engaged in dumping cpu back traces
|
||||
*/
|
||||
if (test_and_set_bit(0, &soft_lockup_nmi_warn)) {
|
||||
/* Someone else will report us. Let's give up */
|
||||
__this_cpu_write(soft_watchdog_warn, true);
|
||||
return HRTIMER_RESTART;
|
||||
}
|
||||
}
|
||||
|
||||
printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
|
||||
smp_processor_id(), duration,
|
||||
current->comm, task_pid_nr(current));
|
||||
@@ -327,6 +355,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
|
||||
else
|
||||
dump_stack();
|
||||
|
||||
if (softlockup_all_cpu_backtrace) {
|
||||
/* Avoid generating two back traces for current
|
||||
* given that one is already made above
|
||||
*/
|
||||
trigger_allbutself_cpu_backtrace();
|
||||
|
||||
clear_bit(0, &soft_lockup_nmi_warn);
|
||||
/* Barrier to sync with other cpus */
|
||||
smp_mb__after_atomic();
|
||||
}
|
||||
|
||||
if (softlockup_panic)
|
||||
panic("softlockup: hung tasks");
|
||||
__this_cpu_write(soft_watchdog_warn, true);
|
||||
@@ -527,10 +566,8 @@ static void update_timers_all_cpus(void)
|
||||
int cpu;
|
||||
|
||||
get_online_cpus();
|
||||
preempt_disable();
|
||||
for_each_online_cpu(cpu)
|
||||
update_timers(cpu);
|
||||
preempt_enable();
|
||||
put_online_cpus();
|
||||
}
|
||||
|
||||
|
@@ -65,15 +65,12 @@ enum {
|
||||
* be executing on any CPU. The pool behaves as an unbound one.
|
||||
*
|
||||
* Note that DISASSOCIATED should be flipped only while holding
|
||||
* manager_mutex to avoid changing binding state while
|
||||
* create_worker() is in progress.
|
||||
* attach_mutex to avoid changing binding state while
|
||||
* worker_attach_to_pool() is in progress.
|
||||
*/
|
||||
POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
|
||||
POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
|
||||
POOL_FREEZING = 1 << 3, /* freeze in progress */
|
||||
|
||||
/* worker flags */
|
||||
WORKER_STARTED = 1 << 0, /* started */
|
||||
WORKER_DIE = 1 << 1, /* die die die */
|
||||
WORKER_IDLE = 1 << 2, /* is idle */
|
||||
WORKER_PREP = 1 << 3, /* preparing to run works */
|
||||
@@ -124,8 +121,7 @@ enum {
|
||||
* cpu or grabbing pool->lock is enough for read access. If
|
||||
* POOL_DISASSOCIATED is set, it's identical to L.
|
||||
*
|
||||
* MG: pool->manager_mutex and pool->lock protected. Writes require both
|
||||
* locks. Reads can happen under either lock.
|
||||
* A: pool->attach_mutex protected.
|
||||
*
|
||||
* PL: wq_pool_mutex protected.
|
||||
*
|
||||
@@ -163,8 +159,11 @@ struct worker_pool {
|
||||
|
||||
/* see manage_workers() for details on the two manager mutexes */
|
||||
struct mutex manager_arb; /* manager arbitration */
|
||||
struct mutex manager_mutex; /* manager exclusion */
|
||||
struct idr worker_idr; /* MG: worker IDs and iteration */
|
||||
struct mutex attach_mutex; /* attach/detach exclusion */
|
||||
struct list_head workers; /* A: attached workers */
|
||||
struct completion *detach_completion; /* all workers detached */
|
||||
|
||||
struct ida worker_ida; /* worker IDs for task name */
|
||||
|
||||
struct workqueue_attrs *attrs; /* I: worker attributes */
|
||||
struct hlist_node hash_node; /* PL: unbound_pool_hash node */
|
||||
@@ -340,16 +339,6 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
|
||||
lockdep_is_held(&wq->mutex), \
|
||||
"sched RCU or wq->mutex should be held")
|
||||
|
||||
#ifdef CONFIG_LOCKDEP
|
||||
#define assert_manager_or_pool_lock(pool) \
|
||||
WARN_ONCE(debug_locks && \
|
||||
!lockdep_is_held(&(pool)->manager_mutex) && \
|
||||
!lockdep_is_held(&(pool)->lock), \
|
||||
"pool->manager_mutex or ->lock should be held")
|
||||
#else
|
||||
#define assert_manager_or_pool_lock(pool) do { } while (0)
|
||||
#endif
|
||||
|
||||
#define for_each_cpu_worker_pool(pool, cpu) \
|
||||
for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
|
||||
(pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
|
||||
@@ -375,17 +364,16 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
|
||||
/**
|
||||
* for_each_pool_worker - iterate through all workers of a worker_pool
|
||||
* @worker: iteration cursor
|
||||
* @wi: integer used for iteration
|
||||
* @pool: worker_pool to iterate workers of
|
||||
*
|
||||
* This must be called with either @pool->manager_mutex or ->lock held.
|
||||
* This must be called with @pool->attach_mutex.
|
||||
*
|
||||
* The if/else clause exists only for the lockdep assertion and can be
|
||||
* ignored.
|
||||
*/
|
||||
#define for_each_pool_worker(worker, wi, pool) \
|
||||
idr_for_each_entry(&(pool)->worker_idr, (worker), (wi)) \
|
||||
if (({ assert_manager_or_pool_lock((pool)); false; })) { } \
|
||||
#define for_each_pool_worker(worker, pool) \
|
||||
list_for_each_entry((worker), &(pool)->workers, node) \
|
||||
if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
|
||||
else
|
||||
|
||||
/**
|
||||
@@ -763,13 +751,6 @@ static bool need_to_create_worker(struct worker_pool *pool)
|
||||
return need_more_worker(pool) && !may_start_working(pool);
|
||||
}
|
||||
|
||||
/* Do I need to be the manager? */
|
||||
static bool need_to_manage_workers(struct worker_pool *pool)
|
||||
{
|
||||
return need_to_create_worker(pool) ||
|
||||
(pool->flags & POOL_MANAGE_WORKERS);
|
||||
}
|
||||
|
||||
/* Do we have too many workers and should some go away? */
|
||||
static bool too_many_workers(struct worker_pool *pool)
|
||||
{
|
||||
@@ -791,8 +772,8 @@ static bool too_many_workers(struct worker_pool *pool)
|
||||
* Wake up functions.
|
||||
*/
|
||||
|
||||
/* Return the first worker. Safe with preemption disabled */
|
||||
static struct worker *first_worker(struct worker_pool *pool)
|
||||
/* Return the first idle worker. Safe with preemption disabled */
|
||||
static struct worker *first_idle_worker(struct worker_pool *pool)
|
||||
{
|
||||
if (unlikely(list_empty(&pool->idle_list)))
|
||||
return NULL;
|
||||
@@ -811,7 +792,7 @@ static struct worker *first_worker(struct worker_pool *pool)
|
||||
*/
|
||||
static void wake_up_worker(struct worker_pool *pool)
|
||||
{
|
||||
struct worker *worker = first_worker(pool);
|
||||
struct worker *worker = first_idle_worker(pool);
|
||||
|
||||
if (likely(worker))
|
||||
wake_up_process(worker->task);
|
||||
@@ -885,7 +866,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
|
||||
*/
|
||||
if (atomic_dec_and_test(&pool->nr_running) &&
|
||||
!list_empty(&pool->worklist))
|
||||
to_wakeup = first_worker(pool);
|
||||
to_wakeup = first_idle_worker(pool);
|
||||
return to_wakeup ? to_wakeup->task : NULL;
|
||||
}
|
||||
|
||||
@@ -1621,70 +1602,6 @@ static void worker_leave_idle(struct worker *worker)
|
||||
list_del_init(&worker->entry);
|
||||
}
|
||||
|
||||
/**
|
||||
* worker_maybe_bind_and_lock - try to bind %current to worker_pool and lock it
|
||||
* @pool: target worker_pool
|
||||
*
|
||||
* Bind %current to the cpu of @pool if it is associated and lock @pool.
|
||||
*
|
||||
* Works which are scheduled while the cpu is online must at least be
|
||||
* scheduled to a worker which is bound to the cpu so that if they are
|
||||
* flushed from cpu callbacks while cpu is going down, they are
|
||||
* guaranteed to execute on the cpu.
|
||||
*
|
||||
* This function is to be used by unbound workers and rescuers to bind
|
||||
* themselves to the target cpu and may race with cpu going down or
|
||||
* coming online. kthread_bind() can't be used because it may put the
|
||||
* worker to already dead cpu and set_cpus_allowed_ptr() can't be used
|
||||
* verbatim as it's best effort and blocking and pool may be
|
||||
* [dis]associated in the meantime.
|
||||
*
|
||||
* This function tries set_cpus_allowed() and locks pool and verifies the
|
||||
* binding against %POOL_DISASSOCIATED which is set during
|
||||
* %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker
|
||||
* enters idle state or fetches works without dropping lock, it can
|
||||
* guarantee the scheduling requirement described in the first paragraph.
|
||||
*
|
||||
* CONTEXT:
|
||||
* Might sleep. Called without any lock but returns with pool->lock
|
||||
* held.
|
||||
*
|
||||
* Return:
|
||||
* %true if the associated pool is online (@worker is successfully
|
||||
* bound), %false if offline.
|
||||
*/
|
||||
static bool worker_maybe_bind_and_lock(struct worker_pool *pool)
|
||||
__acquires(&pool->lock)
|
||||
{
|
||||
while (true) {
|
||||
/*
|
||||
* The following call may fail, succeed or succeed
|
||||
* without actually migrating the task to the cpu if
|
||||
* it races with cpu hotunplug operation. Verify
|
||||
* against POOL_DISASSOCIATED.
|
||||
*/
|
||||
if (!(pool->flags & POOL_DISASSOCIATED))
|
||||
set_cpus_allowed_ptr(current, pool->attrs->cpumask);
|
||||
|
||||
spin_lock_irq(&pool->lock);
|
||||
if (pool->flags & POOL_DISASSOCIATED)
|
||||
return false;
|
||||
if (task_cpu(current) == pool->cpu &&
|
||||
cpumask_equal(¤t->cpus_allowed, pool->attrs->cpumask))
|
||||
return true;
|
||||
spin_unlock_irq(&pool->lock);
|
||||
|
||||
/*
|
||||
* We've raced with CPU hot[un]plug. Give it a breather
|
||||
* and retry migration. cond_resched() is required here;
|
||||
* otherwise, we might deadlock against cpu_stop trying to
|
||||
* bring down the CPU on non-preemptive kernel.
|
||||
*/
|
||||
cpu_relax();
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
static struct worker *alloc_worker(void)
|
||||
{
|
||||
struct worker *worker;
|
||||
@@ -1693,19 +1610,76 @@ static struct worker *alloc_worker(void)
|
||||
if (worker) {
|
||||
INIT_LIST_HEAD(&worker->entry);
|
||||
INIT_LIST_HEAD(&worker->scheduled);
|
||||
INIT_LIST_HEAD(&worker->node);
|
||||
/* on creation a worker is in !idle && prep state */
|
||||
worker->flags = WORKER_PREP;
|
||||
}
|
||||
return worker;
|
||||
}
|
||||
|
||||
/**
|
||||
* worker_attach_to_pool() - attach a worker to a pool
|
||||
* @worker: worker to be attached
|
||||
* @pool: the target pool
|
||||
*
|
||||
* Attach @worker to @pool. Once attached, the %WORKER_UNBOUND flag and
|
||||
* cpu-binding of @worker are kept coordinated with the pool across
|
||||
* cpu-[un]hotplugs.
|
||||
*/
|
||||
static void worker_attach_to_pool(struct worker *worker,
|
||||
struct worker_pool *pool)
|
||||
{
|
||||
mutex_lock(&pool->attach_mutex);
|
||||
|
||||
/*
|
||||
* set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
|
||||
* online CPUs. It'll be re-applied when any of the CPUs come up.
|
||||
*/
|
||||
set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
|
||||
|
||||
/*
|
||||
* The pool->attach_mutex ensures %POOL_DISASSOCIATED remains
|
||||
* stable across this function. See the comments above the
|
||||
* flag definition for details.
|
||||
*/
|
||||
if (pool->flags & POOL_DISASSOCIATED)
|
||||
worker->flags |= WORKER_UNBOUND;
|
||||
|
||||
list_add_tail(&worker->node, &pool->workers);
|
||||
|
||||
mutex_unlock(&pool->attach_mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* worker_detach_from_pool() - detach a worker from its pool
|
||||
* @worker: worker which is attached to its pool
|
||||
* @pool: the pool @worker is attached to
|
||||
*
|
||||
* Undo the attaching which had been done in worker_attach_to_pool(). The
|
||||
* caller worker shouldn't access to the pool after detached except it has
|
||||
* other reference to the pool.
|
||||
*/
|
||||
static void worker_detach_from_pool(struct worker *worker,
|
||||
struct worker_pool *pool)
|
||||
{
|
||||
struct completion *detach_completion = NULL;
|
||||
|
||||
mutex_lock(&pool->attach_mutex);
|
||||
list_del(&worker->node);
|
||||
if (list_empty(&pool->workers))
|
||||
detach_completion = pool->detach_completion;
|
||||
mutex_unlock(&pool->attach_mutex);
|
||||
|
||||
if (detach_completion)
|
||||
complete(detach_completion);
|
||||
}
|
||||
|
||||
/**
|
||||
* create_worker - create a new workqueue worker
|
||||
* @pool: pool the new worker will belong to
|
||||
*
|
||||
* Create a new worker which is bound to @pool. The returned worker
|
||||
* can be started by calling start_worker() or destroyed using
|
||||
* destroy_worker().
|
||||
* Create a new worker which is attached to @pool. The new worker must be
|
||||
* started by start_worker().
|
||||
*
|
||||
* CONTEXT:
|
||||
* Might sleep. Does GFP_KERNEL allocations.
|
||||
@@ -1719,19 +1693,8 @@ static struct worker *create_worker(struct worker_pool *pool)
|
||||
int id = -1;
|
||||
char id_buf[16];
|
||||
|
||||
lockdep_assert_held(&pool->manager_mutex);
|
||||
|
||||
/*
|
||||
* ID is needed to determine kthread name. Allocate ID first
|
||||
* without installing the pointer.
|
||||
*/
|
||||
idr_preload(GFP_KERNEL);
|
||||
spin_lock_irq(&pool->lock);
|
||||
|
||||
id = idr_alloc(&pool->worker_idr, NULL, 0, 0, GFP_NOWAIT);
|
||||
|
||||
spin_unlock_irq(&pool->lock);
|
||||
idr_preload_end();
|
||||
/* ID is needed to determine kthread name */
|
||||
id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
|
||||
if (id < 0)
|
||||
goto fail;
|
||||
|
||||
@@ -1758,33 +1721,14 @@ static struct worker *create_worker(struct worker_pool *pool)
|
||||
/* prevent userland from meddling with cpumask of workqueue workers */
|
||||
worker->task->flags |= PF_NO_SETAFFINITY;
|
||||
|
||||
/*
|
||||
* set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
|
||||
* online CPUs. It'll be re-applied when any of the CPUs come up.
|
||||
*/
|
||||
set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
|
||||
|
||||
/*
|
||||
* The caller is responsible for ensuring %POOL_DISASSOCIATED
|
||||
* remains stable across this function. See the comments above the
|
||||
* flag definition for details.
|
||||
*/
|
||||
if (pool->flags & POOL_DISASSOCIATED)
|
||||
worker->flags |= WORKER_UNBOUND;
|
||||
|
||||
/* successful, commit the pointer to idr */
|
||||
spin_lock_irq(&pool->lock);
|
||||
idr_replace(&pool->worker_idr, worker, worker->id);
|
||||
spin_unlock_irq(&pool->lock);
|
||||
/* successful, attach the worker to the pool */
|
||||
worker_attach_to_pool(worker, pool);
|
||||
|
||||
return worker;
|
||||
|
||||
fail:
|
||||
if (id >= 0) {
|
||||
spin_lock_irq(&pool->lock);
|
||||
idr_remove(&pool->worker_idr, id);
|
||||
spin_unlock_irq(&pool->lock);
|
||||
}
|
||||
if (id >= 0)
|
||||
ida_simple_remove(&pool->worker_ida, id);
|
||||
kfree(worker);
|
||||
return NULL;
|
||||
}
|
||||
@@ -1800,7 +1744,6 @@ fail:
|
||||
*/
|
||||
static void start_worker(struct worker *worker)
|
||||
{
|
||||
worker->flags |= WORKER_STARTED;
|
||||
worker->pool->nr_workers++;
|
||||
worker_enter_idle(worker);
|
||||
wake_up_process(worker->task);
|
||||
@@ -1818,8 +1761,6 @@ static int create_and_start_worker(struct worker_pool *pool)
|
||||
{
|
||||
struct worker *worker;
|
||||
|
||||
mutex_lock(&pool->manager_mutex);
|
||||
|
||||
worker = create_worker(pool);
|
||||
if (worker) {
|
||||
spin_lock_irq(&pool->lock);
|
||||
@@ -1827,8 +1768,6 @@ static int create_and_start_worker(struct worker_pool *pool)
|
||||
spin_unlock_irq(&pool->lock);
|
||||
}
|
||||
|
||||
mutex_unlock(&pool->manager_mutex);
|
||||
|
||||
return worker ? 0 : -ENOMEM;
|
||||
}
|
||||
|
||||
@@ -1836,46 +1775,30 @@ static int create_and_start_worker(struct worker_pool *pool)
|
||||
* destroy_worker - destroy a workqueue worker
|
||||
* @worker: worker to be destroyed
|
||||
*
|
||||
* Destroy @worker and adjust @pool stats accordingly.
|
||||
* Destroy @worker and adjust @pool stats accordingly. The worker should
|
||||
* be idle.
|
||||
*
|
||||
* CONTEXT:
|
||||
* spin_lock_irq(pool->lock) which is released and regrabbed.
|
||||
* spin_lock_irq(pool->lock).
|
||||
*/
|
||||
static void destroy_worker(struct worker *worker)
|
||||
{
|
||||
struct worker_pool *pool = worker->pool;
|
||||
|
||||
lockdep_assert_held(&pool->manager_mutex);
|
||||
lockdep_assert_held(&pool->lock);
|
||||
|
||||
/* sanity check frenzy */
|
||||
if (WARN_ON(worker->current_work) ||
|
||||
WARN_ON(!list_empty(&worker->scheduled)))
|
||||
WARN_ON(!list_empty(&worker->scheduled)) ||
|
||||
WARN_ON(!(worker->flags & WORKER_IDLE)))
|
||||
return;
|
||||
|
||||
if (worker->flags & WORKER_STARTED)
|
||||
pool->nr_workers--;
|
||||
if (worker->flags & WORKER_IDLE)
|
||||
pool->nr_idle--;
|
||||
|
||||
/*
|
||||
* Once WORKER_DIE is set, the kworker may destroy itself at any
|
||||
* point. Pin to ensure the task stays until we're done with it.
|
||||
*/
|
||||
get_task_struct(worker->task);
|
||||
pool->nr_workers--;
|
||||
pool->nr_idle--;
|
||||
|
||||
list_del_init(&worker->entry);
|
||||
worker->flags |= WORKER_DIE;
|
||||
|
||||
idr_remove(&pool->worker_idr, worker->id);
|
||||
|
||||
spin_unlock_irq(&pool->lock);
|
||||
|
||||
kthread_stop(worker->task);
|
||||
put_task_struct(worker->task);
|
||||
kfree(worker);
|
||||
|
||||
spin_lock_irq(&pool->lock);
|
||||
wake_up_process(worker->task);
|
||||
}
|
||||
|
||||
static void idle_worker_timeout(unsigned long __pool)
|
||||
@@ -1884,7 +1807,7 @@ static void idle_worker_timeout(unsigned long __pool)
|
||||
|
||||
spin_lock_irq(&pool->lock);
|
||||
|
||||
if (too_many_workers(pool)) {
|
||||
while (too_many_workers(pool)) {
|
||||
struct worker *worker;
|
||||
unsigned long expires;
|
||||
|
||||
@@ -1892,13 +1815,12 @@ static void idle_worker_timeout(unsigned long __pool)
|
||||
worker = list_entry(pool->idle_list.prev, struct worker, entry);
|
||||
expires = worker->last_active + IDLE_WORKER_TIMEOUT;
|
||||
|
||||
if (time_before(jiffies, expires))
|
||||
if (time_before(jiffies, expires)) {
|
||||
mod_timer(&pool->idle_timer, expires);
|
||||
else {
|
||||
/* it's been idle for too long, wake up manager */
|
||||
pool->flags |= POOL_MANAGE_WORKERS;
|
||||
wake_up_worker(pool);
|
||||
break;
|
||||
}
|
||||
|
||||
destroy_worker(worker);
|
||||
}
|
||||
|
||||
spin_unlock_irq(&pool->lock);
@@ -2016,44 +1938,6 @@ restart:
return true;
}

/**
* maybe_destroy_worker - destroy workers which have been idle for a while
* @pool: pool to destroy workers for
*
* Destroy @pool workers which have been idle for longer than
* IDLE_WORKER_TIMEOUT.
*
* LOCKING:
* spin_lock_irq(pool->lock) which may be released and regrabbed
* multiple times. Called only from manager.
*
* Return:
* %false if no action was taken and pool->lock stayed locked, %true
* otherwise.
*/
static bool maybe_destroy_workers(struct worker_pool *pool)
{
bool ret = false;

while (too_many_workers(pool)) {
struct worker *worker;
unsigned long expires;

worker = list_entry(pool->idle_list.prev, struct worker, entry);
expires = worker->last_active + IDLE_WORKER_TIMEOUT;

if (time_before(jiffies, expires)) {
mod_timer(&pool->idle_timer, expires);
break;
}

destroy_worker(worker);
ret = true;
}

return ret;
}

/**
* manage_workers - manage worker pool
* @worker: self
@@ -2083,8 +1967,6 @@ static bool manage_workers(struct worker *worker)
bool ret = false;

/*
* Managership is governed by two mutexes - manager_arb and
* manager_mutex. manager_arb handles arbitration of manager role.
* Anyone who successfully grabs manager_arb wins the arbitration
* and becomes the manager. mutex_trylock() on pool->manager_arb
* failure while holding pool->lock reliably indicates that someone
@@ -2093,40 +1975,12 @@ static bool manage_workers(struct worker *worker)
* grabbing manager_arb is responsible for actually performing
* manager duties. If manager_arb is grabbed and released without
* actual management, the pool may stall indefinitely.
*
* manager_mutex is used for exclusion of actual management
* operations. The holder of manager_mutex can be sure that none
* of management operations, including creation and destruction of
* workers, won't take place until the mutex is released. Because
* manager_mutex doesn't interfere with manager role arbitration,
* it is guaranteed that the pool's management, while may be
* delayed, won't be disturbed by someone else grabbing
* manager_mutex.
*/
if (!mutex_trylock(&pool->manager_arb))
return ret;

/*
* With manager arbitration won, manager_mutex would be free in
* most cases. trylock first without dropping @pool->lock.
*/
if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
spin_unlock_irq(&pool->lock);
mutex_lock(&pool->manager_mutex);
spin_lock_irq(&pool->lock);
ret = true;
}

pool->flags &= ~POOL_MANAGE_WORKERS;

/*
* Destroy and then create so that may_start_working() is true
* on return.
*/
ret |= maybe_destroy_workers(pool);
ret |= maybe_create_worker(pool);

mutex_unlock(&pool->manager_mutex);
mutex_unlock(&pool->manager_arb);
return ret;
}
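The comment above describes manager_arb as pure trylock arbitration: whoever wins does the management work, everyone else returns immediately and goes back to processing work. A compact pthread restatement of that pattern (illustrative names only, not the kernel implementation):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t manager_arb = PTHREAD_MUTEX_INITIALIZER;

static bool toy_manage_workers(void)
{
	/* whoever wins the trylock is the manager for this round */
	if (pthread_mutex_trylock(&manager_arb) != 0)
		return false;	/* someone else is (or just was) managing */

	/* ... worker creation/destruction would go here ... */

	pthread_mutex_unlock(&manager_arb);
	return true;
}

int main(void)
{
	return toy_manage_workers() ? 0 : 1;
}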
@@ -2314,6 +2168,11 @@ woke_up:
spin_unlock_irq(&pool->lock);
WARN_ON_ONCE(!list_empty(&worker->entry));
worker->task->flags &= ~PF_WQ_WORKER;

set_task_comm(worker->task, "kworker/dying");
ida_simple_remove(&pool->worker_ida, worker->id);
worker_detach_from_pool(worker, pool);
kfree(worker);
return 0;
}

@@ -2361,9 +2220,6 @@ recheck:

worker_set_flags(worker, WORKER_PREP, false);
sleep:
if (unlikely(need_to_manage_workers(pool)) && manage_workers(worker))
goto recheck;

/*
* pool->lock is held and there's no work to process and no need to
* manage, sleep. Workers are woken up only while holding
@@ -2440,8 +2296,9 @@ repeat:

spin_unlock_irq(&wq_mayday_lock);

/* migrate to the target cpu if possible */
worker_maybe_bind_and_lock(pool);
worker_attach_to_pool(rescuer, pool);

spin_lock_irq(&pool->lock);
rescuer->pool = pool;

/*
@@ -2454,6 +2311,11 @@ repeat:
move_linked_works(work, scheduled, &n);

process_scheduled_works(rescuer);
spin_unlock_irq(&pool->lock);

worker_detach_from_pool(rescuer, pool);

spin_lock_irq(&pool->lock);

/*
* Put the reference grabbed by send_mayday(). @pool won't
@@ -3422,6 +3284,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
}
}

dev_set_uevent_suppress(&wq_dev->dev, false);
kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD);
return 0;
}
@@ -3550,9 +3413,10 @@ static int init_worker_pool(struct worker_pool *pool)
(unsigned long)pool);

mutex_init(&pool->manager_arb);
mutex_init(&pool->manager_mutex);
idr_init(&pool->worker_idr);
mutex_init(&pool->attach_mutex);
INIT_LIST_HEAD(&pool->workers);

ida_init(&pool->worker_ida);
INIT_HLIST_NODE(&pool->hash_node);
pool->refcnt = 1;

@@ -3567,7 +3431,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
{
struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);

idr_destroy(&pool->worker_idr);
ida_destroy(&pool->worker_ida);
free_workqueue_attrs(pool->attrs);
kfree(pool);
}
@@ -3585,6 +3449,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
*/
static void put_unbound_pool(struct worker_pool *pool)
{
DECLARE_COMPLETION_ONSTACK(detach_completion);
struct worker *worker;

lockdep_assert_held(&wq_pool_mutex);
@@ -3605,18 +3470,24 @@ static void put_unbound_pool(struct worker_pool *pool)
/*
* Become the manager and destroy all workers. Grabbing
* manager_arb prevents @pool's workers from blocking on
* manager_mutex.
* attach_mutex.
*/
mutex_lock(&pool->manager_arb);
mutex_lock(&pool->manager_mutex);
spin_lock_irq(&pool->lock);

while ((worker = first_worker(pool)))
spin_lock_irq(&pool->lock);
while ((worker = first_idle_worker(pool)))
destroy_worker(worker);
WARN_ON(pool->nr_workers || pool->nr_idle);

spin_unlock_irq(&pool->lock);
mutex_unlock(&pool->manager_mutex);

mutex_lock(&pool->attach_mutex);
if (!list_empty(&pool->workers))
pool->detach_completion = &detach_completion;
mutex_unlock(&pool->attach_mutex);

if (pool->detach_completion)
wait_for_completion(pool->detach_completion);

mutex_unlock(&pool->manager_arb);

/* shut down the timers */
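The detach_completion added above lets the destroyer wait until the last dying worker has detached before tearing the pool down. A rough userspace sketch of that handshake, modelled with a pthread condition variable instead of a struct completion (toy names, illustrative only):

#include <pthread.h>

struct toy_pool {
	pthread_mutex_t attach_mutex;
	pthread_cond_t  detach_done;
	int nr_attached;
};

static void toy_worker_detach(struct toy_pool *pool)
{
	pthread_mutex_lock(&pool->attach_mutex);
	if (--pool->nr_attached == 0)
		pthread_cond_signal(&pool->detach_done);	/* complete() */
	pthread_mutex_unlock(&pool->attach_mutex);
}

static void toy_put_pool(struct toy_pool *pool)
{
	pthread_mutex_lock(&pool->attach_mutex);
	while (pool->nr_attached > 0)				/* wait_for_completion() */
		pthread_cond_wait(&pool->detach_done, &pool->attach_mutex);
	pthread_mutex_unlock(&pool->attach_mutex);
	/* all workers gone: timers can be shut down and the pool freed */
}

int main(void)
{
	struct toy_pool pool = {
		.attach_mutex = PTHREAD_MUTEX_INITIALIZER,
		.detach_done  = PTHREAD_COND_INITIALIZER,
		.nr_attached  = 1,
	};

	toy_worker_detach(&pool);	/* last worker leaves ... */
	toy_put_pool(&pool);		/* ... so this returns immediately */
	return 0;
}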
@@ -3662,9 +3533,6 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
if (!pool || init_worker_pool(pool) < 0)
goto fail;

if (workqueue_freezing)
pool->flags |= POOL_FREEZING;

lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */
copy_workqueue_attrs(pool->attrs, attrs);

@@ -3771,7 +3639,12 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)

spin_lock_irq(&pwq->pool->lock);

if (!freezable || !(pwq->pool->flags & POOL_FREEZING)) {
/*
* During [un]freezing, the caller is responsible for ensuring that
* this function is called at least once after @workqueue_freezing
* is updated and visible.
*/
if (!freezable || !workqueue_freezing) {
pwq->max_active = wq->saved_max_active;

while (!list_empty(&pwq->delayed_works) &&
@@ -4103,17 +3976,13 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
* Let's determine what needs to be done. If the target cpumask is
* different from wq's, we need to compare it to @pwq's and create
* a new one if they don't match. If the target cpumask equals
* wq's, the default pwq should be used. If @pwq is already the
* default one, nothing to do; otherwise, install the default one.
* wq's, the default pwq should be used.
*/
if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) {
if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
goto out_unlock;
} else {
if (pwq == wq->dfl_pwq)
goto out_unlock;
else
goto use_dfl_pwq;
goto use_dfl_pwq;
}

mutex_unlock(&wq->mutex);
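Read together with the updated comment, the NUMA decision above reduces to a small truth table: keep the current pwq if the node's cpumask already matches, create a new one if the node has its own mask that differs, and fall back to the default pwq when the target equals wq's cpumask. A compact restatement as a standalone helper (invented names, not part of workqueue.c):

#include <stdbool.h>

enum pwq_action { PWQ_KEEP, PWQ_USE_DFL, PWQ_CREATE_NEW };

static enum pwq_action pick_pwq_action(bool node_has_own_cpumask,
				       bool cpumask_matches_current)
{
	if (!node_has_own_cpumask)	/* target equals wq's cpumask */
		return PWQ_USE_DFL;

	return cpumask_matches_current ? PWQ_KEEP : PWQ_CREATE_NEW;
}

int main(void)
{
	/* node keeps its own mask and it already matches -> nothing to do */
	return pick_pwq_action(true, true) == PWQ_KEEP ? 0 : 1;
}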
@@ -4121,8 +3990,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
/* create a new pwq */
pwq = alloc_unbound_pwq(wq, target_attrs);
if (!pwq) {
pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
wq->name);
pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
wq->name);
mutex_lock(&wq->mutex);
goto use_dfl_pwq;
}
@@ -4599,28 +4468,27 @@ static void wq_unbind_fn(struct work_struct *work)
int cpu = smp_processor_id();
struct worker_pool *pool;
struct worker *worker;
int wi;

for_each_cpu_worker_pool(pool, cpu) {
WARN_ON_ONCE(cpu != smp_processor_id());

mutex_lock(&pool->manager_mutex);
mutex_lock(&pool->attach_mutex);
spin_lock_irq(&pool->lock);

/*
* We've blocked all manager operations. Make all workers
* We've blocked all attach/detach operations. Make all workers
* unbound and set DISASSOCIATED. Before this, all workers
* except for the ones which are still executing works from
* before the last CPU down must be on the cpu. After
* this, they may become diasporas.
*/
for_each_pool_worker(worker, wi, pool)
for_each_pool_worker(worker, pool)
worker->flags |= WORKER_UNBOUND;

pool->flags |= POOL_DISASSOCIATED;

spin_unlock_irq(&pool->lock);
mutex_unlock(&pool->manager_mutex);
mutex_unlock(&pool->attach_mutex);

/*
* Call schedule() so that we cross rq->lock and thus can
@@ -4660,9 +4528,8 @@ static void wq_unbind_fn(struct work_struct *work)
static void rebind_workers(struct worker_pool *pool)
{
struct worker *worker;
int wi;

lockdep_assert_held(&pool->manager_mutex);
lockdep_assert_held(&pool->attach_mutex);

/*
* Restore CPU affinity of all workers. As all idle workers should
@@ -4671,13 +4538,13 @@ static void rebind_workers(struct worker_pool *pool)
* of all workers first and then clear UNBOUND. As we're called
* from CPU_ONLINE, the following shouldn't fail.
*/
for_each_pool_worker(worker, wi, pool)
for_each_pool_worker(worker, pool)
WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
pool->attrs->cpumask) < 0);

spin_lock_irq(&pool->lock);

for_each_pool_worker(worker, wi, pool) {
for_each_pool_worker(worker, pool) {
unsigned int worker_flags = worker->flags;

/*
@@ -4729,9 +4596,8 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
{
static cpumask_t cpumask;
struct worker *worker;
int wi;

lockdep_assert_held(&pool->manager_mutex);
lockdep_assert_held(&pool->attach_mutex);

/* is @cpu allowed for @pool? */
if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
@@ -4743,7 +4609,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
return;

/* as we're called from CPU_ONLINE, the following shouldn't fail */
for_each_pool_worker(worker, wi, pool)
for_each_pool_worker(worker, pool)
WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
pool->attrs->cpumask) < 0);
}
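The dropped int wi argument in the hunks above reflects workers now hanging off a plain list (pool->workers) rather than an IDR, so for_each_pool_worker() becomes a simple list walk with no iteration cursor. A self-contained userspace illustration of that iteration pattern, with simplified list helpers and invented toy names (requires typeof, as in kernel builds):

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

#define list_for_each_entry(pos, head, member)				\
	for (pos = container_of((head)->next, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = container_of(pos->member.next, typeof(*pos), member))

struct toy_worker { int id; struct list_head node; };
struct toy_pool   { struct list_head workers; };

/* analogue of the new for_each_pool_worker(worker, pool): no 'wi' index */
#define for_each_toy_worker(worker, pool) \
	list_for_each_entry(worker, &(pool)->workers, node)

int main(void)
{
	struct toy_pool pool;
	struct toy_worker w1 = { .id = 1 }, w2 = { .id = 2 };
	struct toy_worker *worker;

	list_init(&pool.workers);
	list_add_tail(&w1.node, &pool.workers);
	list_add_tail(&w2.node, &pool.workers);

	for_each_toy_worker(worker, &pool)
		printf("worker %d\n", worker->id);
	return 0;
}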
@@ -4776,7 +4642,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb,
mutex_lock(&wq_pool_mutex);

for_each_pool(pool, pi) {
mutex_lock(&pool->manager_mutex);
mutex_lock(&pool->attach_mutex);

if (pool->cpu == cpu) {
spin_lock_irq(&pool->lock);
@@ -4788,7 +4654,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb,
restore_unbound_workers_cpumask(pool, cpu);
}

mutex_unlock(&pool->manager_mutex);
mutex_unlock(&pool->attach_mutex);
}

/* update NUMA affinity of unbound workqueues */
@@ -4887,24 +4753,14 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
*/
void freeze_workqueues_begin(void)
{
struct worker_pool *pool;
struct workqueue_struct *wq;
struct pool_workqueue *pwq;
int pi;

mutex_lock(&wq_pool_mutex);

WARN_ON_ONCE(workqueue_freezing);
workqueue_freezing = true;

/* set FREEZING */
for_each_pool(pool, pi) {
spin_lock_irq(&pool->lock);
WARN_ON_ONCE(pool->flags & POOL_FREEZING);
pool->flags |= POOL_FREEZING;
spin_unlock_irq(&pool->lock);
}

list_for_each_entry(wq, &workqueues, list) {
mutex_lock(&wq->mutex);
for_each_pwq(pwq, wq)
@@ -4974,21 +4830,13 @@ void thaw_workqueues(void)
{
struct workqueue_struct *wq;
struct pool_workqueue *pwq;
struct worker_pool *pool;
int pi;

mutex_lock(&wq_pool_mutex);

if (!workqueue_freezing)
goto out_unlock;

/* clear FREEZING */
for_each_pool(pool, pi) {
spin_lock_irq(&pool->lock);
WARN_ON_ONCE(!(pool->flags & POOL_FREEZING));
pool->flags &= ~POOL_FREEZING;
spin_unlock_irq(&pool->lock);
}
workqueue_freezing = false;

/* restore max_active and repopulate worklist */
list_for_each_entry(wq, &workqueues, list) {
@@ -4998,7 +4846,6 @@ void thaw_workqueues(void)
mutex_unlock(&wq->mutex);
}

workqueue_freezing = false;
out_unlock:
mutex_unlock(&wq_pool_mutex);
}
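In the freeze/thaw hunks above the per-pool POOL_FREEZING bit disappears and the code relies on the single global workqueue_freezing flag instead. A minimal sketch of that simplification, assuming toy userspace names rather than the real workqueue internals:

#include <stdbool.h>
#include <pthread.h>

static pthread_mutex_t toy_pool_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool toy_freezing;		/* analogue of workqueue_freezing */

static void toy_freeze_begin(void)
{
	pthread_mutex_lock(&toy_pool_mutex);
	toy_freezing = true;		/* no per-pool flag to update */
	pthread_mutex_unlock(&toy_pool_mutex);
}

static int toy_effective_max_active(bool freezable, int saved_max_active)
{
	/* a freezable queue admits no new work while the system freezes */
	return (freezable && toy_freezing) ? 0 : saved_max_active;
}

int main(void)
{
	toy_freeze_begin();
	return toy_effective_max_active(true, 16);	/* 0 while frozen */
}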
@@ -5033,7 +4880,7 @@ static void __init wq_numa_init(void)
BUG_ON(!tbl);

for_each_node(node)
BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL,
node_online(node) ? node : NUMA_NO_NODE));

for_each_possible_cpu(cpu) {

@@ -37,6 +37,8 @@ struct worker {
struct task_struct *task; /* I: worker task */
struct worker_pool *pool; /* I: the associated pool */
/* L: for rescuers */
struct list_head node; /* A: anchored at pool->workers */
/* A: runs through worker->node */

unsigned long last_active; /* L: last active timestamp */
unsigned int flags; /* X: flags */