Merge branch 'linus' into irq/core, to pick up fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
@@ -2082,7 +2082,7 @@ static void adjust_branches(struct bpf_prog *prog, int pos, int delta)
/* adjust offset of jmps if necessary */
if (i < pos && i + insn->off + 1 > pos)
insn->off += delta;
else if (i > pos && i + insn->off + 1 < pos)
else if (i > pos + delta && i + insn->off + 1 <= pos + delta)
insn->off -= delta;
}
}
@@ -58,6 +58,7 @@
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include <linux/cpuset.h>
#include <net/sock.h>
/*

@@ -2739,6 +2740,7 @@ out_unlock_rcu:
out_unlock_threadgroup:
percpu_up_write(&cgroup_threadgroup_rwsem);
cgroup_kn_unlock(of->kn);
cpuset_post_attach_flush();
return ret ?: nbytes;
}

@@ -4655,14 +4657,15 @@ static void css_free_work_fn(struct work_struct *work)
if (ss) {
/* css free path */
struct cgroup_subsys_state *parent = css->parent;
int id = css->id;
if (css->parent)
css_put(css->parent);
ss->css_free(css);
cgroup_idr_remove(&ss->css_idr, id);
cgroup_put(cgrp);
if (parent)
css_put(parent);
} else {
/* cgroup free path */
atomic_dec(&cgrp->root->nr_cgrps);

@@ -4758,6 +4761,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
INIT_LIST_HEAD(&css->sibling);
INIT_LIST_HEAD(&css->children);
css->serial_nr = css_serial_nr_next++;
atomic_set(&css->online_cnt, 0);
if (cgroup_parent(cgrp)) {
css->parent = cgroup_css(cgroup_parent(cgrp), ss);

@@ -4780,6 +4784,10 @@ static int online_css(struct cgroup_subsys_state *css)
if (!ret) {
css->flags |= CSS_ONLINE;
rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
atomic_inc(&css->online_cnt);
if (css->parent)
atomic_inc(&css->parent->online_cnt);
}
return ret;
}

@@ -5017,10 +5025,15 @@ static void css_killed_work_fn(struct work_struct *work)
container_of(work, struct cgroup_subsys_state, destroy_work);
mutex_lock(&cgroup_mutex);
offline_css(css);
mutex_unlock(&cgroup_mutex);
css_put(css);
do {
offline_css(css);
css_put(css);
/* @css can't go away while we're holding cgroup_mutex */
css = css->parent;
} while (css && atomic_dec_and_test(&css->online_cnt));
mutex_unlock(&cgroup_mutex);
}
/* css kill confirmation processing requires process context, bounce */

@@ -5029,8 +5042,10 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
struct cgroup_subsys_state *css =
container_of(ref, struct cgroup_subsys_state, refcnt);
INIT_WORK(&css->destroy_work, css_killed_work_fn);
queue_work(cgroup_destroy_wq, &css->destroy_work);
if (atomic_dec_and_test(&css->online_cnt)) {
INIT_WORK(&css->destroy_work, css_killed_work_fn);
queue_work(cgroup_destroy_wq, &css->destroy_work);
}
}
/**
@@ -287,6 +287,8 @@ static struct cpuset top_cpuset = {
static DEFINE_MUTEX(cpuset_mutex);
static DEFINE_SPINLOCK(callback_lock);
static struct workqueue_struct *cpuset_migrate_mm_wq;
/*
* CPU / memory hotplug is handled asynchronously.
*/

@@ -972,31 +974,51 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
}
/*
* cpuset_migrate_mm
*
* Migrate memory region from one set of nodes to another.
*
* Temporarilly set tasks mems_allowed to target nodes of migration,
* so that the migration code can allocate pages on these nodes.
*
* While the mm_struct we are migrating is typically from some
* other task, the task_struct mems_allowed that we are hacking
* is for our current task, which must allocate new pages for that
* migrating memory region.
* Migrate memory region from one set of nodes to another. This is
* performed asynchronously as it can be called from process migration path
* holding locks involved in process management. All mm migrations are
* performed in the queued order and can be waited for by flushing
* cpuset_migrate_mm_wq.
*/
struct cpuset_migrate_mm_work {
struct work_struct work;
struct mm_struct *mm;
nodemask_t from;
nodemask_t to;
};
static void cpuset_migrate_mm_workfn(struct work_struct *work)
{
struct cpuset_migrate_mm_work *mwork =
container_of(work, struct cpuset_migrate_mm_work, work);
/* on a wq worker, no need to worry about %current's mems_allowed */
do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL);
mmput(mwork->mm);
kfree(mwork);
}
static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
const nodemask_t *to)
{
struct task_struct *tsk = current;
struct cpuset_migrate_mm_work *mwork;
tsk->mems_allowed = *to;
mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
if (mwork) {
mwork->mm = mm;
mwork->from = *from;
mwork->to = *to;
INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn);
queue_work(cpuset_migrate_mm_wq, &mwork->work);
} else {
mmput(mm);
}
}
do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
rcu_read_lock();
guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed);
rcu_read_unlock();
void cpuset_post_attach_flush(void)
{
flush_workqueue(cpuset_migrate_mm_wq);
}
/*

@@ -1097,7 +1119,8 @@ static void update_tasks_nodemask(struct cpuset *cs)
mpol_rebind_mm(mm, &cs->mems_allowed);
if (migrate)
cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems);
mmput(mm);
else
mmput(mm);
}
css_task_iter_end(&it);

@@ -1545,11 +1568,11 @@ static void cpuset_attach(struct cgroup_taskset *tset)
* @old_mems_allowed is the right nodesets that we
* migrate mm from.
*/
if (is_memory_migrate(cs)) {
if (is_memory_migrate(cs))
cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
&cpuset_attach_nodemask_to);
}
mmput(mm);
else
mmput(mm);
}
}

@@ -1714,6 +1737,7 @@ out_unlock:
mutex_unlock(&cpuset_mutex);
kernfs_unbreak_active_protection(of->kn);
css_put(&cs->css);
flush_workqueue(cpuset_migrate_mm_wq);
return retval ?: nbytes;
}

@@ -2359,6 +2383,9 @@ void __init cpuset_init_smp(void)
top_cpuset.effective_mems = node_states[N_MEMORY];
register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
BUG_ON(!cpuset_migrate_mm_wq);
}
/**
@@ -64,8 +64,17 @@ static void remote_function(void *data)
struct task_struct *p = tfc->p;
if (p) {
tfc->ret = -EAGAIN;
if (task_cpu(p) != smp_processor_id() || !task_curr(p))
/* -EAGAIN */
if (task_cpu(p) != smp_processor_id())
return;
/*
* Now that we're on right CPU with IRQs disabled, we can test
* if we hit the right task without races.
*/
tfc->ret = -ESRCH; /* No such (running) process */
if (p != current)
return;
}

@@ -92,13 +101,17 @@ task_function_call(struct task_struct *p, remote_function_f func, void *info)
.p = p,
.func = func,
.info = info,
.ret = -ESRCH, /* No such (running) process */
.ret = -EAGAIN,
};
int ret;
if (task_curr(p))
smp_call_function_single(task_cpu(p), remote_function, &data, 1);
do {
ret = smp_call_function_single(task_cpu(p), remote_function, &data, 1);
if (!ret)
ret = data.ret;
} while (ret == -EAGAIN);
return data.ret;
return ret;
}
/**

@@ -169,19 +182,6 @@ static bool is_kernel_event(struct perf_event *event)
* rely on ctx->is_active and therefore cannot use event_function_call().
* See perf_install_in_context().
*
* This is because we need a ctx->lock serialized variable (ctx->is_active)
* to reliably determine if a particular task/context is scheduled in. The
* task_curr() use in task_function_call() is racy in that a remote context
* switch is not a single atomic operation.
*
* As is, the situation is 'safe' because we set rq->curr before we do the
* actual context switch. This means that task_curr() will fail early, but
* we'll continue spinning on ctx->is_active until we've passed
* perf_event_task_sched_out().
*
* Without this ctx->lock serialized variable we could have race where we find
* the task (and hence the context) would not be active while in fact they are.
*
* If ctx->nr_events, then ctx->is_active and cpuctx->task_ctx are set.
*/

@@ -212,7 +212,7 @@ static int event_function(void *info)
*/
if (ctx->task) {
if (ctx->task != current) {
ret = -EAGAIN;
ret = -ESRCH;
goto unlock;
}

@@ -276,10 +276,10 @@ static void event_function_call(struct perf_event *event, event_f func, void *da
return;
}
again:
if (task == TASK_TOMBSTONE)
return;
again:
if (!task_function_call(task, event_function, &efs))
return;

@@ -289,13 +289,15 @@ again:
* a concurrent perf_event_context_sched_out().
*/
task = ctx->task;
if (task != TASK_TOMBSTONE) {
if (ctx->is_active) {
raw_spin_unlock_irq(&ctx->lock);
goto again;
}
func(event, NULL, ctx, data);
if (task == TASK_TOMBSTONE) {
raw_spin_unlock_irq(&ctx->lock);
return;
}
if (ctx->is_active) {
raw_spin_unlock_irq(&ctx->lock);
goto again;
}
func(event, NULL, ctx, data);
raw_spin_unlock_irq(&ctx->lock);
}

@@ -314,6 +316,7 @@ again:
enum event_type_t {
EVENT_FLEXIBLE = 0x1,
EVENT_PINNED = 0x2,
EVENT_TIME = 0x4,
EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED,
};

@@ -321,7 +324,13 @@ enum event_type_t {
* perf_sched_events : >0 events exist
* perf_cgroup_events: >0 per-cpu cgroup events exist on this cpu
*/
struct static_key_deferred perf_sched_events __read_mostly;
static void perf_sched_delayed(struct work_struct *work);
DEFINE_STATIC_KEY_FALSE(perf_sched_events);
static DECLARE_DELAYED_WORK(perf_sched_work, perf_sched_delayed);
static DEFINE_MUTEX(perf_sched_mutex);
static atomic_t perf_sched_count;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);

@@ -1288,16 +1297,18 @@ static u64 perf_event_time(struct perf_event *event)
/*
* Update the total_time_enabled and total_time_running fields for a event.
* The caller of this function needs to hold the ctx->lock.
*/
static void update_event_times(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
u64 run_end;
lockdep_assert_held(&ctx->lock);
if (event->state < PERF_EVENT_STATE_INACTIVE ||
event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
return;
/*
* in cgroup mode, time_enabled represents
* the time the event was enabled AND active

@@ -1645,7 +1656,7 @@ out:
static bool is_orphaned_event(struct perf_event *event)
{
return event->state == PERF_EVENT_STATE_EXIT;
return event->state == PERF_EVENT_STATE_DEAD;
}
static inline int pmu_filter_match(struct perf_event *event)

@@ -1690,14 +1701,14 @@ event_sched_out(struct perf_event *event,
perf_pmu_disable(event->pmu);
event->tstamp_stopped = tstamp;
event->pmu->del(event, 0);
event->oncpu = -1;
event->state = PERF_EVENT_STATE_INACTIVE;
if (event->pending_disable) {
event->pending_disable = 0;
event->state = PERF_EVENT_STATE_OFF;
}
event->tstamp_stopped = tstamp;
event->pmu->del(event, 0);
event->oncpu = -1;
if (!is_software_event(event))
cpuctx->active_oncpu--;

@@ -1732,7 +1743,6 @@ group_sched_out(struct perf_event *group_event,
}
#define DETACH_GROUP 0x01UL
#define DETACH_STATE 0x02UL
/*
* Cross CPU call to remove a performance event

@@ -1752,8 +1762,6 @@ __perf_remove_from_context(struct perf_event *event,
if (flags & DETACH_GROUP)
perf_group_detach(event);
list_del_event(event, ctx);
if (flags & DETACH_STATE)
event->state = PERF_EVENT_STATE_EXIT;
if (!ctx->nr_events && ctx->is_active) {
ctx->is_active = 0;

@@ -2063,14 +2071,27 @@ static void add_event_to_ctx(struct perf_event *event,
event->tstamp_stopped = tstamp;
}
static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx);
static void ctx_sched_out(struct perf_event_context *ctx,
struct perf_cpu_context *cpuctx,
enum event_type_t event_type);
static void
ctx_sched_in(struct perf_event_context *ctx,
struct perf_cpu_context *cpuctx,
enum event_type_t event_type,
struct task_struct *task);
static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
if (!cpuctx->task_ctx)
return;
if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
return;
ctx_sched_out(ctx, cpuctx, EVENT_ALL);
}
static void perf_event_sched_in(struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx,
struct task_struct *task)

@@ -2097,49 +2118,68 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
/*
* Cross CPU call to install and enable a performance event
*
* Must be called with ctx->mutex held
* Very similar to remote_function() + event_function() but cannot assume that
* things like ctx->is_active and cpuctx->task_ctx are set.
*/
static int __perf_install_in_context(void *info)
{
struct perf_event_context *ctx = info;
struct perf_event *event = info;
struct perf_event_context *ctx = event->ctx;
struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
struct perf_event_context *task_ctx = cpuctx->task_ctx;
bool activate = true;
int ret = 0;
raw_spin_lock(&cpuctx->ctx.lock);
if (ctx->task) {
raw_spin_lock(&ctx->lock);
/*
* If we hit the 'wrong' task, we've since scheduled and
* everything should be sorted, nothing to do!
*/
task_ctx = ctx;
if (ctx->task != current)
/* If we're on the wrong CPU, try again */
if (task_cpu(ctx->task) != smp_processor_id()) {
ret = -ESRCH;
goto unlock;
}
/*
* If task_ctx is set, it had better be to us.
* If we're on the right CPU, see if the task we target is
* current, if not we don't have to activate the ctx, a future
* context switch will do that for us.
*/
WARN_ON_ONCE(cpuctx->task_ctx != ctx && cpuctx->task_ctx);
if (ctx->task != current)
activate = false;
else
WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx);
} else if (task_ctx) {
raw_spin_lock(&task_ctx->lock);
}
ctx_resched(cpuctx, task_ctx);
if (activate) {
ctx_sched_out(ctx, cpuctx, EVENT_TIME);
add_event_to_ctx(event, ctx);
ctx_resched(cpuctx, task_ctx);
} else {
add_event_to_ctx(event, ctx);
}
unlock:
perf_ctx_unlock(cpuctx, task_ctx);
return 0;
return ret;
}
/*
* Attach a performance event to a context
* Attach a performance event to a context.
*
* Very similar to event_function_call, see comment there.
*/
static void
perf_install_in_context(struct perf_event_context *ctx,
struct perf_event *event,
int cpu)
{
struct task_struct *task = NULL;
struct task_struct *task = READ_ONCE(ctx->task);
lockdep_assert_held(&ctx->mutex);

@@ -2147,40 +2187,46 @@ perf_install_in_context(struct perf_event_context *ctx,
if (event->cpu != -1)
event->cpu = cpu;
if (!task) {
cpu_function_call(cpu, __perf_install_in_context, event);
return;
}
/*
* Should not happen, we validate the ctx is still alive before calling.
*/
if (WARN_ON_ONCE(task == TASK_TOMBSTONE))
return;
/*
* Installing events is tricky because we cannot rely on ctx->is_active
* to be set in case this is the nr_events 0 -> 1 transition.
*
* So what we do is we add the event to the list here, which will allow
* a future context switch to DTRT and then send a racy IPI. If the IPI
* fails to hit the right task, this means a context switch must have
* happened and that will have taken care of business.
*/
again:
/*
* Cannot use task_function_call() because we need to run on the task's
* CPU regardless of whether its current or not.
*/
if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event))
return;
raw_spin_lock_irq(&ctx->lock);
task = ctx->task;
/*
* Worse, we cannot even rely on the ctx actually existing anymore. If
* between find_get_context() and perf_install_in_context() the task
* went through perf_event_exit_task() its dead and we should not be
* adding new events.
*/
if (task == TASK_TOMBSTONE) {
if (WARN_ON_ONCE(task == TASK_TOMBSTONE)) {
/*
* Cannot happen because we already checked above (which also
* cannot happen), and we hold ctx->mutex, which serializes us
* against perf_event_exit_task_context().
*/
raw_spin_unlock_irq(&ctx->lock);
return;
}
update_context_time(ctx);
/*
* Update cgrp time only if current cgrp matches event->cgrp.
* Must be done before calling add_event_to_ctx().
*/
update_cgrp_time_from_event(event);
add_event_to_ctx(event, ctx);
raw_spin_unlock_irq(&ctx->lock);
if (task)
task_function_call(task, __perf_install_in_context, ctx);
else
cpu_function_call(cpu, __perf_install_in_context, ctx);
/*
* Since !ctx->is_active doesn't mean anything, we must IPI
* unconditionally.
*/
goto again;
}
/*

@@ -2219,17 +2265,18 @@ static void __perf_event_enable(struct perf_event *event,
event->state <= PERF_EVENT_STATE_ERROR)
return;
update_context_time(ctx);
if (ctx->is_active)
ctx_sched_out(ctx, cpuctx, EVENT_TIME);
__perf_event_mark_enabled(event);
if (!ctx->is_active)
return;
if (!event_filter_match(event)) {
if (is_cgroup_event(event)) {
perf_cgroup_set_timestamp(current, ctx); // XXX ?
if (is_cgroup_event(event))
perf_cgroup_defer_enabled(event);
}
ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
return;
}

@@ -2237,8 +2284,10 @@ static void __perf_event_enable(struct perf_event *event,
* If the event is in a group and isn't the group leader,
* then don't put it on unless the group is on.
*/
if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)
if (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE) {
ctx_sched_in(ctx, cpuctx, EVENT_TIME, current);
return;
}
task_ctx = cpuctx->task_ctx;
if (ctx->task)

@@ -2344,24 +2393,33 @@ static void ctx_sched_out(struct perf_event_context *ctx,
}
ctx->is_active &= ~event_type;
if (!(ctx->is_active & EVENT_ALL))
ctx->is_active = 0;
if (ctx->task) {
WARN_ON_ONCE(cpuctx->task_ctx != ctx);
if (!ctx->is_active)
cpuctx->task_ctx = NULL;
}
update_context_time(ctx);
update_cgrp_time_from_cpuctx(cpuctx);
if (!ctx->nr_active)
is_active ^= ctx->is_active; /* changed bits */
if (is_active & EVENT_TIME) {
/* update (and stop) ctx time */
update_context_time(ctx);
update_cgrp_time_from_cpuctx(cpuctx);
}
if (!ctx->nr_active || !(is_active & EVENT_ALL))
return;
perf_pmu_disable(ctx->pmu);
if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) {
if (is_active & EVENT_PINNED) {
list_for_each_entry(event, &ctx->pinned_groups, group_entry)
group_sched_out(event, cpuctx, ctx);
}
if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) {
if (is_active & EVENT_FLEXIBLE) {
list_for_each_entry(event, &ctx->flexible_groups, group_entry)
group_sched_out(event, cpuctx, ctx);
}

@@ -2641,18 +2699,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
perf_cgroup_sched_out(task, next);
}
static void task_ctx_sched_out(struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx)
{
if (!cpuctx->task_ctx)
return;
if (WARN_ON_ONCE(ctx != cpuctx->task_ctx))
return;
ctx_sched_out(ctx, cpuctx, EVENT_ALL);
}
/*
* Called with IRQs disabled
*/

@@ -2735,7 +2781,7 @@ ctx_sched_in(struct perf_event_context *ctx,
if (likely(!ctx->nr_events))
return;
ctx->is_active |= event_type;
ctx->is_active |= (event_type | EVENT_TIME);
if (ctx->task) {
if (!is_active)
cpuctx->task_ctx = ctx;

@@ -2743,18 +2789,24 @@ ctx_sched_in(struct perf_event_context *ctx,
WARN_ON_ONCE(cpuctx->task_ctx != ctx);
}
now = perf_clock();
ctx->timestamp = now;
perf_cgroup_set_timestamp(task, ctx);
is_active ^= ctx->is_active; /* changed bits */
if (is_active & EVENT_TIME) {
/* start ctx time */
now = perf_clock();
ctx->timestamp = now;
perf_cgroup_set_timestamp(task, ctx);
}
/*
* First go through the list and put on any pinned groups
* in order to give them the best chance of going on.
*/
if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED))
if (is_active & EVENT_PINNED)
ctx_pinned_sched_in(ctx, cpuctx);
/* Then walk through the lower prio flexible groups */
if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE))
if (is_active & EVENT_FLEXIBLE)
ctx_flexible_sched_in(ctx, cpuctx);
}

@@ -3120,6 +3172,7 @@ static void perf_event_enable_on_exec(int ctxn)
cpuctx = __get_cpu_context(ctx);
perf_ctx_lock(cpuctx, ctx);
ctx_sched_out(ctx, cpuctx, EVENT_TIME);
list_for_each_entry(event, &ctx->event_list, event_entry)
enabled |= event_enable_on_exec(event, ctx);

@@ -3537,12 +3590,22 @@ static void unaccount_event(struct perf_event *event)
if (has_branch_stack(event))
dec = true;
if (dec)
static_key_slow_dec_deferred(&perf_sched_events);
if (dec) {
if (!atomic_add_unless(&perf_sched_count, -1, 1))
schedule_delayed_work(&perf_sched_work, HZ);
}
unaccount_event_cpu(event, event->cpu);
}
static void perf_sched_delayed(struct work_struct *work)
{
mutex_lock(&perf_sched_mutex);
if (atomic_dec_and_test(&perf_sched_count))
static_branch_disable(&perf_sched_events);
mutex_unlock(&perf_sched_mutex);
}
/*
* The following implement mutual exclusion of events on "exclusive" pmus
* (PERF_PMU_CAP_EXCLUSIVE). Such pmus can only have one event scheduled

@@ -3752,30 +3815,42 @@ static void put_event(struct perf_event *event)
*/
int perf_event_release_kernel(struct perf_event *event)
{
struct perf_event_context *ctx;
struct perf_event_context *ctx = event->ctx;
struct perf_event *child, *tmp;
/*
* If we got here through err_file: fput(event_file); we will not have
* attached to a context yet.
*/
if (!ctx) {
WARN_ON_ONCE(event->attach_state &
(PERF_ATTACH_CONTEXT|PERF_ATTACH_GROUP));
goto no_ctx;
}
if (!is_kernel_event(event))
perf_remove_from_owner(event);
ctx = perf_event_ctx_lock(event);
WARN_ON_ONCE(ctx->parent_ctx);
perf_remove_from_context(event, DETACH_GROUP | DETACH_STATE);
perf_event_ctx_unlock(event, ctx);
perf_remove_from_context(event, DETACH_GROUP);
raw_spin_lock_irq(&ctx->lock);
/*
* At this point we must have event->state == PERF_EVENT_STATE_EXIT,
* either from the above perf_remove_from_context() or through
* perf_event_exit_event().
* Mark this even as STATE_DEAD, there is no external reference to it
* anymore.
*
* Therefore, anybody acquiring event->child_mutex after the below
* loop _must_ also see this, most importantly inherit_event() which
* will avoid placing more children on the list.
* Anybody acquiring event->child_mutex after the below loop _must_
* also see this, most importantly inherit_event() which will avoid
* placing more children on the list.
*
* Thus this guarantees that we will in fact observe and kill _ALL_
* child events.
*/
WARN_ON_ONCE(event->state != PERF_EVENT_STATE_EXIT);
event->state = PERF_EVENT_STATE_DEAD;
raw_spin_unlock_irq(&ctx->lock);
perf_event_ctx_unlock(event, ctx);
again:
mutex_lock(&event->child_mutex);

@@ -3830,8 +3905,8 @@ again:
}
mutex_unlock(&event->child_mutex);
/* Must be the last reference */
put_event(event);
no_ctx:
put_event(event); /* Must be the 'last' reference */
return 0;
}
EXPORT_SYMBOL_GPL(perf_event_release_kernel);

@@ -3988,7 +4063,7 @@ static bool is_event_hup(struct perf_event *event)
{
bool no_children;
if (event->state != PERF_EVENT_STATE_EXIT)
if (event->state > PERF_EVENT_STATE_EXIT)
return false;
mutex_lock(&event->child_mutex);

@@ -7769,8 +7844,28 @@ static void account_event(struct perf_event *event)
if (is_cgroup_event(event))
inc = true;
if (inc)
static_key_slow_inc(&perf_sched_events.key);
if (inc) {
if (atomic_inc_not_zero(&perf_sched_count))
goto enabled;
mutex_lock(&perf_sched_mutex);
if (!atomic_read(&perf_sched_count)) {
static_branch_enable(&perf_sched_events);
/*
* Guarantee that all CPUs observe they key change and
* call the perf scheduling hooks before proceeding to
* install events that need them.
*/
synchronize_sched();
}
/*
* Now that we have waited for the sync_sched(), allow further
* increments to by-pass the mutex.
*/
atomic_inc(&perf_sched_count);
mutex_unlock(&perf_sched_mutex);
}
enabled:
account_event_cpu(event, event->cpu);
}

@@ -8389,10 +8484,19 @@ SYSCALL_DEFINE5(perf_event_open,
if (move_group) {
gctx = group_leader->ctx;
mutex_lock_double(&gctx->mutex, &ctx->mutex);
if (gctx->task == TASK_TOMBSTONE) {
err = -ESRCH;
goto err_locked;
}
} else {
mutex_lock(&ctx->mutex);
}
if (ctx->task == TASK_TOMBSTONE) {
err = -ESRCH;
goto err_locked;
}
if (!perf_event_validate_size(event)) {
err = -E2BIG;
goto err_locked;

@@ -8509,7 +8613,12 @@ err_context:
perf_unpin_context(ctx);
put_ctx(ctx);
err_alloc:
free_event(event);
/*
* If event_file is set, the fput() above will have called ->release()
* and that will take care of freeing the event.
*/
if (!event_file)
free_event(event);
err_cpus:
put_online_cpus();
err_task:

@@ -8563,12 +8672,14 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex);
if (ctx->task == TASK_TOMBSTONE) {
err = -ESRCH;
goto err_unlock;
}
if (!exclusive_event_installable(event, ctx)) {
mutex_unlock(&ctx->mutex);
perf_unpin_context(ctx);
put_ctx(ctx);
err = -EBUSY;
goto err_free;
goto err_unlock;
}
perf_install_in_context(ctx, event, cpu);

@@ -8577,6 +8688,10 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
return event;
err_unlock:
mutex_unlock(&ctx->mutex);
perf_unpin_context(ctx);
put_ctx(ctx);
err_free:
free_event(event);
err:

@@ -8695,7 +8810,7 @@ perf_event_exit_event(struct perf_event *child_event,
if (parent_event)
perf_group_detach(child_event);
list_del_event(child_event, child_ctx);
child_event->state = PERF_EVENT_STATE_EXIT; /* see perf_event_release_kernel() */
child_event->state = PERF_EVENT_STATE_EXIT; /* is_event_hup() */
raw_spin_unlock_irq(&child_ctx->lock);
/*

@@ -9206,7 +9321,7 @@ static void perf_event_init_cpu(int cpu)
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
mutex_lock(&swhash->hlist_mutex);
if (swhash->hlist_refcount > 0) {
if (swhash->hlist_refcount > 0 && !swevent_hlist_deref(swhash)) {
struct swevent_hlist *hlist;
hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));

@@ -9282,11 +9397,9 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
switch (action & ~CPU_TASKS_FROZEN) {
case CPU_UP_PREPARE:
case CPU_DOWN_FAILED:
perf_event_init_cpu(cpu);
break;
case CPU_UP_CANCELED:
case CPU_DOWN_PREPARE:
perf_event_exit_cpu(cpu);
break;

@@ -9315,9 +9428,6 @@ void __init perf_event_init(void)
ret = init_hw_breakpoint();
WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
/* do not patch jump label more than once per second */
jump_label_rate_limit(&perf_sched_events, HZ);
/*
* Build time assertion that we keep the data_head at the intended
* location. IOW, validation we got the __reserved[] size right.
@@ -292,7 +292,7 @@ LIST_HEAD(all_lock_classes);
#define __classhashfn(key) hash_long((unsigned long)key, CLASSHASH_BITS)
#define classhashentry(key) (classhash_table + __classhashfn((key)))
static struct list_head classhash_table[CLASSHASH_SIZE];
static struct hlist_head classhash_table[CLASSHASH_SIZE];
/*
* We put the lock dependency chains into a hash-table as well, to cache

@@ -303,7 +303,7 @@ static struct list_head classhash_table[CLASSHASH_SIZE];
#define __chainhashfn(chain) hash_long(chain, CHAINHASH_BITS)
#define chainhashentry(chain) (chainhash_table + __chainhashfn((chain)))
static struct list_head chainhash_table[CHAINHASH_SIZE];
static struct hlist_head chainhash_table[CHAINHASH_SIZE];
/*
* The hash key of the lock dependency chains is a hash itself too:

@@ -666,7 +666,7 @@ static inline struct lock_class *
look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
{
struct lockdep_subclass_key *key;
struct list_head *hash_head;
struct hlist_head *hash_head;
struct lock_class *class;
#ifdef CONFIG_DEBUG_LOCKDEP

@@ -719,7 +719,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
return NULL;
list_for_each_entry_rcu(class, hash_head, hash_entry) {
hlist_for_each_entry_rcu(class, hash_head, hash_entry) {
if (class->key == key) {
/*
* Huh! same key, different name? Did someone trample

@@ -742,7 +742,7 @@ static inline struct lock_class *
register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
{
struct lockdep_subclass_key *key;
struct list_head *hash_head;
struct hlist_head *hash_head;
struct lock_class *class;
DEBUG_LOCKS_WARN_ON(!irqs_disabled());

@@ -774,7 +774,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
* We have to do the hash-walk again, to avoid races
* with another CPU:
*/
list_for_each_entry_rcu(class, hash_head, hash_entry) {
hlist_for_each_entry_rcu(class, hash_head, hash_entry) {
if (class->key == key)
goto out_unlock_set;
}

@@ -805,7 +805,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
* We use RCU's safe list-add method to make
* parallel walking of the hash-list safe:
*/
list_add_tail_rcu(&class->hash_entry, hash_head);
hlist_add_head_rcu(&class->hash_entry, hash_head);
/*
* Add it to the global list of classes:
*/

@@ -1822,7 +1822,7 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
*/
static int
check_prev_add(struct task_struct *curr, struct held_lock *prev,
struct held_lock *next, int distance, int trylock_loop)
struct held_lock *next, int distance, int *stack_saved)
{
struct lock_list *entry;
int ret;

@@ -1883,8 +1883,11 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
}
}
if (!trylock_loop && !save_trace(&trace))
return 0;
if (!*stack_saved) {
if (!save_trace(&trace))
return 0;
*stack_saved = 1;
}
/*
* Ok, all validations passed, add the new lock

@@ -1907,6 +1910,8 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
* Debugging printouts:
*/
if (verbose(hlock_class(prev)) || verbose(hlock_class(next))) {
/* We drop graph lock, so another thread can overwrite trace. */
*stack_saved = 0;
graph_unlock();
printk("\n new dependency: ");
print_lock_name(hlock_class(prev));

@@ -1929,7 +1934,7 @@ static int
check_prevs_add(struct task_struct *curr, struct held_lock *next)
{
int depth = curr->lockdep_depth;
int trylock_loop = 0;
int stack_saved = 0;
struct held_lock *hlock;
/*

@@ -1956,7 +1961,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
*/
if (hlock->read != 2 && hlock->check) {
if (!check_prev_add(curr, hlock, next,
distance, trylock_loop))
distance, &stack_saved))
return 0;
/*
* Stop after the first non-trylock entry,

@@ -1979,7 +1984,6 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
if (curr->held_locks[depth].irq_context !=
curr->held_locks[depth-1].irq_context)
break;
trylock_loop = 1;
}
return 1;
out_bug:

@@ -2017,7 +2021,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
u64 chain_key)
{
struct lock_class *class = hlock_class(hlock);
struct list_head *hash_head = chainhashentry(chain_key);
struct hlist_head *hash_head = chainhashentry(chain_key);
struct lock_chain *chain;
struct held_lock *hlock_curr;
int i, j;

@@ -2033,7 +2037,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
* We can walk it lock-free, because entries only get added
* to the hash:
*/
list_for_each_entry_rcu(chain, hash_head, entry) {
hlist_for_each_entry_rcu(chain, hash_head, entry) {
if (chain->chain_key == chain_key) {
cache_hit:
debug_atomic_inc(chain_lookup_hits);

@@ -2057,7 +2061,7 @@ cache_hit:
/*
* We have to walk the chain again locked - to avoid duplicates:
*/
list_for_each_entry(chain, hash_head, entry) {
hlist_for_each_entry(chain, hash_head, entry) {
if (chain->chain_key == chain_key) {
graph_unlock();
goto cache_hit;

@@ -2091,7 +2095,7 @@ cache_hit:
}
chain_hlocks[chain->base + j] = class - lock_classes;
}
list_add_tail_rcu(&chain->entry, hash_head);
hlist_add_head_rcu(&chain->entry, hash_head);
debug_atomic_inc(chain_lookup_misses);
inc_chains();

@@ -3875,7 +3879,7 @@ void lockdep_reset(void)
nr_process_chains = 0;
debug_locks = 1;
for (i = 0; i < CHAINHASH_SIZE; i++)
INIT_LIST_HEAD(chainhash_table + i);
INIT_HLIST_HEAD(chainhash_table + i);
raw_local_irq_restore(flags);
}

@@ -3894,7 +3898,7 @@ static void zap_class(struct lock_class *class)
/*
* Unhash the class and remove it from the all_lock_classes list:
*/
list_del_rcu(&class->hash_entry);
hlist_del_rcu(&class->hash_entry);
list_del_rcu(&class->lock_entry);
RCU_INIT_POINTER(class->key, NULL);

@@ -3917,7 +3921,7 @@ static inline int within(const void *addr, void *start, unsigned long size)
void lockdep_free_key_range(void *start, unsigned long size)
{
struct lock_class *class;
struct list_head *head;
struct hlist_head *head;
unsigned long flags;
int i;
int locked;

@@ -3930,9 +3934,7 @@ void lockdep_free_key_range(void *start, unsigned long size)
*/
for (i = 0; i < CLASSHASH_SIZE; i++) {
head = classhash_table + i;
if (list_empty(head))
continue;
list_for_each_entry_rcu(class, head, hash_entry) {
hlist_for_each_entry_rcu(class, head, hash_entry) {
if (within(class->key, start, size))
zap_class(class);
else if (within(class->name, start, size))

@@ -3962,7 +3964,7 @@ void lockdep_free_key_range(void *start, unsigned long size)
void lockdep_reset_lock(struct lockdep_map *lock)
{
struct lock_class *class;
struct list_head *head;
struct hlist_head *head;
unsigned long flags;
int i, j;
int locked;

@@ -3987,9 +3989,7 @@ void lockdep_reset_lock(struct lockdep_map *lock)
locked = graph_lock();
for (i = 0; i < CLASSHASH_SIZE; i++) {
head = classhash_table + i;
if (list_empty(head))
continue;
list_for_each_entry_rcu(class, head, hash_entry) {
hlist_for_each_entry_rcu(class, head, hash_entry) {
int match = 0;
for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++)

@@ -4027,10 +4027,10 @@ void lockdep_init(void)
return;
for (i = 0; i < CLASSHASH_SIZE; i++)
INIT_LIST_HEAD(classhash_table + i);
INIT_HLIST_HEAD(classhash_table + i);
for (i = 0; i < CHAINHASH_SIZE; i++)
INIT_LIST_HEAD(chainhash_table + i);
INIT_HLIST_HEAD(chainhash_table + i);
lockdep_initialized = 1;
}
@@ -114,7 +114,7 @@ EXPORT_SYMBOL(memunmap);
static void devm_memremap_release(struct device *dev, void *res)
{
memunmap(res);
memunmap(*(void **)res);
}
static int devm_memremap_match(struct device *dev, void *res, void *match_data)

@@ -136,8 +136,10 @@ void *devm_memremap(struct device *dev, resource_size_t offset,
if (addr) {
*ptr = addr;
devres_add(dev, ptr);
} else
} else {
devres_free(ptr);
return ERR_PTR(-ENXIO);
}
return addr;
}

@@ -150,7 +152,7 @@ void devm_memunmap(struct device *dev, void *addr)
}
EXPORT_SYMBOL(devm_memunmap);
pfn_t phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
pfn_t phys_to_pfn_t(phys_addr_t addr, u64 flags)
{
return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
}
kernel/module.c
@@ -303,6 +303,9 @@ struct load_info {
struct _ddebug *debug;
unsigned int num_debug;
bool sig_ok;
#ifdef CONFIG_KALLSYMS
unsigned long mod_kallsyms_init_off;
#endif
struct {
unsigned int sym, str, mod, vers, info, pcpu;
} index;

@@ -981,6 +984,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
mod->exit();
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_GOING, mod);
ftrace_release_mod(mod);
async_synchronize_full();
/* Store the name of the last unloaded module for diagnostic purposes */

@@ -2480,10 +2485,21 @@ static void layout_symtab(struct module *mod, struct load_info *info)
strsect->sh_flags |= SHF_ALLOC;
strsect->sh_entsize = get_offset(mod, &mod->init_layout.size, strsect,
info->index.str) | INIT_OFFSET_MASK;
mod->init_layout.size = debug_align(mod->init_layout.size);
pr_debug("\t%s\n", info->secstrings + strsect->sh_name);
/* We'll tack temporary mod_kallsyms on the end. */
mod->init_layout.size = ALIGN(mod->init_layout.size,
__alignof__(struct mod_kallsyms));
info->mod_kallsyms_init_off = mod->init_layout.size;
mod->init_layout.size += sizeof(struct mod_kallsyms);
mod->init_layout.size = debug_align(mod->init_layout.size);
}
/*
* We use the full symtab and strtab which layout_symtab arranged to
* be appended to the init section. Later we switch to the cut-down
* core-only ones.
*/
static void add_kallsyms(struct module *mod, const struct load_info *info)
{
unsigned int i, ndst;

@@ -2492,29 +2508,34 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
char *s;
Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
mod->symtab = (void *)symsec->sh_addr;
mod->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
/* Set up to point into init section. */
mod->kallsyms = mod->init_layout.base + info->mod_kallsyms_init_off;
mod->kallsyms->symtab = (void *)symsec->sh_addr;
mod->kallsyms->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
/* Make sure we get permanent strtab: don't use info->strtab. */
mod->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
mod->kallsyms->strtab = (void *)info->sechdrs[info->index.str].sh_addr;
/* Set types up while we still have access to sections. */
for (i = 0; i < mod->num_symtab; i++)
mod->symtab[i].st_info = elf_type(&mod->symtab[i], info);
for (i = 0; i < mod->kallsyms->num_symtab; i++)
mod->kallsyms->symtab[i].st_info
= elf_type(&mod->kallsyms->symtab[i], info);
mod->core_symtab = dst = mod->core_layout.base + info->symoffs;
mod->core_strtab = s = mod->core_layout.base + info->stroffs;
src = mod->symtab;
for (ndst = i = 0; i < mod->num_symtab; i++) {
/* Now populate the cut down core kallsyms for after init. */
mod->core_kallsyms.symtab = dst = mod->core_layout.base + info->symoffs;
mod->core_kallsyms.strtab = s = mod->core_layout.base + info->stroffs;
src = mod->kallsyms->symtab;
for (ndst = i = 0; i < mod->kallsyms->num_symtab; i++) {
if (i == 0 ||
is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum,
info->index.pcpu)) {
dst[ndst] = src[i];
dst[ndst++].st_name = s - mod->core_strtab;
s += strlcpy(s, &mod->strtab[src[i].st_name],
dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
s += strlcpy(s, &mod->kallsyms->strtab[src[i].st_name],
KSYM_NAME_LEN) + 1;
}
}
mod->core_num_syms = ndst;
mod->core_kallsyms.num_symtab = ndst;
}
#else
static inline void layout_symtab(struct module *mod, struct load_info *info)

@@ -3263,9 +3284,8 @@ static noinline int do_init_module(struct module *mod)
module_put(mod);
trim_init_extable(mod);
#ifdef CONFIG_KALLSYMS
mod->num_symtab = mod->core_num_syms;
mod->symtab = mod->core_symtab;
mod->strtab = mod->core_strtab;
/* Switch to core kallsyms now init is done: kallsyms may be walking! */
rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms);
#endif
mod_tree_remove_init(mod);
disable_ro_nx(&mod->init_layout);

@@ -3295,6 +3315,7 @@ fail:
module_put(mod);
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_GOING, mod);
ftrace_release_mod(mod);
free_module(mod);
wake_up_all(&module_wq);
return ret;

@@ -3371,6 +3392,7 @@ static int complete_formation(struct module *mod, struct load_info *info)
mod->state = MODULE_STATE_COMING;
mutex_unlock(&module_mutex);
ftrace_module_enable(mod);
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_COMING, mod);
return 0;

@@ -3496,7 +3518,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
/* Module is ready to execute: parsing args may do that. */
after_dashes = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-32768, 32767, NULL,
-32768, 32767, mod,
unknown_module_param_cb);
if (IS_ERR(after_dashes)) {
err = PTR_ERR(after_dashes);

@@ -3627,6 +3649,11 @@ static inline int is_arm_mapping_symbol(const char *str)
&& (str[2] == '\0' || str[2] == '.');
}
static const char *symname(struct mod_kallsyms *kallsyms, unsigned int symnum)
{
return kallsyms->strtab + kallsyms->symtab[symnum].st_name;
}
static const char *get_ksymbol(struct module *mod,
unsigned long addr,
unsigned long *size,

@@ -3634,6 +3661,7 @@ static const char *get_ksymbol(struct module *mod,
{
unsigned int i, best = 0;
unsigned long nextval;
struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
/* At worse, next value is at end of module */
if (within_module_init(addr, mod))

@@ -3643,32 +3671,32 @@ static const char *get_ksymbol(struct module *mod,
/* Scan for closest preceding symbol, and next symbol. (ELF
starts real symbols at 1). */
for (i = 1; i < mod->num_symtab; i++) {
if (mod->symtab[i].st_shndx == SHN_UNDEF)
for (i = 1; i < kallsyms->num_symtab; i++) {
if (kallsyms->symtab[i].st_shndx == SHN_UNDEF)
continue;
/* We ignore unnamed symbols: they're uninformative
* and inserted at a whim. */
if (mod->symtab[i].st_value <= addr
&& mod->symtab[i].st_value > mod->symtab[best].st_value
&& *(mod->strtab + mod->symtab[i].st_name) != '\0'
&& !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
if (*symname(kallsyms, i) == '\0'
|| is_arm_mapping_symbol(symname(kallsyms, i)))
continue;
if (kallsyms->symtab[i].st_value <= addr
&& kallsyms->symtab[i].st_value > kallsyms->symtab[best].st_value)
best = i;
if (mod->symtab[i].st_value > addr
&& mod->symtab[i].st_value < nextval
&& *(mod->strtab + mod->symtab[i].st_name) != '\0'
&& !is_arm_mapping_symbol(mod->strtab + mod->symtab[i].st_name))
nextval = mod->symtab[i].st_value;
if (kallsyms->symtab[i].st_value > addr
&& kallsyms->symtab[i].st_value < nextval)
nextval = kallsyms->symtab[i].st_value;
}
if (!best)
return NULL;
if (size)
*size = nextval - mod->symtab[best].st_value;
*size = nextval - kallsyms->symtab[best].st_value;
if (offset)
*offset = addr - mod->symtab[best].st_value;
return mod->strtab + mod->symtab[best].st_name;
*offset = addr - kallsyms->symtab[best].st_value;
return symname(kallsyms, best);
}
/* For kallsyms to ask for address resolution. NULL means not found. Careful

@@ -3758,19 +3786,21 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
struct mod_kallsyms *kallsyms;
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if (symnum < mod->num_symtab) {
*value = mod->symtab[symnum].st_value;
*type = mod->symtab[symnum].st_info;
strlcpy(name, mod->strtab + mod->symtab[symnum].st_name,
KSYM_NAME_LEN);
kallsyms = rcu_dereference_sched(mod->kallsyms);
if (symnum < kallsyms->num_symtab) {
*value = kallsyms->symtab[symnum].st_value;
*type = kallsyms->symtab[symnum].st_info;
strlcpy(name, symname(kallsyms, symnum), KSYM_NAME_LEN);
strlcpy(module_name, mod->name, MODULE_NAME_LEN);
*exported = is_exported(name, *value, mod);
preempt_enable();
return 0;
}
symnum -= mod->num_symtab;
symnum -= kallsyms->num_symtab;
}
preempt_enable();
return -ERANGE;

@@ -3779,11 +3809,12 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
static unsigned long mod_find_symname(struct module *mod, const char *name)
{
unsigned int i;
struct mod_kallsyms *kallsyms = rcu_dereference_sched(mod->kallsyms);
for (i = 0; i < mod->num_symtab; i++)
if (strcmp(name, mod->strtab+mod->symtab[i].st_name) == 0 &&
mod->symtab[i].st_info != 'U')
return mod->symtab[i].st_value;
for (i = 0; i < kallsyms->num_symtab; i++)
if (strcmp(name, symname(kallsyms, i)) == 0 &&
kallsyms->symtab[i].st_info != 'U')
return kallsyms->symtab[i].st_value;
return 0;
}

@@ -3822,11 +3853,14 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
module_assert_mutex();
list_for_each_entry(mod, &modules, list) {
/* We hold module_mutex: no need for rcu_dereference_sched */
struct mod_kallsyms *kallsyms = mod->kallsyms;
if (mod->state == MODULE_STATE_UNFORMED)
continue;
for (i = 0; i < mod->num_symtab; i++) {
ret = fn(data, mod->strtab + mod->symtab[i].st_name,
mod, mod->symtab[i].st_value);
for (i = 0; i < kallsyms->num_symtab; i++) {
ret = fn(data, symname(kallsyms, i),
mod, kallsyms->symtab[i].st_value);
if (ret != 0)
return ret;
}
@@ -1083,9 +1083,10 @@ struct resource * __request_region(struct resource *parent,
if (!conflict)
break;
if (conflict != parent) {
parent = conflict;
if (!(conflict->flags & IORESOURCE_BUSY))
if (!(conflict->flags & IORESOURCE_BUSY)) {
parent = conflict;
continue;
}
}
if (conflict->flags & flags & IORESOURCE_MUXED) {
add_wait_queue(&muxed_resource_wait, &wait);
@@ -420,7 +420,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se,
* entity.
*/
if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
printk_deferred_once("sched: DL replenish lagged to much\n");
printk_deferred_once("sched: DL replenish lagged too much\n");
dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
dl_se->runtime = pi_se->dl_runtime;
}
@@ -4961,7 +4961,7 @@ void ftrace_release_mod(struct module *mod)
mutex_unlock(&ftrace_lock);
}
static void ftrace_module_enable(struct module *mod)
void ftrace_module_enable(struct module *mod)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;

@@ -5038,38 +5038,8 @@ void ftrace_module_init(struct module *mod)
ftrace_process_locs(mod, mod->ftrace_callsites,
mod->ftrace_callsites + mod->num_ftrace_callsites);
}
static int ftrace_module_notify(struct notifier_block *self,
unsigned long val, void *data)
{
struct module *mod = data;
switch (val) {
case MODULE_STATE_COMING:
ftrace_module_enable(mod);
break;
case MODULE_STATE_GOING:
ftrace_release_mod(mod);
break;
default:
break;
}
return 0;
}
#else
static int ftrace_module_notify(struct notifier_block *self,
unsigned long val, void *data)
{
return 0;
}
#endif /* CONFIG_MODULES */
struct notifier_block ftrace_module_nb = {
.notifier_call = ftrace_module_notify,
.priority = INT_MIN, /* Run after anything that can remove kprobes */
};
void __init ftrace_init(void)
{
extern unsigned long __start_mcount_loc[];

@@ -5098,10 +5068,6 @@ void __init ftrace_init(void)
__start_mcount_loc,
__stop_mcount_loc);
ret = register_module_notifier(&ftrace_module_nb);
if (ret)
pr_warning("Failed to register trace ftrace module exit notifier\n");
set_ftrace_early_filters();
return;
@@ -97,16 +97,16 @@ trace_find_event_field(struct trace_event_call *call, char *name)
struct ftrace_event_field *field;
struct list_head *head;
head = trace_get_fields(call);
field = __find_event_field(head, name);
if (field)
return field;
field = __find_event_field(&ftrace_generic_fields, name);
if (field)
return field;
field = __find_event_field(&ftrace_common_fields, name);
if (field)
return field;
head = trace_get_fields(call);
return __find_event_field(head, name);
return __find_event_field(&ftrace_common_fields, name);
}
static int __trace_define_field(struct list_head *head, const char *type,

@@ -171,8 +171,10 @@ static int trace_define_generic_fields(void)
{
int ret;
__generic_field(int, cpu, FILTER_OTHER);
__generic_field(char *, comm, FILTER_PTR_STRING);
__generic_field(int, CPU, FILTER_CPU);
__generic_field(int, cpu, FILTER_CPU);
__generic_field(char *, COMM, FILTER_COMM);
__generic_field(char *, comm, FILTER_COMM);
return ret;
}

@@ -869,7 +871,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
* The ftrace subsystem is for showing formats only.
* They can not be enabled or disabled via the event files.
*/
if (call->class && call->class->reg)
if (call->class && call->class->reg &&
!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
return file;
}

@@ -1043,13 +1043,14 @@ static int init_pred(struct filter_parse_state *ps,
return -EINVAL;
}
if (is_string_field(field)) {
if (field->filter_type == FILTER_COMM) {
filter_build_regex(pred);
fn = filter_pred_comm;
pred->regex.field_len = TASK_COMM_LEN;
} else if (is_string_field(field)) {
filter_build_regex(pred);
if (!strcmp(field->name, "comm")) {
fn = filter_pred_comm;
pred->regex.field_len = TASK_COMM_LEN;
} else if (field->filter_type == FILTER_STATIC_STRING) {
if (field->filter_type == FILTER_STATIC_STRING) {
fn = filter_pred_string;
pred->regex.field_len = field->size;
} else if (field->filter_type == FILTER_DYN_STRING)

@@ -1072,7 +1073,7 @@ static int init_pred(struct filter_parse_state *ps,
}
pred->val = val;
if (!strcmp(field->name, "cpu"))
if (field->filter_type == FILTER_CPU)
fn = filter_pred_cpu;
else
fn = select_comparison_fn(pred->op, field->size,
@@ -156,7 +156,11 @@ check_stack(unsigned long ip, unsigned long *stack)
for (; p < top && i < stack_trace_max.nr_entries; p++) {
if (stack_dump_trace[i] == ULONG_MAX)
break;
if (*p == stack_dump_trace[i]) {
/*
* The READ_ONCE_NOCHECK is used to let KASAN know that
* this is not a stack-out-of-bounds error.
*/
if ((READ_ONCE_NOCHECK(*p)) == stack_dump_trace[i]) {
stack_dump_trace[x] = stack_dump_trace[i++];
this_size = stack_trace_index[x++] =
(top - p) * sizeof(unsigned long);
@@ -301,7 +301,23 @@ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
static LIST_HEAD(workqueues); /* PR: list of all workqueues */
static bool workqueue_freezing; /* PL: have wqs started freezing? */
static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
/* PL: allowable cpus for unbound wqs and work items */
static cpumask_var_t wq_unbound_cpumask;
/* CPU where unbound work was last round robin scheduled from this CPU */
static DEFINE_PER_CPU(int, wq_rr_cpu_last);
/*
* Local execution of unbound work items is no longer guaranteed. The
* following always forces round-robin CPU selection on unbound work items
* to uncover usages which depend on it.
*/
#ifdef CONFIG_DEBUG_WQ_FORCE_RR_CPU
static bool wq_debug_force_rr_cpu = true;
#else
static bool wq_debug_force_rr_cpu = false;
#endif
module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
/* the per-cpu worker pools */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],

@@ -570,6 +586,16 @@ static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
int node)
{
assert_rcu_or_wq_mutex_or_pool_mutex(wq);
/*
* XXX: @node can be NUMA_NO_NODE if CPU goes offline while a
* delayed item is pending. The plan is to keep CPU -> NODE
* mapping valid and stable across CPU on/offlines. Once that
* happens, this workaround can be removed.
*/
if (unlikely(node == NUMA_NO_NODE))
return wq->dfl_pwq;
return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
}

@@ -1298,6 +1324,39 @@ static bool is_chained_work(struct workqueue_struct *wq)
return worker && worker->current_pwq->wq == wq;
}
/*
* When queueing an unbound work item to a wq, prefer local CPU if allowed
* by wq_unbound_cpumask. Otherwise, round robin among the allowed ones to
* avoid perturbing sensitive tasks.
*/
static int wq_select_unbound_cpu(int cpu)
{
static bool printed_dbg_warning;
int new_cpu;
if (likely(!wq_debug_force_rr_cpu)) {
if (cpumask_test_cpu(cpu, wq_unbound_cpumask))
return cpu;
} else if (!printed_dbg_warning) {
pr_warn("workqueue: round-robin CPU selection forced, expect performance impact\n");
printed_dbg_warning = true;
}
if (cpumask_empty(wq_unbound_cpumask))
return cpu;
new_cpu = __this_cpu_read(wq_rr_cpu_last);
new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
if (unlikely(new_cpu >= nr_cpu_ids)) {
new_cpu = cpumask_first_and(wq_unbound_cpumask, cpu_online_mask);
if (unlikely(new_cpu >= nr_cpu_ids))
return cpu;
}
__this_cpu_write(wq_rr_cpu_last, new_cpu);
return new_cpu;
}
static void __queue_work(int cpu, struct workqueue_struct *wq,
struct work_struct *work)
{

@@ -1323,7 +1382,7 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
return;
retry:
if (req_cpu == WORK_CPU_UNBOUND)
cpu = raw_smp_processor_id();
cpu = wq_select_unbound_cpu(raw_smp_processor_id());
/* pwq which will be used unless @work is executing elsewhere */
if (!(wq->flags & WQ_UNBOUND))

@@ -1464,13 +1523,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
timer_stats_timer_set_start_info(&dwork->timer);
dwork->wq = wq;
/* timer isn't guaranteed to run in this cpu, record earlier */
if (cpu == WORK_CPU_UNBOUND)
cpu = raw_smp_processor_id();
dwork->cpu = cpu;
timer->expires = jiffies + delay;
add_timer_on(timer, cpu);
if (unlikely(cpu != WORK_CPU_UNBOUND))
add_timer_on(timer, cpu);
else
add_timer(timer);
}
/**

@@ -2355,7 +2414,8 @@ static void check_flush_dependency(struct workqueue_struct *target_wq,
WARN_ONCE(current->flags & PF_MEMALLOC,
"workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
current->pid, current->comm, target_wq->name, target_func);
WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
WARN_ONCE(worker && ((worker->current_pwq->wq->flags &
(WQ_MEM_RECLAIM | __WQ_LEGACY)) == WQ_MEM_RECLAIM),
"workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
worker->current_pwq->wq->name, worker->current_func,
target_wq->name, target_func);