Merge branch 'x86/cache' into perf/core, to pick up fixes

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar
2018-10-23 12:30:19 +02:00
316 changed files with 2969 additions and 1834 deletions

View File

@@ -129,7 +129,7 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
struct bpf_cgroup_storage *storage;
struct bpf_storage_buffer *new;
if (flags & BPF_NOEXIST)
if (flags != BPF_ANY && flags != BPF_EXIST)
return -EINVAL;
storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
@@ -195,6 +195,9 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
return ERR_PTR(-EINVAL);
if (attr->value_size == 0)
return ERR_PTR(-EINVAL);
if (attr->value_size > PAGE_SIZE)
return ERR_PTR(-E2BIG);

View File

@@ -2896,6 +2896,15 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
u64 umin_val, umax_val;
u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
if (insn_bitness == 32) {
/* Relevant for 32-bit RSH: Information can propagate towards
* LSB, so it isn't sufficient to only truncate the output to
* 32 bits.
*/
coerce_reg_to_size(dst_reg, 4);
coerce_reg_to_size(&src_reg, 4);
}
smin_val = src_reg.smin_value;
smax_val = src_reg.smax_value;
umin_val = src_reg.umin_value;
@@ -3131,7 +3140,6 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
if (BPF_CLASS(insn->code) != BPF_ALU64) {
/* 32-bit ALU ops are (32,32)->32 */
coerce_reg_to_size(dst_reg, 4);
coerce_reg_to_size(&src_reg, 4);
}
__reg_deduce_bounds(dst_reg);

View File

@@ -260,7 +260,7 @@ static void test_cycle_work(struct work_struct *work)
{
struct test_cycle *cycle = container_of(work, typeof(*cycle), work);
struct ww_acquire_ctx ctx;
int err;
int err, erra = 0;
ww_acquire_init(&ctx, &ww_class);
ww_mutex_lock(&cycle->a_mutex, &ctx);
@@ -270,17 +270,19 @@ static void test_cycle_work(struct work_struct *work)
err = ww_mutex_lock(cycle->b_mutex, &ctx);
if (err == -EDEADLK) {
err = 0;
ww_mutex_unlock(&cycle->a_mutex);
ww_mutex_lock_slow(cycle->b_mutex, &ctx);
err = ww_mutex_lock(&cycle->a_mutex, &ctx);
erra = ww_mutex_lock(&cycle->a_mutex, &ctx);
}
if (!err)
ww_mutex_unlock(cycle->b_mutex);
ww_mutex_unlock(&cycle->a_mutex);
if (!erra)
ww_mutex_unlock(&cycle->a_mutex);
ww_acquire_fini(&ctx);
cycle->result = err;
cycle->result = err ?: erra;
}
static int __test_cycle(unsigned int nthreads)

View File

@@ -1167,7 +1167,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
if (task_cpu(p) != new_cpu) {
if (p->sched_class->migrate_task_rq)
p->sched_class->migrate_task_rq(p);
p->sched_class->migrate_task_rq(p, new_cpu);
p->se.nr_migrations++;
rseq_migrate(p);
perf_event_task_migrate(p);

View File

@@ -1607,7 +1607,7 @@ out:
return cpu;
}
static void migrate_task_rq_dl(struct task_struct *p)
static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused)
{
struct rq *rq;

View File

@@ -1392,6 +1392,17 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
int last_cpupid, this_cpupid;
this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);
last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
/*
* Allow first faults or private faults to migrate immediately early in
* the lifetime of a task. The magic number 4 is based on waiting for
* two full passes of the "multi-stage node selection" test that is
* executed below.
*/
if ((p->numa_preferred_nid == -1 || p->numa_scan_seq <= 4) &&
(cpupid_pid_unset(last_cpupid) || cpupid_match_pid(p, last_cpupid)))
return true;
/*
* Multi-stage node selection is used in conjunction with a periodic
@@ -1410,7 +1421,6 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
* This quadric squishes small probabilities, making it less likely we
* act on an unlikely task<->page relation.
*/
last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
if (!cpupid_pid_unset(last_cpupid) &&
cpupid_to_nid(last_cpupid) != dst_nid)
return false;
@@ -1514,6 +1524,21 @@ struct task_numa_env {
static void task_numa_assign(struct task_numa_env *env,
struct task_struct *p, long imp)
{
struct rq *rq = cpu_rq(env->dst_cpu);
/* Bail out if run-queue part of active NUMA balance. */
if (xchg(&rq->numa_migrate_on, 1))
return;
/*
* Clear previous best_cpu/rq numa-migrate flag, since task now
* found a better CPU to move/swap.
*/
if (env->best_cpu != -1) {
rq = cpu_rq(env->best_cpu);
WRITE_ONCE(rq->numa_migrate_on, 0);
}
if (env->best_task)
put_task_struct(env->best_task);
if (p)
@@ -1552,6 +1577,13 @@ static bool load_too_imbalanced(long src_load, long dst_load,
return (imb > old_imb);
}
/*
 * Maximum NUMA importance can be 1998 (2*999);
 * SMALLIMP @ 30 would be close to 1998/64.
 * Used to deter task migration: task_numa_compare() gives up on a
 * candidate whose importance is below SMALLIMP, or which does not beat
 * the current best importance by more than SMALLIMP / 2.
 */
#define SMALLIMP 30
/*
* This checks if the overall compute and NUMA accesses of the system would
* be improved if the source task was migrated to the target dst_cpu taking
@@ -1569,6 +1601,9 @@ static void task_numa_compare(struct task_numa_env *env,
long moveimp = imp;
int dist = env->dist;
if (READ_ONCE(dst_rq->numa_migrate_on))
return;
rcu_read_lock();
cur = task_rcu_dereference(&dst_rq->curr);
if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
@@ -1582,7 +1617,7 @@ static void task_numa_compare(struct task_numa_env *env,
goto unlock;
if (!cur) {
if (maymove || imp > env->best_imp)
if (maymove && moveimp >= env->best_imp)
goto assign;
else
goto unlock;
@@ -1625,15 +1660,21 @@ static void task_numa_compare(struct task_numa_env *env,
task_weight(cur, env->dst_nid, dist);
}
if (imp <= env->best_imp)
goto unlock;
if (maymove && moveimp > imp && moveimp > env->best_imp) {
imp = moveimp - 1;
imp = moveimp;
cur = NULL;
goto assign;
}
/*
* If the NUMA importance is less than SMALLIMP,
* task migration might only result in ping pong
* of tasks and also hurt performance due to cache
* misses.
*/
if (imp < SMALLIMP || imp <= env->best_imp + SMALLIMP / 2)
goto unlock;
/*
* In the overloaded case, try and keep the load balanced.
*/
@@ -1710,6 +1751,7 @@ static int task_numa_migrate(struct task_struct *p)
.best_cpu = -1,
};
struct sched_domain *sd;
struct rq *best_rq;
unsigned long taskweight, groupweight;
int nid, ret, dist;
long taskimp, groupimp;
@@ -1805,20 +1847,17 @@ static int task_numa_migrate(struct task_struct *p)
if (env.best_cpu == -1)
return -EAGAIN;
/*
* Reset the scan period if the task is being rescheduled on an
* alternative node to recheck if the tasks is now properly placed.
*/
p->numa_scan_period = task_scan_start(p);
best_rq = cpu_rq(env.best_cpu);
if (env.best_task == NULL) {
ret = migrate_task_to(p, env.best_cpu);
WRITE_ONCE(best_rq->numa_migrate_on, 0);
if (ret != 0)
trace_sched_stick_numa(p, env.src_cpu, env.best_cpu);
return ret;
}
ret = migrate_swap(p, env.best_task, env.best_cpu, env.src_cpu);
WRITE_ONCE(best_rq->numa_migrate_on, 0);
if (ret != 0)
trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task));
@@ -2596,6 +2635,39 @@ void task_tick_numa(struct rq *rq, struct task_struct *curr)
}
}
/*
 * update_scan_period - restart a task's NUMA scan period on a cross-node move
 * @p:       task being moved; task_cpu(p) is used as the source CPU
 * @new_cpu: CPU the task is moving to
 *
 * Sets p->numa_scan_period to task_scan_start(p) when the source and
 * destination CPUs belong to different nodes. Nothing is done when NUMA
 * balancing is switched off, when the task has no mm or no NUMA fault
 * statistics, or when the task is exiting.
 */
static void update_scan_period(struct task_struct *p, int new_cpu)
{
	int src_nid = cpu_to_node(task_cpu(p));
	int dst_nid = cpu_to_node(new_cpu);

	if (!static_branch_likely(&sched_numa_balancing))
		return;

	if (!(p->mm && p->numa_faults) || (p->flags & PF_EXITING))
		return;

	/* Staying on the same node: the scan period is left untouched. */
	if (dst_nid == src_nid)
		return;

	/*
	 * A task that has not finished a single scan yet (numa_scan_seq == 0)
	 * always gets its period reset; it was most likely just pulled
	 * cross-node by a wakeup or by load balancing.
	 *
	 * Once at least one scan has completed, be more selective.
	 */
	if (p->numa_scan_seq) {
		int pref_nid = p->numa_preferred_nid;

		/* Landing on the preferred node needs no adjustment. */
		if (pref_nid == dst_nid)
			return;

		/*
		 * A preferred node is set, but the task was not running
		 * on it before this move either: leave the period alone.
		 */
		if (pref_nid != -1 && pref_nid != src_nid)
			return;
	}

	p->numa_scan_period = task_scan_start(p);
}
#else
static void task_tick_numa(struct rq *rq, struct task_struct *curr)
{
@@ -2609,6 +2681,10 @@ static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p)
{
}
/* No-op variant: lives in the #else branch closed by the CONFIG_NUMA_BALANCING #endif. */
static inline void update_scan_period(struct task_struct *p, int new_cpu)
{
}
#endif /* CONFIG_NUMA_BALANCING */
static void
@@ -6275,7 +6351,7 @@ static void detach_entity_cfs_rq(struct sched_entity *se);
* cfs_rq_of(p) references at time of call are still valid and identify the
* previous CPU. The caller guarantees p->pi_lock or task_rq(p)->lock is held.
*/
static void migrate_task_rq_fair(struct task_struct *p)
static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
{
/*
* As blocked tasks retain absolute vruntime the migration needs to
@@ -6328,6 +6404,8 @@ static void migrate_task_rq_fair(struct task_struct *p)
/* We have migrated, no longer consider this task hot */
p->se.exec_start = 0;
update_scan_period(p, new_cpu);
}
static void task_dead_fair(struct task_struct *p)

View File

@@ -783,6 +783,7 @@ struct rq {
#ifdef CONFIG_NUMA_BALANCING
unsigned int nr_numa_running;
unsigned int nr_preferred_running;
unsigned int numa_migrate_on;
#endif
#define CPU_LOAD_IDX_MAX 5
unsigned long cpu_load[CPU_LOAD_IDX_MAX];
@@ -1523,7 +1524,7 @@ struct sched_class {
#ifdef CONFIG_SMP
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
void (*migrate_task_rq)(struct task_struct *p);
void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
void (*task_woken)(struct rq *this_rq, struct task_struct *task);