Merge branch 'linus' into irq/core
Pick up upstream changes to avoid conflicts for pending patches.
@@ -467,7 +467,7 @@ static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
     return kind_ops[BTF_INFO_KIND(t->info)];
 }
 
-bool btf_name_offset_valid(const struct btf *btf, u32 offset)
+static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
 {
     return BTF_STR_OFFSET_VALID(offset) &&
         offset < btf->hdr.str_len;
@@ -1219,8 +1219,6 @@ static void btf_bitfield_seq_show(void *data, u8 bits_offset,
     u8 nr_copy_bits;
     u64 print_num;
 
-    data += BITS_ROUNDDOWN_BYTES(bits_offset);
-    bits_offset = BITS_PER_BYTE_MASKED(bits_offset);
     nr_copy_bits = nr_bits + bits_offset;
     nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);
 
@@ -1255,7 +1253,9 @@ static void btf_int_bits_seq_show(const struct btf *btf,
      * BTF_INT_OFFSET() cannot exceed 64 bits.
      */
     total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
-    btf_bitfield_seq_show(data, total_bits_offset, nr_bits, m);
+    data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
+    bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
+    btf_bitfield_seq_show(data, bits_offset, nr_bits, m);
 }
 
 static void btf_int_seq_show(const struct btf *btf, const struct btf_type *t,
@@ -2001,12 +2001,12 @@ static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t,
 
         member_offset = btf_member_bit_offset(t, member);
         bitfield_size = btf_member_bitfield_size(t, member);
+        bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
+        bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
         if (bitfield_size) {
-            btf_bitfield_seq_show(data, member_offset,
+            btf_bitfield_seq_show(data + bytes_offset, bits8_offset,
                           bitfield_size, m);
         } else {
-            bytes_offset = BITS_ROUNDDOWN_BYTES(member_offset);
-            bits8_offset = BITS_PER_BYTE_MASKED(member_offset);
             ops = btf_type_ops(member_type);
             ops->seq_show(btf, member_type, member->type,
                       data + bytes_offset, bits8_offset, m);
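
The bitfield fix moves the byte/bit split out of btf_bitfield_seq_show() into its callers, so it is applied exactly once on both the struct-member and plain-int paths. A small userspace sketch of the offset arithmetic (the three macros are paraphrased from kernel/bpf/btf.c; the member offset is made up):

    #include <stdio.h>

    #define BITS_PER_BYTE_MASKED(bits)  ((bits) & 7)   /* bits % 8 */
    #define BITS_ROUNDDOWN_BYTES(bits)  ((bits) >> 3)  /* bits / 8 */
    #define BITS_ROUNDUP_BYTES(bits)    (((bits) + 7) >> 3)

    int main(void)
    {
        unsigned int member_offset = 21; /* bit offset within the struct */
        unsigned int nr_bits = 6;        /* bitfield width */

        /* The member starts 2 bytes in, at bit 5 of that byte, and spans
         * BITS_ROUNDUP_BYTES(5 + 6) = 2 bytes. Doing the split once in the
         * caller, as the patch does, avoids applying it twice on the int
         * path.
         */
        printf("byte %u, bit %u, copy %u byte(s)\n",
               BITS_ROUNDDOWN_BYTES(member_offset),
               BITS_PER_BYTE_MASKED(member_offset),
               BITS_ROUNDUP_BYTES(BITS_PER_BYTE_MASKED(member_offset) + nr_bits));
        return 0;
    }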
@@ -718,6 +718,7 @@ cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
     case BPF_FUNC_trace_printk:
         if (capable(CAP_SYS_ADMIN))
             return bpf_get_trace_printk_proto();
+        /* fall through */
     default:
         return NULL;
     }
@@ -12,6 +12,7 @@
 struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 {
     struct bpf_map *inner_map, *inner_map_meta;
+    u32 inner_map_meta_size;
     struct fd f;
 
     f = fdget(inner_map_ufd);
@@ -36,7 +37,12 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
         return ERR_PTR(-EINVAL);
     }
 
-    inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER);
+    inner_map_meta_size = sizeof(*inner_map_meta);
+    /* In some cases verifier needs to access beyond just base map. */
+    if (inner_map->ops == &array_map_ops)
+        inner_map_meta_size = sizeof(struct bpf_array);
+
+    inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER);
     if (!inner_map_meta) {
         fdput(f);
         return ERR_PTR(-ENOMEM);
@@ -46,9 +52,16 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
     inner_map_meta->key_size = inner_map->key_size;
     inner_map_meta->value_size = inner_map->value_size;
     inner_map_meta->map_flags = inner_map->map_flags;
-    inner_map_meta->ops = inner_map->ops;
     inner_map_meta->max_entries = inner_map->max_entries;
 
+    /* Misc members not needed in bpf_map_meta_equal() check. */
+    inner_map_meta->ops = inner_map->ops;
+    if (inner_map->ops == &array_map_ops) {
+        inner_map_meta->unpriv_array = inner_map->unpriv_array;
+        container_of(inner_map_meta, struct bpf_array, map)->index_mask =
+             container_of(inner_map, struct bpf_array, map)->index_mask;
+    }
+
     fdput(f);
     return inner_map_meta;
 }
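
The index_mask copied into the inner map's meta is the array map's bounds-check hardening: lookups are masked with the next power of two of the entry count, so even a mispredicted bounds check cannot read past the allocation. A hedged userspace sketch of that masking idea (names invented, not the kernel's implementation):

    #include <stdint.h>

    /* Round n up to a power of two, minus one: 10 -> 15 (0b1111). */
    static uint32_t index_mask_for(uint32_t n)
    {
        uint32_t m = 1;

        while (m < n)
            m <<= 1;
        return m - 1;
    }

    uint64_t lookup(const uint64_t *vals, uint32_t n, uint32_t idx)
    {
        uint32_t mask = index_mask_for(n);

        if (idx >= n)
            return 0;
        /* Even if the branch above is speculated past, the AND keeps the
         * access inside the power-of-two padded allocation.
         */
        return vals[idx & mask];
    }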
@@ -180,11 +180,14 @@ static inline int stack_map_parse_build_id(void *page_addr,
 
         if (nhdr->n_type == BPF_BUILD_ID &&
             nhdr->n_namesz == sizeof("GNU") &&
-            nhdr->n_descsz == BPF_BUILD_ID_SIZE) {
+            nhdr->n_descsz > 0 &&
+            nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
             memcpy(build_id,
                    note_start + note_offs +
                    ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
-                   BPF_BUILD_ID_SIZE);
+                   nhdr->n_descsz);
+            memset(build_id + nhdr->n_descsz, 0,
+                   BPF_BUILD_ID_SIZE - nhdr->n_descsz);
             return 0;
         }
         new_offs = note_offs + sizeof(Elf32_Nhdr) +
@@ -260,7 +263,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
         return -EFAULT; /* page not mapped */
 
     ret = -EINVAL;
-    page_addr = page_address(page);
+    page_addr = kmap_atomic(page);
     ehdr = (Elf32_Ehdr *)page_addr;
 
     /* compare magic x7f "ELF" */
@@ -276,6 +279,7 @@ static int stack_map_get_build_id(struct vm_area_struct *vma,
     else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
         ret = stack_map_get_build_id_64(page_addr, build_id);
 out:
+    kunmap_atomic(page_addr);
     put_page(page);
     return ret;
 }
@@ -310,6 +314,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
         for (i = 0; i < trace_nr; i++) {
             id_offs[i].status = BPF_STACK_BUILD_ID_IP;
             id_offs[i].ip = ips[i];
+            memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
         }
         return;
     }
@@ -320,6 +325,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
             /* per entry fall back to ips */
             id_offs[i].status = BPF_STACK_BUILD_ID_IP;
             id_offs[i].ip = ips[i];
+            memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
             continue;
         }
         id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
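
The first hunk relaxes the parser because toolchains can emit build ids shorter than BPF_BUILD_ID_SIZE (20 bytes), e.g. 16-byte MD5 ids: the copy is now bounded by n_descsz and the tail zero-filled. A hedged userspace sketch of the NT_GNU_BUILD_ID note layout being walked (Elf32_Nhdr comes from <elf.h>; 3 is NT_GNU_BUILD_ID; the 20-byte cap mirrors BPF_BUILD_ID_SIZE):

    #include <elf.h>
    #include <string.h>

    /* A note is: Elf32_Nhdr, then the name ("GNU\0", padded to 4 bytes),
     * then n_descsz bytes of descriptor (the build id itself).
     */
    int read_build_id(const void *note, unsigned char out[20])
    {
        const Elf32_Nhdr *nhdr = note;
        const unsigned char *desc;

        if (nhdr->n_type != 3 /* NT_GNU_BUILD_ID */ ||
            nhdr->n_namesz != sizeof("GNU") ||
            nhdr->n_descsz == 0 || nhdr->n_descsz > 20)
            return -1;
        desc = (const unsigned char *)(nhdr + 1) + 4; /* skip "GNU\0" */
        memcpy(out, desc, nhdr->n_descsz);
        memset(out + nhdr->n_descsz, 0, 20 - nhdr->n_descsz);
        return 0;
    }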
@@ -3103,6 +3103,40 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
     }
 }
 
+static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
+                    const struct bpf_insn *insn)
+{
+    return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
+}
+
+static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
+                       u32 alu_state, u32 alu_limit)
+{
+    /* If we arrived here from different branches with different
+     * state or limits to sanitize, then this won't work.
+     */
+    if (aux->alu_state &&
+        (aux->alu_state != alu_state ||
+         aux->alu_limit != alu_limit))
+        return -EACCES;
+
+    /* Corresponding fixup done in fixup_bpf_calls(). */
+    aux->alu_state = alu_state;
+    aux->alu_limit = alu_limit;
+    return 0;
+}
+
+static int sanitize_val_alu(struct bpf_verifier_env *env,
+                struct bpf_insn *insn)
+{
+    struct bpf_insn_aux_data *aux = cur_aux(env);
+
+    if (can_skip_alu_sanitation(env, insn))
+        return 0;
+
+    return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
+}
+
 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
                 struct bpf_insn *insn,
                 const struct bpf_reg_state *ptr_reg,
@@ -3117,7 +3151,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
     struct bpf_reg_state tmp;
     bool ret;
 
-    if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K)
+    if (can_skip_alu_sanitation(env, insn))
         return 0;
 
     /* We already marked aux for masking from non-speculative
@@ -3133,19 +3167,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 
     if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
         return 0;
 
-    /* If we arrived here from different branches with different
-     * limits to sanitize, then this won't work.
-     */
-    if (aux->alu_state &&
-        (aux->alu_state != alu_state ||
-         aux->alu_limit != alu_limit))
+    if (update_alu_sanitation_state(aux, alu_state, alu_limit))
         return -EACCES;
 
-    /* Corresponding fixup done in fixup_bpf_calls(). */
-    aux->alu_state = alu_state;
-    aux->alu_limit = alu_limit;
-
 do_sim:
     /* Simulate and find potential out-of-bounds access under
      * speculative execution from truncation as a result of
@@ -3418,6 +3441,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
     s64 smin_val, smax_val;
     u64 umin_val, umax_val;
     u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
+    u32 dst = insn->dst_reg;
+    int ret;
 
     if (insn_bitness == 32) {
         /* Relevant for 32-bit RSH: Information can propagate towards
@@ -3452,6 +3477,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 
     switch (opcode) {
     case BPF_ADD:
+        ret = sanitize_val_alu(env, insn);
+        if (ret < 0) {
+            verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
+            return ret;
+        }
         if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
             signed_add_overflows(dst_reg->smax_value, smax_val)) {
             dst_reg->smin_value = S64_MIN;
@@ -3471,6 +3501,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
         dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
         break;
     case BPF_SUB:
+        ret = sanitize_val_alu(env, insn);
+        if (ret < 0) {
+            verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
+            return ret;
+        }
         if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
             signed_sub_overflows(dst_reg->smax_value, smin_val)) {
             /* Overflow possible, we know nothing */
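
The two new helpers centralize what was previously open-coded in sanitize_ptr_alu(): the sanitation state attached to an instruction is write-once, so two verifier paths reaching the same instruction with different limits fail closed instead of silently overwriting each other. A hedged standalone sketch of that set-once-or-reject pattern (struct simplified from bpf_insn_aux_data):

    #include <errno.h>
    #include <stdint.h>

    struct aux { uint32_t alu_state, alu_limit; };

    /* Returns 0 if the record was empty or already holds exactly these
     * values; -EACCES if another path recorded a conflicting state, since
     * a single rewrite site cannot serve two different limits.
     */
    int update_state(struct aux *aux, uint32_t state, uint32_t limit)
    {
        if (aux->alu_state &&
            (aux->alu_state != state || aux->alu_limit != limit))
            return -EACCES;
        aux->alu_state = state;
        aux->alu_limit = limit;
        return 0;
    }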
kernel/cpu.c (38 lines changed)
@@ -376,9 +376,6 @@ void __weak arch_smt_update(void) { }
 
 #ifdef CONFIG_HOTPLUG_SMT
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
 EXPORT_SYMBOL_GPL(cpu_smt_control);
 
-static bool cpu_smt_available __read_mostly;
-
 void __init cpu_smt_disable(bool force)
 {
@@ -397,25 +394,11 @@ void __init cpu_smt_disable(bool force)
 
 /*
  * The decision whether SMT is supported can only be done after the full
- * CPU identification. Called from architecture code before non boot CPUs
- * are brought up.
- */
-void __init cpu_smt_check_topology_early(void)
-{
-    if (!topology_smt_supported())
-        cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
-}
-
-/*
- * If SMT was disabled by BIOS, detect it here, after the CPUs have been
- * brought online. This ensures the smt/l1tf sysfs entries are consistent
- * with reality. cpu_smt_available is set to true during the bringup of non
- * boot CPUs when a SMT sibling is detected. Note, this may overwrite
- * cpu_smt_control's previous setting.
+ * CPU identification. Called from architecture code.
  */
 void __init cpu_smt_check_topology(void)
 {
-    if (!cpu_smt_available)
+    if (!topology_smt_supported())
         cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
 }
@@ -428,18 +411,10 @@ early_param("nosmt", smt_cmdline_disable);
 
 static inline bool cpu_smt_allowed(unsigned int cpu)
 {
-    if (topology_is_primary_thread(cpu))
+    if (cpu_smt_control == CPU_SMT_ENABLED)
         return true;
 
-    /*
-     * If the CPU is not a 'primary' thread and the booted_once bit is
-     * set then the processor has SMT support. Store this information
-     * for the late check of SMT support in cpu_smt_check_topology().
-     */
-    if (per_cpu(cpuhp_state, cpu).booted_once)
-        cpu_smt_available = true;
-
-    if (cpu_smt_control == CPU_SMT_ENABLED)
+    if (topology_is_primary_thread(cpu))
         return true;
 
     /*
@@ -2090,10 +2065,8 @@ static int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
          */
         cpuhp_offline_cpu_device(cpu);
     }
-    if (!ret) {
+    if (!ret)
         cpu_smt_control = ctrlval;
-        arch_smt_update();
-    }
     cpu_maps_update_done();
     return ret;
 }
@@ -2104,7 +2077,6 @@ static int cpuhp_smt_enable(void)
 
     cpu_maps_update_begin();
     cpu_smt_control = CPU_SMT_ENABLED;
-    arch_smt_update();
     for_each_present_cpu(cpu) {
         /* Skip online CPUs and CPUs on offline nodes */
         if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
@@ -378,6 +378,8 @@ void __init swiotlb_exit(void)
         memblock_free_late(io_tlb_start,
                    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
     }
+    io_tlb_start = 0;
+    io_tlb_end = 0;
     io_tlb_nslabs = 0;
     max_segment = 0;
 }
@@ -436,18 +436,18 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
         void __user *buffer, size_t *lenp,
         loff_t *ppos)
 {
-    int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-
-    if (ret || !write)
-        return ret;
-
+    int ret;
+    int perf_cpu = sysctl_perf_cpu_time_max_percent;
     /*
      * If throttling is disabled don't allow the write:
      */
-    if (sysctl_perf_cpu_time_max_percent == 100 ||
-        sysctl_perf_cpu_time_max_percent == 0)
+    if (write && (perf_cpu == 100 || perf_cpu == 0))
        return -EINVAL;
 
+    ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+    if (ret || !write)
+        return ret;
+
     max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
     perf_sample_period_ns = NSEC_PER_SEC / sysctl_perf_event_sample_rate;
     update_perf_cpu_limits();
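
The bug here was ordering: proc_dointvec_minmax() had already overwritten the sysctl variable before the throttling-disabled check ran, so the check tested the new value instead of gating the write. A hedged sketch of the corrected pattern with a made-up knob and handler:

    #include <linux/sysctl.h>

    static int my_knob;

    /* Snapshot the state that gates the write *before* letting
     * proc_dointvec_minmax() overwrite it, and only enforce the gate for
     * writes so reads keep working.
     */
    static int my_knob_handler(struct ctl_table *table, int write,
                   void __user *buffer, size_t *lenp, loff_t *ppos)
    {
        int old = my_knob;
        int ret;

        if (write && old == 0) /* feature disabled: refuse changes */
            return -EINVAL;

        ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
        if (ret || !write)
            return ret;

        /* derived state may now be recomputed from the new value */
        return 0;
    }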
@@ -307,7 +307,7 @@ void rcuwait_wake_up(struct rcuwait *w)
     *        MB (A)          MB (B)
     *    [L] cond        [L] tsk
     */
-    smp_rmb(); /* (B) */
+    smp_mb(); /* (B) */
 
     /*
      * Avoid using task_rcu_dereference() magic as long as we are careful,
@@ -558,12 +558,14 @@ static struct task_struct *find_alive_thread(struct task_struct *p)
     return NULL;
 }
 
-static struct task_struct *find_child_reaper(struct task_struct *father)
+static struct task_struct *find_child_reaper(struct task_struct *father,
+                        struct list_head *dead)
     __releases(&tasklist_lock)
     __acquires(&tasklist_lock)
 {
     struct pid_namespace *pid_ns = task_active_pid_ns(father);
     struct task_struct *reaper = pid_ns->child_reaper;
+    struct task_struct *p, *n;
 
     if (likely(reaper != father))
         return reaper;
@@ -579,6 +581,12 @@ static struct task_struct *find_child_reaper(struct task_struct *father)
         panic("Attempted to kill init! exitcode=0x%08x\n",
             father->signal->group_exit_code ?: father->exit_code);
     }
 
+    list_for_each_entry_safe(p, n, dead, ptrace_entry) {
+        list_del_init(&p->ptrace_entry);
+        release_task(p);
+    }
+
     zap_pid_ns_processes(pid_ns);
     write_lock_irq(&tasklist_lock);
 
@@ -668,7 +676,7 @@ static void forget_original_parent(struct task_struct *father,
     exit_ptrace(father, dead);
 
     /* Can drop and reacquire tasklist_lock */
-    reaper = find_child_reaper(father);
+    reaper = find_child_reaper(father, dead);
     if (list_empty(&father->children))
         return;
 
@@ -866,6 +874,7 @@ void __noreturn do_exit(long code)
     exit_task_namespaces(tsk);
     exit_task_work(tsk);
     exit_thread(tsk);
+    exit_umh(tsk);
 
     /*
      * Flush inherited counters to the parent - before the parent
@@ -1452,11 +1452,7 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
     if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))
         return;
 
-    /*
-     * Queue the task for later wakeup for after we've released
-     * the hb->lock. wake_q_add() grabs reference to p.
-     */
-    wake_q_add(wake_q, p);
+    get_task_struct(p);
     __unqueue_futex(q);
     /*
      * The waiting task can free the futex_q as soon as q->lock_ptr = NULL
@@ -1466,6 +1462,13 @@ static void mark_wake_futex(struct wake_q_head *wake_q, struct futex_q *q)
      * plist_del in __unqueue_futex().
      */
     smp_store_release(&q->lock_ptr, NULL);
+
+    /*
+     * Queue the task for later wakeup for after we've released
+     * the hb->lock. wake_q_add() grabs reference to p.
+     */
+    wake_q_add(wake_q, p);
+    put_task_struct(p);
 }
 
 /*
@@ -458,7 +458,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node,
 
     /* Validate affinity mask(s) */
     if (affinity) {
-        for (i = 0; i < cnt; i++, i++) {
+        for (i = 0; i < cnt; i++) {
             if (cpumask_empty(&affinity[i].mask))
                 return -EINVAL;
         }
@@ -393,6 +393,9 @@ int irq_setup_affinity(struct irq_desc *desc)
     }
 
     cpumask_and(&mask, cpu_online_mask, set);
+    if (cpumask_empty(&mask))
+        cpumask_copy(&mask, cpu_online_mask);
+
     if (node != NUMA_NO_NODE) {
         const struct cpumask *nodemask = cpumask_of_node(node);
 
@@ -198,15 +198,22 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
         woken++;
         tsk = waiter->task;
 
-        wake_q_add(wake_q, tsk);
+        get_task_struct(tsk);
         list_del(&waiter->list);
         /*
-         * Ensure that the last operation is setting the reader
+         * Ensure calling get_task_struct() before setting the reader
          * waiter to nil such that rwsem_down_read_failed() cannot
         * race with do_exit() by always holding a reference count
         * to the task to wakeup.
         */
        smp_store_release(&waiter->task, NULL);
+        /*
+         * Ensure issuing the wakeup (either by us or someone else)
+         * after setting the reader waiter to nil.
+         */
+        wake_q_add(wake_q, tsk);
+        /* wake_q_add() already take the task ref */
+        put_task_struct(tsk);
     }
 
     adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
@@ -396,6 +396,18 @@ static bool set_nr_if_polling(struct task_struct *p)
 #endif
 #endif
 
+/**
+ * wake_q_add() - queue a wakeup for 'later' waking.
+ * @head: the wake_q_head to add @task to
+ * @task: the task to queue for 'later' wakeup
+ *
+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the
+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come
+ * instantly.
+ *
+ * This function must be used as-if it were wake_up_process(); IOW the task
+ * must be ready to be woken at this location.
+ */
 void wake_q_add(struct wake_q_head *head, struct task_struct *task)
 {
     struct wake_q_node *node = &task->wake_q;
@@ -405,10 +417,11 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
     * its already queued (either by us or someone else) and will get the
     * wakeup due to that.
     *
-     * This cmpxchg() executes a full barrier, which pairs with the full
-     * barrier executed by the wakeup in wake_up_q().
+     * In order to ensure that a pending wakeup will observe our pending
+     * state, even in the failed case, an explicit smp_mb() must be used.
     */
-    if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))
+    smp_mb__before_atomic();
+    if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))
         return;
 
     get_task_struct(task);
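
The docstring and barrier change above spell out wake_q_add()'s contract; the futex and rwsem hunks earlier are its callers being fixed to honor it. For context, a hedged sketch of the usual caller pattern (DEFINE_WAKE_Q(), wake_q_add() and wake_up_q() are the real API; struct my_queue/my_waiter are hypothetical):

    #include <linux/list.h>
    #include <linux/sched/wake_q.h>
    #include <linux/spinlock.h>

    /* Collect tasks under the lock, wake them after it is dropped so the
     * woken tasks do not immediately contend on q->lock.
     */
    static void wake_all_waiters(struct my_queue *q)
    {
        DEFINE_WAKE_Q(wake_q);
        struct my_waiter *w;

        spin_lock(&q->lock);
        list_for_each_entry(w, &q->waiters, node)
            wake_q_add(&wake_q, w->task); /* takes a task reference */
        spin_unlock(&q->lock);

        wake_up_q(&wake_q); /* wakes each task and drops the references */
    }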
@@ -5980,6 +5980,7 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 
 #ifdef CONFIG_SCHED_SMT
 DEFINE_STATIC_KEY_FALSE(sched_smt_present);
+EXPORT_SYMBOL_GPL(sched_smt_present);
 
 static inline void set_idle_cores(int cpu, int val)
 {
@@ -124,6 +124,7 @@
  * sampling of the aggregate task states would be.
  */
 
+#include "../workqueue_internal.h"
 #include <linux/sched/loadavg.h>
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
@@ -480,9 +481,6 @@ static void psi_group_change(struct psi_group *group, int cpu,
         groupc->tasks[t]++;
 
     write_seqcount_end(&groupc->seq);
-
-    if (!delayed_work_pending(&group->clock_work))
-        schedule_delayed_work(&group->clock_work, PSI_FREQ);
 }
 
 static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
@@ -513,6 +511,7 @@ void psi_task_change(struct task_struct *task, int clear, int set)
 {
     int cpu = task_cpu(task);
     struct psi_group *group;
+    bool wake_clock = true;
     void *iter = NULL;
 
     if (!task->pid)
@@ -530,8 +529,22 @@ void psi_task_change(struct task_struct *task, int clear, int set)
     task->psi_flags &= ~clear;
     task->psi_flags |= set;
 
-    while ((group = iterate_groups(task, &iter)))
+    /*
+     * Periodic aggregation shuts off if there is a period of no
+     * task changes, so we wake it back up if necessary. However,
+     * don't do this if the task change is the aggregation worker
+     * itself going to sleep, or we'll ping-pong forever.
+     */
+    if (unlikely((clear & TSK_RUNNING) &&
+             (task->flags & PF_WQ_WORKER) &&
+             wq_worker_last_func(task) == psi_update_work))
+        wake_clock = false;
+
+    while ((group = iterate_groups(task, &iter))) {
         psi_group_change(group, cpu, clear, set);
+        if (wake_clock && !delayed_work_pending(&group->clock_work))
+            schedule_delayed_work(&group->clock_work, PSI_FREQ);
+    }
 }
 
 void psi_memstall_tick(struct task_struct *task, int cpu)
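
The wake_clock test guards against a wake/sleep feedback loop. A simplified sketch of the decision, using the names from the diff but compressing the call chain (the real test lives inline in psi_task_change(), invoked from scheduler context):

    /*
     * A kworker going to sleep passes through psi_task_change() like any
     * other task. If that kworker's last executed work item was
     * psi_update_work itself, re-arming clock_work here would wake the
     * aggregator it just put to sleep, forever.
     */
    static bool should_wake_clock(struct task_struct *task, int clear)
    {
        return !((clear & TSK_RUNNING) &&
             (task->flags & PF_WQ_WORKER) &&
             wq_worker_last_func(task) == psi_update_work);
    }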
@@ -976,6 +976,9 @@ static int seccomp_notify_release(struct inode *inode, struct file *file)
     struct seccomp_filter *filter = file->private_data;
     struct seccomp_knotif *knotif;
 
+    if (!filter)
+        return 0;
+
     mutex_lock(&filter->notify_lock);
 
     /*
@@ -1300,6 +1303,7 @@ out:
 out_put_fd:
     if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) {
         if (ret < 0) {
+            listener_f->private_data = NULL;
            fput(listener_f);
            put_unused_fd(listener);
         } else {
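
The two hunks work together: clearing ->private_data on the error path "disarms" the file so the fput()-triggered release sees NULL and bails out, and the NULL check in release makes that safe. A hedged sketch of the general pattern (fops, state type and finish_setup() are hypothetical; the fd/anon-inode calls are the real API):

    #include <linux/anon_inodes.h>
    #include <linux/err.h>
    #include <linux/file.h>

    static int demo_install(struct demo_state *state)
    {
        struct file *f;
        int fd, err;

        fd = get_unused_fd_flags(O_CLOEXEC);
        if (fd < 0)
            return fd;
        f = anon_inode_getfile("demo", &demo_fops, state, O_RDWR);
        if (IS_ERR(f)) {
            put_unused_fd(fd);
            return PTR_ERR(f);
        }
        err = finish_setup(state); /* hypothetical */
        if (err) {
            f->private_data = NULL; /* disarm ->release() */
            fput(f);
            put_unused_fd(fd);
            return err;
        }
        fd_install(fd, f);
        return fd;
    }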
@@ -584,8 +584,6 @@ void __init smp_init(void)
         num_nodes, (num_nodes > 1 ? "s" : ""),
         num_cpus, (num_cpus > 1 ? "s" : ""));
 
-    /* Final decision about SMT support */
-    cpu_smt_check_topology();
     /* Any cleanup work */
     smp_cpus_done(setup_max_cpus);
 }
@@ -685,6 +685,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
     * set up the signal and overrun bookkeeping.
     */
    timer->it.cpu.incr = timespec64_to_ns(&new->it_interval);
+    timer->it_interval = ns_to_ktime(timer->it.cpu.incr);
 
     /*
      * This acts as a modification timestamp for the timer,
@@ -607,11 +607,17 @@ static int trace_kprobe_create(int argc, const char *argv[])
     char buf[MAX_EVENT_NAME_LEN];
     unsigned int flags = TPARG_FL_KERNEL;
 
-    /* argc must be >= 1 */
-    if (argv[0][0] == 'r') {
+    switch (argv[0][0]) {
+    case 'r':
         is_return = true;
         flags |= TPARG_FL_RETURN;
-    } else if (argv[0][0] != 'p' || argc < 2)
+        break;
+    case 'p':
+        break;
+    default:
         return -ECANCELED;
+    }
+    if (argc < 2)
+        return -ECANCELED;
 
     event = strchr(&argv[0][1], ':');
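
The switch parses the leading 'p'/'r' of definitions written to the kprobe_events file. A hedged userspace sketch of what feeds this parser (probe names invented; assumes tracefs mounted at /sys/kernel/tracing and root privileges; "a" mode appends so existing probes are kept):

    #include <stdio.h>

    int main(void)
    {
        FILE *f = fopen("/sys/kernel/tracing/kprobe_events", "a");

        if (!f)
            return 1;
        fprintf(f, "p:my_open do_sys_open\n");     /* 'p' -> kprobe */
        fprintf(f, "r:my_open_ret do_sys_open\n"); /* 'r' -> kretprobe */
        /* any other prefix now hits 'default:' and gets -ECANCELED */
        fclose(f);
        return 0;
    }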
@@ -5,7 +5,7 @@
  * Copyright (C) IBM Corporation, 2010-2012
  * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
  */
-#define pr_fmt(fmt) "trace_kprobe: " fmt
+#define pr_fmt(fmt) "trace_uprobe: " fmt
 
 #include <linux/ctype.h>
 #include <linux/module.h>
@@ -160,6 +160,13 @@ fetch_store_string(unsigned long addr, void *dest, void *base)
     if (ret >= 0) {
         if (ret == maxlen)
             dst[ret - 1] = '\0';
+        else
+            /*
+             * Include the terminating null byte. In this case it
+             * was copied by strncpy_from_user but not accounted
+             * for in ret.
+             */
+            ret++;
         *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
     }
 
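
The else branch corrects the stored length: strncpy_from_user() copies the terminating NUL but returns the string length without it. A small sketch of the accounting rule (helper name invented):

    #include <stddef.h>
    #include <sys/types.h>

    /* Given strncpy_from_user()-style semantics, compute how many bytes
     * actually landed in the buffer, NUL included.
     */
    size_t stored_len(ssize_t copied, size_t maxlen)
    {
        if (copied < 0)
            return 0;               /* fault: nothing stored */
        if ((size_t)copied == maxlen)
            return maxlen;          /* truncated; NUL forced into last byte */
        return (size_t)copied + 1;  /* include the copied NUL */
    }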
kernel/umh.c (33 lines changed)
@@ -37,6 +37,8 @@ static kernel_cap_t usermodehelper_bset = CAP_FULL_SET;
 static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET;
 static DEFINE_SPINLOCK(umh_sysctl_lock);
 static DECLARE_RWSEM(umhelper_sem);
+static LIST_HEAD(umh_list);
+static DEFINE_MUTEX(umh_list_lock);
 
 static void call_usermodehelper_freeinfo(struct subprocess_info *info)
 {
@@ -100,10 +102,12 @@ static int call_usermodehelper_exec_async(void *data)
     commit_creds(new);
 
     sub_info->pid = task_pid_nr(current);
-    if (sub_info->file)
+    if (sub_info->file) {
         retval = do_execve_file(sub_info->file,
                     sub_info->argv, sub_info->envp);
-    else
+        if (!retval)
+            current->flags |= PF_UMH;
+    } else
         retval = do_execve(getname_kernel(sub_info->path),
                    (const char __user *const __user *)sub_info->argv,
                    (const char __user *const __user *)sub_info->envp);
@@ -517,6 +521,11 @@ int fork_usermode_blob(void *data, size_t len, struct umh_info *info)
         goto out;
 
     err = call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC);
+    if (!err) {
+        mutex_lock(&umh_list_lock);
+        list_add(&info->list, &umh_list);
+        mutex_unlock(&umh_list_lock);
+    }
 out:
     fput(file);
     return err;
@@ -679,6 +688,26 @@ static int proc_cap_handler(struct ctl_table *table, int write,
     return 0;
 }
 
+void __exit_umh(struct task_struct *tsk)
+{
+    struct umh_info *info;
+    pid_t pid = tsk->pid;
+
+    mutex_lock(&umh_list_lock);
+    list_for_each_entry(info, &umh_list, list) {
+        if (info->pid == pid) {
+            list_del(&info->list);
+            mutex_unlock(&umh_list_lock);
+            goto out;
+        }
+    }
+    mutex_unlock(&umh_list_lock);
+    return;
+out:
+    if (info->cleanup)
+        info->cleanup(info);
+}
+
 struct ctl_table usermodehelper_table[] = {
     {
         .procname   = "bset",
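
With the umh_list in place, a module that spawns a helper blob can learn when the helper exits: __exit_umh() (called from do_exit() via the exit_umh() hook added earlier in this merge) looks the process up by pid and invokes the new cleanup callback. A hedged sketch of a caller (blob loading and names invented; the API shape follows the 5.0-era fork_usermode_blob()/struct umh_info):

    static struct umh_info my_umh;

    static void my_umh_cleanup(struct umh_info *info)
    {
        /* runs from __exit_umh() when the helper process exits */
        pr_info("helper %d went away\n", info->pid);
    }

    static int my_start_helper(void *blob, size_t len)
    {
        int err;

        my_umh.cmdline = "my_helper";
        my_umh.cleanup = my_umh_cleanup;
        err = fork_usermode_blob(blob, len, &my_umh);
        if (err)
            return err;
        /* talk to the helper over my_umh.pipe_to_umh / pipe_from_umh */
        return 0;
    }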
@@ -909,6 +909,26 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
     return to_wakeup ? to_wakeup->task : NULL;
 }
 
+/**
+ * wq_worker_last_func - retrieve worker's last work function
+ *
+ * Determine the last function a worker executed. This is called from
+ * the scheduler to get a worker's last known identity.
+ *
+ * CONTEXT:
+ * spin_lock_irq(rq->lock)
+ *
+ * Return:
+ * The last work function %current executed as a worker, NULL if it
+ * hasn't executed any work yet.
+ */
+work_func_t wq_worker_last_func(struct task_struct *task)
+{
+    struct worker *worker = kthread_data(task);
+
+    return worker->last_func;
+}
+
 /**
  * worker_set_flags - set worker flags and adjust nr_running accordingly
  * @worker: self
@@ -2184,6 +2204,9 @@ __acquires(&pool->lock)
     if (unlikely(cpu_intensive))
         worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
 
+    /* tag the worker for identification in schedule() */
+    worker->last_func = worker->current_func;
+
     /* we're done with it, release */
     hash_del(&worker->hentry);
     worker->current_work = NULL;
@@ -53,6 +53,9 @@ struct worker {
 
     /* used only by rescuers to point to the target workqueue */
     struct workqueue_struct *rescue_wq; /* I: the workqueue to rescue */
+
+    /* used by the scheduler to determine a worker's last known identity */
+    work_func_t     last_func;
 };
 
 /**
@@ -67,9 +70,10 @@ static inline struct worker *current_wq_worker(void)
 
 /*
  * Scheduler hooks for concurrency managed workqueue. Only to be used from
- * sched/core.c and workqueue.c.
+ * sched/ and workqueue.c.
  */
 void wq_worker_waking_up(struct task_struct *task, int cpu);
 struct task_struct *wq_worker_sleeping(struct task_struct *task);
+work_func_t wq_worker_last_func(struct task_struct *task);
 
 #endif /* _KERNEL_WORKQUEUE_INTERNAL_H */