Merge tag 'v4.17-rc5' into irq/core, to pick up fixes

Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Ingo Molnar
2018-05-14 11:22:59 +02:00
949 changed files with 8721 additions and 4812 deletions

View File

@@ -476,7 +476,7 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr)
}
/* decrement refcnt of all bpf_progs that are stored in this map */
void bpf_fd_array_map_clear(struct bpf_map *map)
static void bpf_fd_array_map_clear(struct bpf_map *map)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;
@@ -495,6 +495,7 @@ const struct bpf_map_ops prog_array_map_ops = {
.map_fd_get_ptr = prog_fd_array_get_ptr,
.map_fd_put_ptr = prog_fd_array_put_ptr,
.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
.map_release_uref = bpf_fd_array_map_clear,
};
static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,

View File

@@ -1572,13 +1572,32 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs)
return cnt;
}
static bool bpf_prog_array_copy_core(struct bpf_prog **prog,
u32 *prog_ids,
u32 request_cnt)
{
int i = 0;
for (; *prog; prog++) {
if (*prog == &dummy_bpf_prog.prog)
continue;
prog_ids[i] = (*prog)->aux->id;
if (++i == request_cnt) {
prog++;
break;
}
}
return !!(*prog);
}
int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
__u32 __user *prog_ids, u32 cnt)
{
struct bpf_prog **prog;
unsigned long err = 0;
u32 i = 0, *ids;
bool nospc;
u32 *ids;
/* users of this function are doing:
* cnt = bpf_prog_array_length();
@@ -1595,16 +1614,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
return -ENOMEM;
rcu_read_lock();
prog = rcu_dereference(progs)->progs;
for (; *prog; prog++) {
if (*prog == &dummy_bpf_prog.prog)
continue;
ids[i] = (*prog)->aux->id;
if (++i == cnt) {
prog++;
break;
}
}
nospc = !!(*prog);
nospc = bpf_prog_array_copy_core(prog, ids, cnt);
rcu_read_unlock();
err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
kfree(ids);
@@ -1683,22 +1693,25 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
}
int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
__u32 __user *prog_ids, u32 request_cnt,
__u32 __user *prog_cnt)
u32 *prog_ids, u32 request_cnt,
u32 *prog_cnt)
{
struct bpf_prog **prog;
u32 cnt = 0;
if (array)
cnt = bpf_prog_array_length(array);
if (copy_to_user(prog_cnt, &cnt, sizeof(cnt)))
return -EFAULT;
*prog_cnt = cnt;
/* return early if user requested only program count or nothing to copy */
if (!request_cnt || !cnt)
return 0;
return bpf_prog_array_copy_to_user(array, prog_ids, request_cnt);
/* this function is called under trace/bpf_trace.c: bpf_event_mutex */
prog = rcu_dereference_check(array, 1)->progs;
return bpf_prog_array_copy_core(prog, prog_ids, request_cnt) ? -ENOSPC
: 0;
}
static void bpf_prog_free_deferred(struct work_struct *work)

View File

@@ -43,6 +43,7 @@
#include <net/tcp.h>
#include <linux/ptr_ring.h>
#include <net/inet_common.h>
#include <linux/sched/signal.h>
#define SOCK_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
@@ -325,6 +326,9 @@ retry:
if (ret > 0) {
if (apply)
apply_bytes -= ret;
sg->offset += ret;
sg->length -= ret;
size -= ret;
offset += ret;
if (uncharge)
@@ -332,8 +336,6 @@ retry:
goto retry;
}
sg->length = size;
sg->offset = offset;
return ret;
}
@@ -391,7 +393,8 @@ static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
} while (i != md->sg_end);
}
static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
static void free_bytes_sg(struct sock *sk, int bytes,
struct sk_msg_buff *md, bool charge)
{
struct scatterlist *sg = md->sg_data;
int i = md->sg_start, free;
@@ -401,11 +404,13 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
if (bytes < free) {
sg[i].length -= bytes;
sg[i].offset += bytes;
sk_mem_uncharge(sk, bytes);
if (charge)
sk_mem_uncharge(sk, bytes);
break;
}
sk_mem_uncharge(sk, sg[i].length);
if (charge)
sk_mem_uncharge(sk, sg[i].length);
put_page(sg_page(&sg[i]));
bytes -= sg[i].length;
sg[i].length = 0;
@@ -416,6 +421,7 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
if (i == MAX_SKB_FRAGS)
i = 0;
}
md->sg_start = i;
}
static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
@@ -523,8 +529,6 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
i = md->sg_start;
do {
r->sg_data[i] = md->sg_data[i];
size = (apply && apply_bytes < md->sg_data[i].length) ?
apply_bytes : md->sg_data[i].length;
@@ -535,6 +539,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
}
sk_mem_charge(sk, size);
r->sg_data[i] = md->sg_data[i];
r->sg_data[i].length = size;
md->sg_data[i].length -= size;
md->sg_data[i].offset += size;
@@ -575,10 +580,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
struct sk_msg_buff *md,
int flags)
{
bool ingress = !!(md->flags & BPF_F_INGRESS);
struct smap_psock *psock;
struct scatterlist *sg;
int i, err, free = 0;
bool ingress = !!(md->flags & BPF_F_INGRESS);
int err = 0;
sg = md->sg_data;
@@ -606,16 +611,8 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
out_rcu:
rcu_read_unlock();
out:
i = md->sg_start;
while (sg[i].length) {
free += sg[i].length;
put_page(sg_page(&sg[i]));
sg[i].length = 0;
i++;
if (i == MAX_SKB_FRAGS)
i = 0;
}
return free;
free_bytes_sg(NULL, send, md, false);
return err;
}
static inline void bpf_md_init(struct smap_psock *psock)
@@ -700,19 +697,26 @@ more_data:
err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags);
lock_sock(sk);
if (unlikely(err < 0)) {
free_start_sg(sk, m);
psock->sg_size = 0;
if (!cork)
*copied -= send;
} else {
psock->sg_size -= send;
}
if (cork) {
free_start_sg(sk, m);
psock->sg_size = 0;
kfree(m);
m = NULL;
err = 0;
}
if (unlikely(err))
*copied -= err;
else
psock->sg_size -= send;
break;
case __SK_DROP:
default:
free_bytes_sg(sk, send, m);
free_bytes_sg(sk, send, m, true);
apply_bytes_dec(psock, send);
*copied -= send;
psock->sg_size -= send;
@@ -732,6 +736,26 @@ out_err:
return err;
}
static int bpf_wait_data(struct sock *sk,
struct smap_psock *psk, int flags,
long timeo, int *err)
{
int rc;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
add_wait_queue(sk_sleep(sk), &wait);
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
rc = sk_wait_event(sk, &timeo,
!list_empty(&psk->ingress) ||
!skb_queue_empty(&sk->sk_receive_queue),
&wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
remove_wait_queue(sk_sleep(sk), &wait);
return rc;
}
static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len)
{
@@ -755,6 +779,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
lock_sock(sk);
bytes_ready:
while (copied != len) {
struct scatterlist *sg;
struct sk_msg_buff *md;
@@ -809,6 +834,28 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
}
if (!copied) {
long timeo;
int data;
int err = 0;
timeo = sock_rcvtimeo(sk, nonblock);
data = bpf_wait_data(sk, psock, flags, timeo, &err);
if (data) {
if (!skb_queue_empty(&sk->sk_receive_queue)) {
release_sock(sk);
smap_release_sock(psock, sk);
copied = tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
return copied;
}
goto bytes_ready;
}
if (err)
copied = err;
}
release_sock(sk);
smap_release_sock(psock, sk);
return copied;
@@ -1442,9 +1489,6 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
attr->value_size != 4 || attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
if (attr->value_size > KMALLOC_MAX_SIZE)
return ERR_PTR(-E2BIG);
err = bpf_tcp_ulp_register();
if (err && err != -EEXIST)
return ERR_PTR(err);
@@ -1834,7 +1878,7 @@ static int sock_map_update_elem(struct bpf_map *map,
return err;
}
static void sock_map_release(struct bpf_map *map, struct file *map_file)
static void sock_map_release(struct bpf_map *map)
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
struct bpf_prog *orig;
@@ -1858,7 +1902,7 @@ const struct bpf_map_ops sock_map_ops = {
.map_get_next_key = sock_map_get_next_key,
.map_update_elem = sock_map_update_elem,
.map_delete_elem = sock_map_delete_elem,
.map_release = sock_map_release,
.map_release_uref = sock_map_release,
};
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,

View File

@@ -26,6 +26,7 @@
#include <linux/cred.h>
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
@@ -102,12 +103,14 @@ const struct bpf_map_ops bpf_map_offload_ops = {
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
const struct bpf_map_ops *ops;
u32 type = attr->map_type;
struct bpf_map *map;
int err;
if (attr->map_type >= ARRAY_SIZE(bpf_map_types))
if (type >= ARRAY_SIZE(bpf_map_types))
return ERR_PTR(-EINVAL);
ops = bpf_map_types[attr->map_type];
type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
ops = bpf_map_types[type];
if (!ops)
return ERR_PTR(-EINVAL);
@@ -122,7 +125,7 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
if (IS_ERR(map))
return map;
map->ops = ops;
map->map_type = attr->map_type;
map->map_type = type;
return map;
}
@@ -257,8 +260,8 @@ static void bpf_map_free_deferred(struct work_struct *work)
static void bpf_map_put_uref(struct bpf_map *map)
{
if (atomic_dec_and_test(&map->usercnt)) {
if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
bpf_fd_array_map_clear(map);
if (map->ops->map_release_uref)
map->ops->map_release_uref(map);
}
}
@@ -871,11 +874,17 @@ static const struct bpf_prog_ops * const bpf_prog_types[] = {
static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
const struct bpf_prog_ops *ops;
if (type >= ARRAY_SIZE(bpf_prog_types))
return -EINVAL;
type = array_index_nospec(type, ARRAY_SIZE(bpf_prog_types));
ops = bpf_prog_types[type];
if (!ops)
return -EINVAL;
if (!bpf_prog_is_dev_bound(prog->aux))
prog->aux->ops = bpf_prog_types[type];
prog->aux->ops = ops;
else
prog->aux->ops = &bpf_offload_prog_ops;
prog->type = type;

View File

@@ -34,6 +34,7 @@ int compat_get_timex(struct timex *txc, const struct compat_timex __user *utp)
{
struct compat_timex tx32;
memset(txc, 0, sizeof(struct timex));
if (copy_from_user(&tx32, utp, sizeof(struct compat_timex)))
return -EFAULT;

View File

@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/circ_buf.h>
#include <linux/poll.h>
#include <linux/nospec.h>
#include "internal.h"
@@ -867,8 +868,10 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
return NULL;
/* AUX space */
if (pgoff >= rb->aux_pgoff)
return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]);
if (pgoff >= rb->aux_pgoff) {
int aux_pgoff = array_index_nospec(pgoff - rb->aux_pgoff, rb->aux_nr_pages);
return virt_to_page(rb->aux_pages[aux_pgoff]);
}
}
return __perf_mmap_to_page(rb, pgoff);

View File

@@ -491,7 +491,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
if (!uprobe)
return NULL;
uprobe->inode = igrab(inode);
uprobe->inode = inode;
uprobe->offset = offset;
init_rwsem(&uprobe->register_rwsem);
init_rwsem(&uprobe->consumer_rwsem);
@@ -502,7 +502,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
if (cur_uprobe) {
kfree(uprobe);
uprobe = cur_uprobe;
iput(inode);
}
return uprobe;
@@ -701,7 +700,6 @@ static void delete_uprobe(struct uprobe *uprobe)
rb_erase(&uprobe->rb_node, &uprobes_tree);
spin_unlock(&uprobes_treelock);
RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */
iput(uprobe->inode);
put_uprobe(uprobe);
}
@@ -873,7 +871,8 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u
* tuple). Creation refcount stops uprobe_unregister from freeing the
* @uprobe even before the register operation is complete. Creation
* refcount is released when the last @uc for the @uprobe
* unregisters.
* unregisters. Caller of uprobe_register() is required to keep @inode
* (and the containing mount) referenced.
*
* Return errno if it cannot successully install probes
* else return 0 (success)

View File

@@ -2428,7 +2428,7 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)
struct kprobe_blacklist_entry *ent =
list_entry(v, struct kprobe_blacklist_entry, list);
seq_printf(m, "0x%p-0x%p\t%ps\n", (void *)ent->start_addr,
seq_printf(m, "0x%px-0x%px\t%ps\n", (void *)ent->start_addr,
(void *)ent->end_addr, (void *)ent->start_addr);
return 0;
}

View File

@@ -55,7 +55,6 @@ enum KTHREAD_BITS {
KTHREAD_IS_PER_CPU = 0,
KTHREAD_SHOULD_STOP,
KTHREAD_SHOULD_PARK,
KTHREAD_IS_PARKED,
};
static inline void set_kthread_struct(void *kthread)
@@ -177,14 +176,12 @@ void *kthread_probe_data(struct task_struct *task)
static void __kthread_parkme(struct kthread *self)
{
__set_current_state(TASK_PARKED);
while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
complete(&self->parked);
for (;;) {
set_current_state(TASK_PARKED);
if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
break;
schedule();
__set_current_state(TASK_PARKED);
}
clear_bit(KTHREAD_IS_PARKED, &self->flags);
__set_current_state(TASK_RUNNING);
}
@@ -194,6 +191,11 @@ void kthread_parkme(void)
}
EXPORT_SYMBOL_GPL(kthread_parkme);
void kthread_park_complete(struct task_struct *k)
{
complete(&to_kthread(k)->parked);
}
static int kthread(void *_create)
{
/* Copy data: it's on kthread's stack */
@@ -450,22 +452,15 @@ void kthread_unpark(struct task_struct *k)
{
struct kthread *kthread = to_kthread(k);
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
/*
* We clear the IS_PARKED bit here as we don't wait
* until the task has left the park code. So if we'd
* park before that happens we'd see the IS_PARKED bit
* which might be about to be cleared.
* Newly created kthread was parked when the CPU was offline.
* The binding was lost and we need to set it again.
*/
if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
/*
* Newly created kthread was parked when the CPU was offline.
* The binding was lost and we need to set it again.
*/
if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
__kthread_bind(k, kthread->cpu, TASK_PARKED);
wake_up_state(k, TASK_PARKED);
}
if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
__kthread_bind(k, kthread->cpu, TASK_PARKED);
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
wake_up_state(k, TASK_PARKED);
}
EXPORT_SYMBOL_GPL(kthread_unpark);
@@ -488,12 +483,13 @@ int kthread_park(struct task_struct *k)
if (WARN_ON(k->flags & PF_EXITING))
return -ENOSYS;
if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
if (k != current) {
wake_up_process(k);
wait_for_completion(&kthread->parked);
}
if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
return -EBUSY;
set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
if (k != current) {
wake_up_process(k);
wait_for_completion(&kthread->parked);
}
return 0;

View File

@@ -1472,7 +1472,8 @@ static ssize_t module_sect_show(struct module_attribute *mattr,
{
struct module_sect_attr *sattr =
container_of(mattr, struct module_sect_attr, mattr);
return sprintf(buf, "0x%pK\n", (void *)sattr->address);
return sprintf(buf, "0x%px\n", kptr_restrict < 2 ?
(void *)sattr->address : NULL);
}
static void free_sect_attrs(struct module_sect_attrs *sect_attrs)
@@ -3516,6 +3517,11 @@ static noinline int do_init_module(struct module *mod)
* walking this with preempt disabled. In all the failure paths, we
* call synchronize_sched(), but we don't want to slow down the success
* path, so use actual RCU here.
* Note that module_alloc() on most architectures creates W+X page
* mappings which won't be cleaned up until do_free_init() runs. Any
* code such as mark_rodata_ro() which depends on those mappings to
* be cleaned up needs to sync with the queued work - ie
* rcu_barrier_sched()
*/
call_rcu_sched(&freeinit->rcu, do_free_init);
mutex_unlock(&module_mutex);

View File

@@ -2,6 +2,7 @@
/*
* Auto-group scheduling implementation:
*/
#include <linux/nospec.h>
#include "sched.h"
unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1;
@@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
static unsigned long next = INITIAL_JIFFIES;
struct autogroup *ag;
unsigned long shares;
int err;
int err, idx;
if (nice < MIN_NICE || nice > MAX_NICE)
return -EINVAL;
@@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
next = HZ / 10 + jiffies;
ag = autogroup_task_get(p);
shares = scale_load(sched_prio_to_weight[nice + 20]);
idx = array_index_nospec(nice + 20, 40);
shares = scale_load(sched_prio_to_weight[idx]);
down_write(&ag->lock);
err = sched_group_set_shares(ag->tg, shares);

View File

@@ -7,6 +7,9 @@
*/
#include "sched.h"
#include <linux/kthread.h>
#include <linux/nospec.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -2718,20 +2721,28 @@ static struct rq *finish_task_switch(struct task_struct *prev)
membarrier_mm_sync_core_before_usermode(mm);
mmdrop(mm);
}
if (unlikely(prev_state == TASK_DEAD)) {
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) {
switch (prev_state) {
case TASK_DEAD:
if (prev->sched_class->task_dead)
prev->sched_class->task_dead(prev);
/*
* Remove function-return probe instances associated with this
* task and put them back on the free list.
*/
kprobe_flush_task(prev);
/*
* Remove function-return probe instances associated with this
* task and put them back on the free list.
*/
kprobe_flush_task(prev);
/* Task is done with its stack. */
put_task_stack(prev);
/* Task is done with its stack. */
put_task_stack(prev);
put_task_struct(prev);
put_task_struct(prev);
break;
case TASK_PARKED:
kthread_park_complete(prev);
break;
}
}
tick_nohz_task_switch();
@@ -3498,23 +3509,8 @@ static void __sched notrace __schedule(bool preempt)
void __noreturn do_task_dead(void)
{
/*
* The setting of TASK_RUNNING by try_to_wake_up() may be delayed
* when the following two conditions become true.
* - There is race condition of mmap_sem (It is acquired by
* exit_mm()), and
* - SMI occurs before setting TASK_RUNINNG.
* (or hypervisor of virtual machine switches to other guest)
* As a result, we may become TASK_RUNNING after becoming TASK_DEAD
*
* To avoid it, we have to wait for releasing tsk->pi_lock which
* is held by try_to_wake_up()
*/
raw_spin_lock_irq(&current->pi_lock);
raw_spin_unlock_irq(&current->pi_lock);
/* Causes final put_task_struct in finish_task_switch(): */
__set_current_state(TASK_DEAD);
set_special_state(TASK_DEAD);
/* Tell freezer to ignore us: */
current->flags |= PF_NOFREEZE;
@@ -6928,11 +6924,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css,
struct cftype *cft, s64 nice)
{
unsigned long weight;
int idx;
if (nice < MIN_NICE || nice > MAX_NICE)
return -ERANGE;
weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO];
idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO;
idx = array_index_nospec(idx, 40);
weight = sched_prio_to_weight[idx];
return sched_group_set_shares(css_tg(css), scale_load(weight));
}
#endif

View File

@@ -305,7 +305,8 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
* Do not reduce the frequency if the CPU has not been idle
* recently, as the reduction is likely to be premature then.
*/
if (busy && next_f < sg_policy->next_freq) {
if (busy && next_f < sg_policy->next_freq &&
sg_policy->next_freq != UINT_MAX) {
next_f = sg_policy->next_freq;
/* Reset cached freq as next_freq has changed */
@@ -396,19 +397,6 @@ static void sugov_irq_work(struct irq_work *irq_work)
sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
/*
* For RT tasks, the schedutil governor shoots the frequency to maximum.
* Special care must be taken to ensure that this kthread doesn't result
* in the same behavior.
*
* This is (mostly) guaranteed by the work_in_progress flag. The flag is
* updated only at the end of the sugov_work() function and before that
* the schedutil governor rejects all other frequency scaling requests.
*
* There is a very rare case though, where the RT thread yields right
* after the work_in_progress flag is cleared. The effects of that are
* neglected for now.
*/
kthread_queue_work(&sg_policy->worker, &sg_policy->work);
}

View File

@@ -1854,7 +1854,6 @@ static int task_numa_migrate(struct task_struct *p)
static void numa_migrate_preferred(struct task_struct *p)
{
unsigned long interval = HZ;
unsigned long numa_migrate_retry;
/* This task has no NUMA fault statistics yet */
if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults))
@@ -1862,18 +1861,7 @@ static void numa_migrate_preferred(struct task_struct *p)
/* Periodically retry migrating the task to the preferred node */
interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
numa_migrate_retry = jiffies + interval;
/*
* Check that the new retry threshold is after the current one. If
* the retry is in the future, it implies that wake_affine has
* temporarily asked NUMA balancing to backoff from placement.
*/
if (numa_migrate_retry > p->numa_migrate_retry)
return;
/* Safe to try placing the task on the preferred node */
p->numa_migrate_retry = numa_migrate_retry;
p->numa_migrate_retry = jiffies + interval;
/* Success if task is already running on preferred CPU */
if (task_node(p) == p->numa_preferred_nid)
@@ -5922,48 +5910,6 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits;
}
#ifdef CONFIG_NUMA_BALANCING
static void
update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
{
unsigned long interval;
if (!static_branch_likely(&sched_numa_balancing))
return;
/* If balancing has no preference then continue gathering data */
if (p->numa_preferred_nid == -1)
return;
/*
* If the wakeup is not affecting locality then it is neutral from
* the perspective of NUMA balacing so continue gathering data.
*/
if (cpu_to_node(prev_cpu) == cpu_to_node(target))
return;
/*
* Temporarily prevent NUMA balancing trying to place waker/wakee after
* wakee has been moved by wake_affine. This will potentially allow
* related tasks to converge and update their data placement. The
* 4 * numa_scan_period is to allow the two-pass filter to migrate
* hot data to the wakers node.
*/
interval = max(sysctl_numa_balancing_scan_delay,
p->numa_scan_period << 2);
p->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
interval = max(sysctl_numa_balancing_scan_delay,
current->numa_scan_period << 2);
current->numa_migrate_retry = jiffies + msecs_to_jiffies(interval);
}
#else
static void
update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target)
{
}
#endif
static int wake_affine(struct sched_domain *sd, struct task_struct *p,
int this_cpu, int prev_cpu, int sync)
{
@@ -5979,7 +5925,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p,
if (target == nr_cpumask_bits)
return prev_cpu;
update_wa_numa_placement(p, prev_cpu, target);
schedstat_inc(sd->ttwu_move_affine);
schedstat_inc(p->se.statistics.nr_wakeups_affine);
return target;
@@ -9847,6 +9792,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
if (curr_cost > this_rq->max_idle_balance_cost)
this_rq->max_idle_balance_cost = curr_cost;
out:
/*
* While browsing the domains, we released the rq lock, a task could
* have been enqueued in the meantime. Since we're not going idle,
@@ -9855,7 +9801,6 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
if (this_rq->cfs.h_nr_running && !pulled_task)
pulled_task = 1;
out:
/* Move the next balance forward */
if (time_after(this_rq->next_balance, next_balance))
this_rq->next_balance = next_balance;

View File

@@ -1961,14 +1961,27 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
return;
}
set_special_state(TASK_TRACED);
/*
* We're committing to trapping. TRACED should be visible before
* TRAPPING is cleared; otherwise, the tracer might fail do_wait().
* Also, transition to TRACED and updates to ->jobctl should be
* atomic with respect to siglock and should be done after the arch
* hook as siglock is released and regrabbed across it.
*
* TRACER TRACEE
*
* ptrace_attach()
* [L] wait_on_bit(JOBCTL_TRAPPING) [S] set_special_state(TRACED)
* do_wait()
* set_current_state() smp_wmb();
* ptrace_do_wait()
* wait_task_stopped()
* task_stopped_code()
* [L] task_is_traced() [S] task_clear_jobctl_trapping();
*/
set_current_state(TASK_TRACED);
smp_wmb();
current->last_siginfo = info;
current->exit_code = exit_code;
@@ -2176,7 +2189,7 @@ static bool do_signal_stop(int signr)
if (task_participate_group_stop(current))
notify = CLD_STOPPED;
__set_current_state(TASK_STOPPED);
set_special_state(TASK_STOPPED);
spin_unlock_irq(&current->sighand->siglock);
/*

View File

@@ -21,6 +21,7 @@
#include <linux/smpboot.h>
#include <linux/atomic.h>
#include <linux/nmi.h>
#include <linux/sched/wake_q.h>
/*
* Structure to determine completion condition and record errors. May
@@ -65,27 +66,31 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done)
}
static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
struct cpu_stop_work *work)
struct cpu_stop_work *work,
struct wake_q_head *wakeq)
{
list_add_tail(&work->list, &stopper->works);
wake_up_process(stopper->thread);
wake_q_add(wakeq, stopper->thread);
}
/* queue @work to @stopper. if offline, @work is completed immediately */
static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
DEFINE_WAKE_Q(wakeq);
unsigned long flags;
bool enabled;
spin_lock_irqsave(&stopper->lock, flags);
enabled = stopper->enabled;
if (enabled)
__cpu_stop_queue_work(stopper, work);
__cpu_stop_queue_work(stopper, work, &wakeq);
else if (work->done)
cpu_stop_signal_done(work->done);
spin_unlock_irqrestore(&stopper->lock, flags);
wake_up_q(&wakeq);
return enabled;
}
@@ -229,6 +234,7 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
{
struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
DEFINE_WAKE_Q(wakeq);
int err;
retry:
spin_lock_irq(&stopper1->lock);
@@ -252,8 +258,8 @@ retry:
goto unlock;
err = 0;
__cpu_stop_queue_work(stopper1, work1);
__cpu_stop_queue_work(stopper2, work2);
__cpu_stop_queue_work(stopper1, work1, &wakeq);
__cpu_stop_queue_work(stopper2, work2, &wakeq);
unlock:
spin_unlock(&stopper2->lock);
spin_unlock_irq(&stopper1->lock);
@@ -263,6 +269,9 @@ unlock:
cpu_relax();
goto retry;
}
wake_up_q(&wakeq);
return err;
}
/**

View File

@@ -704,24 +704,6 @@ static const struct bin_table bin_net_netfilter_table[] = {
{}
};
static const struct bin_table bin_net_irda_table[] = {
{ CTL_INT, NET_IRDA_DISCOVERY, "discovery" },
{ CTL_STR, NET_IRDA_DEVNAME, "devname" },
{ CTL_INT, NET_IRDA_DEBUG, "debug" },
{ CTL_INT, NET_IRDA_FAST_POLL, "fast_poll_increase" },
{ CTL_INT, NET_IRDA_DISCOVERY_SLOTS, "discovery_slots" },
{ CTL_INT, NET_IRDA_DISCOVERY_TIMEOUT, "discovery_timeout" },
{ CTL_INT, NET_IRDA_SLOT_TIMEOUT, "slot_timeout" },
{ CTL_INT, NET_IRDA_MAX_BAUD_RATE, "max_baud_rate" },
{ CTL_INT, NET_IRDA_MIN_TX_TURN_TIME, "min_tx_turn_time" },
{ CTL_INT, NET_IRDA_MAX_TX_DATA_SIZE, "max_tx_data_size" },
{ CTL_INT, NET_IRDA_MAX_TX_WINDOW, "max_tx_window" },
{ CTL_INT, NET_IRDA_MAX_NOREPLY_TIME, "max_noreply_time" },
{ CTL_INT, NET_IRDA_WARN_NOREPLY_TIME, "warn_noreply_time" },
{ CTL_INT, NET_IRDA_LAP_KEEPALIVE_TIME, "lap_keepalive_time" },
{}
};
static const struct bin_table bin_net_table[] = {
{ CTL_DIR, NET_CORE, "core", bin_net_core_table },
/* NET_ETHER not used */
@@ -743,7 +725,7 @@ static const struct bin_table bin_net_table[] = {
{ CTL_DIR, NET_LLC, "llc", bin_net_llc_table },
{ CTL_DIR, NET_NETFILTER, "netfilter", bin_net_netfilter_table },
/* NET_DCCP "dccp" no longer used */
{ CTL_DIR, NET_IRDA, "irda", bin_net_irda_table },
/* NET_IRDA "irda" no longer used */
{ CTL_INT, 2089, "nf_conntrack_max" },
{}
};

View File

@@ -119,6 +119,16 @@ static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;
static void inline clocksource_watchdog_lock(unsigned long *flags)
{
spin_lock_irqsave(&watchdog_lock, *flags);
}
static void inline clocksource_watchdog_unlock(unsigned long *flags)
{
spin_unlock_irqrestore(&watchdog_lock, *flags);
}
static int clocksource_watchdog_kthread(void *data);
static void __clocksource_change_rating(struct clocksource *cs, int rating);
@@ -142,9 +152,19 @@ static void __clocksource_unstable(struct clocksource *cs)
cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
cs->flags |= CLOCK_SOURCE_UNSTABLE;
/*
* If the clocksource is registered clocksource_watchdog_kthread() will
* re-rate and re-select.
*/
if (list_empty(&cs->list)) {
cs->rating = 0;
return;
}
if (cs->mark_unstable)
cs->mark_unstable(cs);
/* kick clocksource_watchdog_kthread() */
if (finished_booting)
schedule_work(&watchdog_work);
}
@@ -153,10 +173,8 @@ static void __clocksource_unstable(struct clocksource *cs)
* clocksource_mark_unstable - mark clocksource unstable via watchdog
* @cs: clocksource to be marked unstable
*
* This function is called instead of clocksource_change_rating from
* cpu hotplug code to avoid a deadlock between the clocksource mutex
* and the cpu hotplug mutex. It defers the update of the clocksource
* to the watchdog thread.
* This function is called by the x86 TSC code to mark clocksources as unstable;
* it defers demotion and re-selection to a kthread.
*/
void clocksource_mark_unstable(struct clocksource *cs)
{
@@ -164,7 +182,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
spin_lock_irqsave(&watchdog_lock, flags);
if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
if (list_empty(&cs->wd_list))
if (!list_empty(&cs->list) && list_empty(&cs->wd_list))
list_add(&cs->wd_list, &watchdog_list);
__clocksource_unstable(cs);
}
@@ -319,9 +337,8 @@ static void clocksource_resume_watchdog(void)
static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
unsigned long flags;
INIT_LIST_HEAD(&cs->wd_list);
spin_lock_irqsave(&watchdog_lock, flags);
if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
/* cs is a clocksource to be watched. */
list_add(&cs->wd_list, &watchdog_list);
@@ -331,7 +348,6 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}
spin_unlock_irqrestore(&watchdog_lock, flags);
}
static void clocksource_select_watchdog(bool fallback)
@@ -373,9 +389,6 @@ static void clocksource_select_watchdog(bool fallback)
static void clocksource_dequeue_watchdog(struct clocksource *cs)
{
unsigned long flags;
spin_lock_irqsave(&watchdog_lock, flags);
if (cs != watchdog) {
if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
/* cs is a watched clocksource. */
@@ -384,21 +397,19 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs)
clocksource_stop_watchdog();
}
}
spin_unlock_irqrestore(&watchdog_lock, flags);
}
static int __clocksource_watchdog_kthread(void)
{
struct clocksource *cs, *tmp;
unsigned long flags;
LIST_HEAD(unstable);
int select = 0;
spin_lock_irqsave(&watchdog_lock, flags);
list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
list_del_init(&cs->wd_list);
list_add(&cs->wd_list, &unstable);
__clocksource_change_rating(cs, 0);
select = 1;
}
if (cs->flags & CLOCK_SOURCE_RESELECT) {
@@ -410,11 +421,6 @@ static int __clocksource_watchdog_kthread(void)
clocksource_stop_watchdog();
spin_unlock_irqrestore(&watchdog_lock, flags);
/* Needs to be done outside of watchdog lock */
list_for_each_entry_safe(cs, tmp, &unstable, wd_list) {
list_del_init(&cs->wd_list);
__clocksource_change_rating(cs, 0);
}
return select;
}
@@ -447,6 +453,9 @@ static inline int __clocksource_watchdog_kthread(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
void clocksource_mark_unstable(struct clocksource *cs) { }
static void inline clocksource_watchdog_lock(unsigned long *flags) { }
static void inline clocksource_watchdog_unlock(unsigned long *flags) { }
#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */
/**
@@ -779,14 +788,19 @@ EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
*/
int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
{
unsigned long flags;
/* Initialize mult/shift and max_idle_ns */
__clocksource_update_freq_scale(cs, scale, freq);
/* Add clocksource to the clocksource list */
mutex_lock(&clocksource_mutex);
clocksource_watchdog_lock(&flags);
clocksource_enqueue(cs);
clocksource_enqueue_watchdog(cs);
clocksource_watchdog_unlock(&flags);
clocksource_select();
clocksource_select_watchdog(false);
mutex_unlock(&clocksource_mutex);
@@ -808,8 +822,13 @@ static void __clocksource_change_rating(struct clocksource *cs, int rating)
*/
void clocksource_change_rating(struct clocksource *cs, int rating)
{
unsigned long flags;
mutex_lock(&clocksource_mutex);
clocksource_watchdog_lock(&flags);
__clocksource_change_rating(cs, rating);
clocksource_watchdog_unlock(&flags);
clocksource_select();
clocksource_select_watchdog(false);
mutex_unlock(&clocksource_mutex);
@@ -821,6 +840,8 @@ EXPORT_SYMBOL(clocksource_change_rating);
*/
static int clocksource_unbind(struct clocksource *cs)
{
unsigned long flags;
if (clocksource_is_watchdog(cs)) {
/* Select and try to install a replacement watchdog. */
clocksource_select_watchdog(true);
@@ -834,8 +855,12 @@ static int clocksource_unbind(struct clocksource *cs)
if (curr_clocksource == cs)
return -EBUSY;
}
clocksource_watchdog_lock(&flags);
clocksource_dequeue_watchdog(cs);
list_del_init(&cs->list);
clocksource_watchdog_unlock(&flags);
return 0;
}

View File

@@ -90,6 +90,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
.clockid = CLOCK_REALTIME,
.get_time = &ktime_get_real,
},
{
.index = HRTIMER_BASE_BOOTTIME,
.clockid = CLOCK_BOOTTIME,
.get_time = &ktime_get_boottime,
},
{
.index = HRTIMER_BASE_TAI,
.clockid = CLOCK_TAI,
@@ -105,6 +110,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
.clockid = CLOCK_REALTIME,
.get_time = &ktime_get_real,
},
{
.index = HRTIMER_BASE_BOOTTIME_SOFT,
.clockid = CLOCK_BOOTTIME,
.get_time = &ktime_get_boottime,
},
{
.index = HRTIMER_BASE_TAI_SOFT,
.clockid = CLOCK_TAI,
@@ -119,7 +129,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
[CLOCK_REALTIME] = HRTIMER_BASE_REALTIME,
[CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC,
[CLOCK_BOOTTIME] = HRTIMER_BASE_MONOTONIC,
[CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME,
[CLOCK_TAI] = HRTIMER_BASE_TAI,
};
@@ -571,12 +581,14 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
{
ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;
ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq,
offs_real, offs_tai);
offs_real, offs_boot, offs_tai);
base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;
return now;

View File

@@ -83,8 +83,6 @@ int do_clock_gettime(clockid_t which_clock, struct timespec64 *tp)
case CLOCK_BOOTTIME:
get_monotonic_boottime64(tp);
break;
case CLOCK_MONOTONIC_ACTIVE:
ktime_get_active_ts64(tp);
default:
return -EINVAL;
}

View File

@@ -252,16 +252,15 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *
return 0;
}
static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp)
static int posix_get_boottime(const clockid_t which_clock, struct timespec64 *tp)
{
timekeeping_clocktai64(tp);
get_monotonic_boottime64(tp);
return 0;
}
static int posix_get_monotonic_active(clockid_t which_clock,
struct timespec64 *tp)
static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp)
{
ktime_get_active_ts64(tp);
timekeeping_clocktai64(tp);
return 0;
}
@@ -1317,9 +1316,19 @@ static const struct k_clock clock_tai = {
.timer_arm = common_hrtimer_arm,
};
static const struct k_clock clock_monotonic_active = {
static const struct k_clock clock_boottime = {
.clock_getres = posix_get_hrtimer_res,
.clock_get = posix_get_monotonic_active,
.clock_get = posix_get_boottime,
.nsleep = common_nsleep,
.timer_create = common_timer_create,
.timer_set = common_timer_set,
.timer_get = common_timer_get,
.timer_del = common_timer_del,
.timer_rearm = common_hrtimer_rearm,
.timer_forward = common_hrtimer_forward,
.timer_remaining = common_hrtimer_remaining,
.timer_try_to_cancel = common_hrtimer_try_to_cancel,
.timer_arm = common_hrtimer_arm,
};
static const struct k_clock * const posix_clocks[] = {
@@ -1330,11 +1339,10 @@ static const struct k_clock * const posix_clocks[] = {
[CLOCK_MONOTONIC_RAW] = &clock_monotonic_raw,
[CLOCK_REALTIME_COARSE] = &clock_realtime_coarse,
[CLOCK_MONOTONIC_COARSE] = &clock_monotonic_coarse,
[CLOCK_BOOTTIME] = &clock_monotonic,
[CLOCK_BOOTTIME] = &clock_boottime,
[CLOCK_REALTIME_ALARM] = &alarm_clock,
[CLOCK_BOOTTIME_ALARM] = &alarm_clock,
[CLOCK_TAI] = &clock_tai,
[CLOCK_MONOTONIC_ACTIVE] = &clock_monotonic_active,
};
static const struct k_clock *clockid_to_kclock(const clockid_t id)

View File

@@ -419,19 +419,6 @@ void tick_suspend_local(void)
clockevents_shutdown(td->evtdev);
}
static void tick_forward_next_period(void)
{
ktime_t delta, now = ktime_get();
u64 n;
delta = ktime_sub(now, tick_next_period);
n = ktime_divns(delta, tick_period);
tick_next_period += n * tick_period;
if (tick_next_period < now)
tick_next_period += tick_period;
tick_sched_forward_next_period();
}
/**
* tick_resume_local - Resume the local tick device
*
@@ -444,8 +431,6 @@ void tick_resume_local(void)
struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
bool broadcast = tick_resume_check_broadcast();
tick_forward_next_period();
clockevents_tick_resume(td->evtdev);
if (!broadcast) {
if (td->mode == TICKDEV_MODE_PERIODIC)

View File

@@ -141,12 +141,6 @@ static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); }
#endif /* !(BROADCAST && ONESHOT) */
#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS)
extern void tick_sched_forward_next_period(void);
#else
static inline void tick_sched_forward_next_period(void) { }
#endif
/* NO_HZ_FULL internal */
#ifdef CONFIG_NO_HZ_FULL
extern void tick_nohz_init(void);

View File

@@ -51,15 +51,6 @@ struct tick_sched *tick_get_tick_sched(int cpu)
*/
static ktime_t last_jiffies_update;
/*
* Called after resume. Make sure that jiffies are not fast forwarded due to
* clock monotonic being forwarded by the suspended time.
*/
void tick_sched_forward_next_period(void)
{
last_jiffies_update = tick_next_period;
}
/*
* Must be called with interrupts disabled !
*/
@@ -804,12 +795,12 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
return;
}
hrtimer_set_expires(&ts->sched_timer, tick);
if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
else
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
} else {
hrtimer_set_expires(&ts->sched_timer, tick);
tick_program_event(tick, 1);
}
}
static void tick_nohz_retain_tick(struct tick_sched *ts)

View File

@@ -138,12 +138,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
{
/* Update both bases so mono and raw stay coupled. */
tk->tkr_mono.base += delta;
tk->tkr_raw.base += delta;
/* Accumulate time spent in suspend */
tk->time_suspended += delta;
tk->offs_boot = ktime_add(tk->offs_boot, delta);
}
/*
@@ -473,6 +468,36 @@ u64 ktime_get_raw_fast_ns(void)
}
EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
/**
* ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
*
* To keep it NMI safe since we're accessing from tracing, we're not using a
* separate timekeeper with updates to monotonic clock and boot offset
* protected with seqlocks. This has the following minor side effects:
*
* (1) Its possible that a timestamp be taken after the boot offset is updated
* but before the timekeeper is updated. If this happens, the new boot offset
* is added to the old timekeeping making the clock appear to update slightly
* earlier:
* CPU 0 CPU 1
* timekeeping_inject_sleeptime64()
* __timekeeping_inject_sleeptime(tk, delta);
* timestamp();
* timekeeping_update(tk, TK_CLEAR_NTP...);
*
* (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
* partially updated. Since the tk->offs_boot update is a rare event, this
* should be a rare occurrence which postprocessing should be able to handle.
*/
u64 notrace ktime_get_boot_fast_ns(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
}
EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
/*
* See comment for __ktime_get_fast_ns() vs. timestamp ordering
*/
@@ -764,6 +789,7 @@ EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);
static ktime_t *offsets[TK_OFFS_MAX] = {
[TK_OFFS_REAL] = &tk_core.timekeeper.offs_real,
[TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot,
[TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai,
};
@@ -860,39 +886,6 @@ void ktime_get_ts64(struct timespec64 *ts)
}
EXPORT_SYMBOL_GPL(ktime_get_ts64);
/**
* ktime_get_active_ts64 - Get the active non-suspended monotonic clock
* @ts: pointer to timespec variable
*
* The function calculates the monotonic clock from the realtime clock and
* the wall_to_monotonic offset, subtracts the accumulated suspend time and
* stores the result in normalized timespec64 format in the variable
* pointed to by @ts.
*/
void ktime_get_active_ts64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
struct timespec64 tomono, tsusp;
u64 nsec, nssusp;
unsigned int seq;
WARN_ON(timekeeping_suspended);
do {
seq = read_seqcount_begin(&tk_core.seq);
ts->tv_sec = tk->xtime_sec;
nsec = timekeeping_get_ns(&tk->tkr_mono);
tomono = tk->wall_to_monotonic;
nssusp = tk->time_suspended;
} while (read_seqcount_retry(&tk_core.seq, seq));
ts->tv_sec += tomono.tv_sec;
ts->tv_nsec = 0;
timespec64_add_ns(ts, nsec + tomono.tv_nsec);
tsusp = ns_to_timespec64(nssusp);
*ts = timespec64_sub(*ts, tsusp);
}
/**
* ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC
*
@@ -1593,6 +1586,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
return;
}
tk_xtime_add(tk, delta);
tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
tk_debug_account_sleep_time(delta);
}
@@ -2125,7 +2119,7 @@ out:
void getboottime64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
ktime_t t = ktime_sub(tk->offs_real, tk->time_suspended);
ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);
*ts = ktime_to_timespec64(t);
}
@@ -2188,6 +2182,7 @@ void do_timer(unsigned long ticks)
* ktime_get_update_offsets_now - hrtimer helper
* @cwsseq: pointer to check and store the clock was set sequence number
* @offs_real: pointer to storage for monotonic -> realtime offset
* @offs_boot: pointer to storage for monotonic -> boottime offset
* @offs_tai: pointer to storage for monotonic -> clock tai offset
*
* Returns current monotonic time and updates the offsets if the
@@ -2197,7 +2192,7 @@ void do_timer(unsigned long ticks)
* Called from hrtimer_interrupt() or retrigger_next_event()
*/
ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
ktime_t *offs_tai)
ktime_t *offs_boot, ktime_t *offs_tai)
{
struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
@@ -2214,6 +2209,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,
if (*cwsseq != tk->clock_was_set_seq) {
*cwsseq = tk->clock_was_set_seq;
*offs_real = tk->offs_real;
*offs_boot = tk->offs_boot;
*offs_tai = tk->offs_tai;
}

View File

@@ -6,6 +6,7 @@
*/
extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq,
ktime_t *offs_real,
ktime_t *offs_boot,
ktime_t *offs_tai);
extern int timekeeping_valid_for_hres(void);

View File

@@ -977,6 +977,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
{
struct perf_event_query_bpf __user *uquery = info;
struct perf_event_query_bpf query = {};
u32 *ids, prog_cnt, ids_len;
int ret;
if (!capable(CAP_SYS_ADMIN))
@@ -985,16 +986,32 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)
return -EINVAL;
if (copy_from_user(&query, uquery, sizeof(query)))
return -EFAULT;
if (query.ids_len > BPF_TRACE_MAX_PROGS)
ids_len = query.ids_len;
if (ids_len > BPF_TRACE_MAX_PROGS)
return -E2BIG;
ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
if (!ids)
return -ENOMEM;
/*
* The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
* is required when user only wants to check for uquery->prog_cnt.
* There is no need to check for it since the case is handled
* gracefully in bpf_prog_array_copy_info.
*/
mutex_lock(&bpf_event_mutex);
ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
uquery->ids,
query.ids_len,
&uquery->prog_cnt);
ids,
ids_len,
&prog_cnt);
mutex_unlock(&bpf_event_mutex);
if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
ret = -EFAULT;
kfree(ids);
return ret;
}

View File

@@ -5514,10 +5514,10 @@ static __init int ftrace_init_dyn_tracefs(struct dentry *d_tracer)
ftrace_create_filter_files(&global_ops, d_tracer);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
trace_create_file("set_graph_function", 0444, d_tracer,
trace_create_file("set_graph_function", 0644, d_tracer,
NULL,
&ftrace_graph_fops);
trace_create_file("set_graph_notrace", 0444, d_tracer,
trace_create_file("set_graph_notrace", 0644, d_tracer,
NULL,
&ftrace_graph_notrace_fops);
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

View File

@@ -1165,7 +1165,7 @@ static struct {
{ trace_clock, "perf", 1 },
{ ktime_get_mono_fast_ns, "mono", 1 },
{ ktime_get_raw_fast_ns, "mono_raw", 1 },
{ ktime_get_mono_fast_ns, "boot", 1 },
{ ktime_get_boot_fast_ns, "boot", 1 },
ARCH_TRACE_CLOCKS
};

View File

@@ -356,7 +356,7 @@ FTRACE_ENTRY(hwlat, hwlat_entry,
__field( unsigned int, seqnum )
),
F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llunmi-ts:%llu\tnmi-count:%u\n",
F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llu\tnmi-ts:%llu\tnmi-count:%u\n",
__entry->seqnum,
__entry->tv_sec,
__entry->tv_nsec,

View File

@@ -762,6 +762,9 @@ static int regex_match_full(char *str, struct regex *r, int len)
static int regex_match_front(char *str, struct regex *r, int len)
{
if (len < r->len)
return 0;
if (strncmp(str, r->pattern, r->len) == 0)
return 1;
return 0;
@@ -1499,14 +1502,14 @@ static int process_preds(struct trace_event_call *call,
return ret;
}
if (!nr_preds) {
prog = NULL;
} else {
prog = predicate_parse(filter_string, nr_parens, nr_preds,
if (!nr_preds)
return -EINVAL;
prog = predicate_parse(filter_string, nr_parens, nr_preds,
parse_pred, call, pe);
if (IS_ERR(prog))
return PTR_ERR(prog);
}
if (IS_ERR(prog))
return PTR_ERR(prog);
rcu_assign_pointer(filter->prog, prog);
return 0;
}

View File

@@ -2466,6 +2466,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
else if (strcmp(modifier, "usecs") == 0)
*flags |= HIST_FIELD_FL_TIMESTAMP_USECS;
else {
hist_err("Invalid field modifier: ", modifier);
field = ERR_PTR(-EINVAL);
goto out;
}
@@ -2481,6 +2482,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
else {
field = trace_find_event_field(file->event_call, field_name);
if (!field || !field->size) {
hist_err("Couldn't find field: ", field_name);
field = ERR_PTR(-EINVAL);
goto out;
}
@@ -4913,6 +4915,16 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)
seq_printf(m, "%s", field_name);
} else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)
seq_puts(m, "common_timestamp");
if (hist_field->flags) {
if (!(hist_field->flags & HIST_FIELD_FL_VAR_REF) &&
!(hist_field->flags & HIST_FIELD_FL_EXPR)) {
const char *flags = get_hist_field_flags(hist_field);
if (flags)
seq_printf(m, ".%s", flags);
}
}
}
static int event_hist_trigger_print(struct seq_file *m,

View File

@@ -472,7 +472,7 @@ static __init int stack_trace_init(void)
NULL, &stack_trace_fops);
#ifdef CONFIG_DYNAMIC_FTRACE
trace_create_file("stack_trace_filter", 0444, d_tracer,
trace_create_file("stack_trace_filter", 0644, d_tracer,
&trace_ops, &stack_trace_filter_fops);
#endif

View File

@@ -55,6 +55,7 @@ struct trace_uprobe {
struct list_head list;
struct trace_uprobe_filter filter;
struct uprobe_consumer consumer;
struct path path;
struct inode *inode;
char *filename;
unsigned long offset;
@@ -289,7 +290,7 @@ static void free_trace_uprobe(struct trace_uprobe *tu)
for (i = 0; i < tu->tp.nr_args; i++)
traceprobe_free_probe_arg(&tu->tp.args[i]);
iput(tu->inode);
path_put(&tu->path);
kfree(tu->tp.call.class->system);
kfree(tu->tp.call.name);
kfree(tu->filename);
@@ -363,7 +364,6 @@ end:
static int create_trace_uprobe(int argc, char **argv)
{
struct trace_uprobe *tu;
struct inode *inode;
char *arg, *event, *group, *filename;
char buf[MAX_EVENT_NAME_LEN];
struct path path;
@@ -371,7 +371,6 @@ static int create_trace_uprobe(int argc, char **argv)
bool is_delete, is_return;
int i, ret;
inode = NULL;
ret = 0;
is_delete = false;
is_return = false;
@@ -437,21 +436,16 @@ static int create_trace_uprobe(int argc, char **argv)
}
/* Find the last occurrence, in case the path contains ':' too. */
arg = strrchr(argv[1], ':');
if (!arg) {
ret = -EINVAL;
goto fail_address_parse;
}
if (!arg)
return -EINVAL;
*arg++ = '\0';
filename = argv[1];
ret = kern_path(filename, LOOKUP_FOLLOW, &path);
if (ret)
goto fail_address_parse;
return ret;
inode = igrab(d_real_inode(path.dentry));
path_put(&path);
if (!inode || !S_ISREG(inode->i_mode)) {
if (!d_is_reg(path.dentry)) {
ret = -EINVAL;
goto fail_address_parse;
}
@@ -490,7 +484,7 @@ static int create_trace_uprobe(int argc, char **argv)
goto fail_address_parse;
}
tu->offset = offset;
tu->inode = inode;
tu->path = path;
tu->filename = kstrdup(filename, GFP_KERNEL);
if (!tu->filename) {
@@ -558,7 +552,7 @@ error:
return ret;
fail_address_parse:
iput(inode);
path_put(&path);
pr_info("Failed to parse address or file.\n");
@@ -922,6 +916,7 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file,
goto err_flags;
tu->consumer.filter = filter;
tu->inode = d_real_inode(tu->path.dentry);
ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
if (ret)
goto err_buffer;
@@ -967,6 +962,7 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file)
WARN_ON(!uprobe_filter_is_empty(&tu->filter));
uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
tu->inode = NULL;
tu->tp.flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;
uprobe_buffer_disable();
@@ -1337,7 +1333,6 @@ struct trace_event_call *
create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
{
struct trace_uprobe *tu;
struct inode *inode;
struct path path;
int ret;
@@ -1345,11 +1340,8 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
if (ret)
return ERR_PTR(ret);
inode = igrab(d_inode(path.dentry));
path_put(&path);
if (!inode || !S_ISREG(inode->i_mode)) {
iput(inode);
if (!d_is_reg(path.dentry)) {
path_put(&path);
return ERR_PTR(-EINVAL);
}
@@ -1364,11 +1356,12 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
if (IS_ERR(tu)) {
pr_info("Failed to allocate trace_uprobe.(%d)\n",
(int)PTR_ERR(tu));
path_put(&path);
return ERR_CAST(tu);
}
tu->offset = offs;
tu->inode = inode;
tu->path = path;
tu->filename = kstrdup(name, GFP_KERNEL);
init_trace_event_call(tu, &tu->tp.call);

View File

@@ -207,7 +207,7 @@ static int tracepoint_add_func(struct tracepoint *tp,
lockdep_is_held(&tracepoints_mutex));
old = func_add(&tp_funcs, func, prio);
if (IS_ERR(old)) {
WARN_ON_ONCE(1);
WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);
return PTR_ERR(old);
}
@@ -239,7 +239,7 @@ static int tracepoint_remove_func(struct tracepoint *tp,
lockdep_is_held(&tracepoints_mutex));
old = func_remove(&tp_funcs, func);
if (IS_ERR(old)) {
WARN_ON_ONCE(1);
WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);
return PTR_ERR(old);
}