Merge branch 'sched/urgent'
@@ -1276,16 +1276,23 @@ static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,

static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
{
if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0)
if (unlikely(max_optlen < 0))
return -EINVAL;

if (unlikely(max_optlen > PAGE_SIZE)) {
/* We don't expose optvals that are greater than PAGE_SIZE
* to the BPF program.
*/
max_optlen = PAGE_SIZE;
}

ctx->optval = kzalloc(max_optlen, GFP_USER);
if (!ctx->optval)
return -ENOMEM;

ctx->optval_end = ctx->optval + max_optlen;

return 0;
return max_optlen;
}

static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
@@ -1319,13 +1326,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
*/
max_optlen = max_t(int, 16, *optlen);

ret = sockopt_alloc_buf(&ctx, max_optlen);
if (ret)
return ret;
max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
if (max_optlen < 0)
return max_optlen;

ctx.optlen = *optlen;

if (copy_from_user(ctx.optval, optval, *optlen) != 0) {
if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) {
ret = -EFAULT;
goto out;
}
@@ -1353,8 +1360,14 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
/* export any potential modifications */
*level = ctx.level;
*optname = ctx.optname;
*optlen = ctx.optlen;
*kernel_optval = ctx.optval;

/* optlen == 0 from BPF indicates that we should
* use original userspace data.
*/
if (ctx.optlen != 0) {
*optlen = ctx.optlen;
*kernel_optval = ctx.optval;
}
}

out:
@@ -1385,12 +1398,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
return retval;

ret = sockopt_alloc_buf(&ctx, max_optlen);
if (ret)
return ret;

ctx.optlen = max_optlen;

max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
if (max_optlen < 0)
return max_optlen;

if (!retval) {
/* If kernel getsockopt finished successfully,
* copy whatever was returned to the user back
@@ -1404,10 +1417,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
goto out;
}

if (ctx.optlen > max_optlen)
ctx.optlen = max_optlen;

if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) {
if (copy_from_user(ctx.optval, optval,
min(ctx.optlen, max_optlen)) != 0) {
ret = -EFAULT;
goto out;
}
@@ -1436,10 +1447,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
goto out;
}

if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
put_user(ctx.optlen, optlen)) {
ret = -EFAULT;
goto out;
if (ctx.optlen != 0) {
if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
put_user(ctx.optlen, optlen)) {
ret = -EFAULT;
goto out;
}
}

ret = ctx.retval;

@@ -86,12 +86,13 @@ static DEFINE_PER_CPU(struct list_head, dev_flush_list);
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);

static struct hlist_head *dev_map_create_hash(unsigned int entries)
static struct hlist_head *dev_map_create_hash(unsigned int entries,
int numa_node)
{
int i;
struct hlist_head *hash;

hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL);
hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node);
if (hash != NULL)
for (i = 0; i < entries; i++)
INIT_HLIST_HEAD(&hash[i]);
@@ -145,7 +146,8 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
return -EINVAL;

if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
dtab->map.numa_node);
if (!dtab->dev_index_head)
goto free_charge;

@@ -232,7 +234,7 @@ static void dev_map_free(struct bpf_map *map)
}
}

kfree(dtab->dev_index_head);
bpf_map_area_free(dtab->dev_index_head);
} else {
for (i = 0; i < dtab->map.max_entries; i++) {
struct bpf_dtab_netdev *dev;

@@ -169,18 +169,18 @@ int __weak kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
int err;

err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
err = copy_from_kernel_nofault(bpt->saved_instr, (char *)bpt->bpt_addr,
BREAK_INSTR_SIZE);
if (err)
return err;
err = probe_kernel_write((char *)bpt->bpt_addr,
err = copy_to_kernel_nofault((char *)bpt->bpt_addr,
arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
return err;
}

int __weak kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
{
return probe_kernel_write((char *)bpt->bpt_addr,
return copy_to_kernel_nofault((char *)bpt->bpt_addr,
(char *)bpt->saved_instr, BREAK_INSTR_SIZE);
}

@@ -587,6 +587,7 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
arch_kgdb_ops.disable_hw_break(regs);

acquirelock:
rcu_read_lock();
/*
* Interrupts will be restored by the 'trap return' code, except when
* single stepping.
@@ -646,6 +647,7 @@ return_normal:
atomic_dec(&slaves_in_kgdb);
dbg_touch_watchdogs();
local_irq_restore(flags);
rcu_read_unlock();
return 0;
}
cpu_relax();
@@ -664,6 +666,7 @@ return_normal:
raw_spin_unlock(&dbg_master_lock);
dbg_touch_watchdogs();
local_irq_restore(flags);
rcu_read_unlock();

goto acquirelock;
}
@@ -787,6 +790,7 @@ kgdb_restore:
raw_spin_unlock(&dbg_master_lock);
dbg_touch_watchdogs();
local_irq_restore(flags);
rcu_read_unlock();

return kgdb_info[cpu].ret_state;
}

@@ -247,7 +247,7 @@ char *kgdb_mem2hex(char *mem, char *buf, int count)
*/
tmp = buf + count;

err = probe_kernel_read(tmp, mem, count);
err = copy_from_kernel_nofault(tmp, mem, count);
if (err)
return NULL;
while (count > 0) {
@@ -283,7 +283,7 @@ int kgdb_hex2mem(char *buf, char *mem, int count)
*tmp_raw |= hex_to_bin(*tmp_hex--) << 4;
}

return probe_kernel_write(mem, tmp_raw, count);
return copy_to_kernel_nofault(mem, tmp_raw, count);
}

/*
@@ -335,7 +335,7 @@ static int kgdb_ebin2mem(char *buf, char *mem, int count)
size++;
}

return probe_kernel_write(mem, c, size);
return copy_to_kernel_nofault(mem, c, size);
}

#if DBG_MAX_REG_NUM > 0

@@ -542,6 +542,44 @@ static int kdb_search_string(char *searched, char *searchfor)
return 0;
}

static void kdb_msg_write(const char *msg, int msg_len)
{
struct console *c;

if (msg_len == 0)
return;

if (dbg_io_ops) {
const char *cp = msg;
int len = msg_len;

while (len--) {
dbg_io_ops->write_char(*cp);
cp++;
}
}

for_each_console(c) {
if (!(c->flags & CON_ENABLED))
continue;
if (c == dbg_io_ops->cons)
continue;
/*
* Set oops_in_progress to encourage the console drivers to
* disregard their internal spin locks: in the current calling
* context the risk of deadlock is a bigger problem than risks
* due to re-entering the console driver. We operate directly on
* oops_in_progress rather than using bust_spinlocks() because
* the calls bust_spinlocks() makes on exit are not appropriate
* for this calling context.
*/
++oops_in_progress;
c->write(c, msg, msg_len);
--oops_in_progress;
touch_nmi_watchdog();
}
}

int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
{
int diag;
@@ -553,7 +591,6 @@ int vkdb_printf(enum kdb_msgsrc src, const char *fmt, va_list ap)
int this_cpu, old_cpu;
char *cp, *cp2, *cphold = NULL, replaced_byte = ' ';
char *moreprompt = "more> ";
struct console *c;
unsigned long uninitialized_var(flags);

/* Serialize kdb_printf if multiple cpus try to write at once.
@@ -687,22 +724,11 @@ kdb_printit:
*/
retlen = strlen(kdb_buffer);
cp = (char *) printk_skip_headers(kdb_buffer);
if (!dbg_kdb_mode && kgdb_connected) {
if (!dbg_kdb_mode && kgdb_connected)
gdbstub_msg_write(cp, retlen - (cp - kdb_buffer));
} else {
if (dbg_io_ops && !dbg_io_ops->is_console) {
len = retlen - (cp - kdb_buffer);
cp2 = cp;
while (len--) {
dbg_io_ops->write_char(*cp2);
cp2++;
}
}
for_each_console(c) {
c->write(c, cp, retlen - (cp - kdb_buffer));
touch_nmi_watchdog();
}
}
else
kdb_msg_write(cp, retlen - (cp - kdb_buffer));

if (logging) {
saved_loglevel = console_loglevel;
console_loglevel = CONSOLE_LOGLEVEL_SILENT;
@@ -751,19 +777,7 @@ kdb_printit:
moreprompt = "more> ";

kdb_input_flush();

if (dbg_io_ops && !dbg_io_ops->is_console) {
len = strlen(moreprompt);
cp = moreprompt;
while (len--) {
dbg_io_ops->write_char(*cp);
cp++;
}
}
for_each_console(c) {
c->write(c, moreprompt, strlen(moreprompt));
touch_nmi_watchdog();
}
kdb_msg_write(moreprompt, strlen(moreprompt));

if (logging)
printk("%s", moreprompt);

@@ -2326,7 +2326,8 @@ void kdb_ps1(const struct task_struct *p)
int cpu;
unsigned long tmp;

if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long)))
if (!p ||
copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long)))
return;

cpu = kdb_process_cpu(p);

@@ -325,7 +325,7 @@ char *kdb_strdup(const char *str, gfp_t type)
*/
int kdb_getarea_size(void *res, unsigned long addr, size_t size)
{
int ret = probe_kernel_read((char *)res, (char *)addr, size);
int ret = copy_from_kernel_nofault((char *)res, (char *)addr, size);
if (ret) {
if (!KDB_STATE(SUPPRESS)) {
kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr);
@@ -350,7 +350,7 @@ int kdb_getarea_size(void *res, unsigned long addr, size_t size)
*/
int kdb_putarea_size(unsigned long addr, void *res, size_t size)
{
int ret = probe_kernel_read((char *)addr, (char *)res, size);
int ret = copy_from_kernel_nofault((char *)addr, (char *)res, size);
if (ret) {
if (!KDB_STATE(SUPPRESS)) {
kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr);
@@ -624,7 +624,8 @@ char kdb_task_state_char (const struct task_struct *p)
char state;
unsigned long tmp;

if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long)))
if (!p ||
copy_from_kernel_nofault(&tmp, (char *)p, sizeof(unsigned long)))
return 'E';

cpu = kdb_process_cpu(p);

@@ -71,20 +71,21 @@ config SWIOTLB
# in the pagetables
#
config DMA_NONCOHERENT_MMAP
bool

config DMA_REMAP
depends on MMU
select GENERIC_ALLOCATOR
select DMA_NONCOHERENT_MMAP
default y if !MMU
bool

config DMA_COHERENT_POOL
select GENERIC_ALLOCATOR
bool
select DMA_REMAP

config DMA_REMAP
bool
depends on MMU
select DMA_NONCOHERENT_MMAP

config DMA_DIRECT_REMAP
bool
select DMA_REMAP
select DMA_COHERENT_POOL

config DMA_CMA

@@ -109,14 +109,15 @@ static inline bool dma_should_free_from_pool(struct device *dev,
return false;
}

struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
gfp_t gfp, unsigned long attrs)
{
size_t alloc_size = PAGE_ALIGN(size);
int node = dev_to_node(dev);
struct page *page = NULL;
u64 phys_limit;

WARN_ON_ONCE(!PAGE_ALIGNED(size));

if (attrs & DMA_ATTR_NO_WARN)
gfp |= __GFP_NOWARN;

@@ -124,14 +125,14 @@ struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
gfp &= ~__GFP_ZERO;
gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
&phys_limit);
page = dma_alloc_contiguous(dev, alloc_size, gfp);
page = dma_alloc_contiguous(dev, size, gfp);
if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
dma_free_contiguous(dev, page, alloc_size);
dma_free_contiguous(dev, page, size);
page = NULL;
}
again:
if (!page)
page = alloc_pages_node(node, gfp, get_order(alloc_size));
page = alloc_pages_node(node, gfp, get_order(size));
if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
dma_free_contiguous(dev, page, size);
page = NULL;
@@ -157,9 +158,12 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
{
struct page *page;
void *ret;
int err;

size = PAGE_ALIGN(size);

if (dma_should_alloc_from_pool(dev, gfp, attrs)) {
ret = dma_alloc_from_pool(dev, PAGE_ALIGN(size), &page, gfp);
ret = dma_alloc_from_pool(dev, size, &page, gfp);
if (!ret)
return NULL;
goto done;
@@ -183,14 +187,20 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
dma_alloc_need_uncached(dev, attrs)) ||
(IS_ENABLED(CONFIG_DMA_REMAP) && PageHighMem(page))) {
/* remove any dirty cache lines on the kernel alias */
arch_dma_prep_coherent(page, PAGE_ALIGN(size));
arch_dma_prep_coherent(page, size);

/* create a coherent mapping */
ret = dma_common_contiguous_remap(page, PAGE_ALIGN(size),
ret = dma_common_contiguous_remap(page, size,
dma_pgprot(dev, PAGE_KERNEL, attrs),
__builtin_return_address(0));
if (!ret)
goto out_free_pages;
if (force_dma_unencrypted(dev)) {
err = set_memory_decrypted((unsigned long)ret,
1 << get_order(size));
if (err)
goto out_free_pages;
}
memset(ret, 0, size);
goto done;
}
@@ -207,8 +217,12 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
}

ret = page_address(page);
if (force_dma_unencrypted(dev))
set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
if (force_dma_unencrypted(dev)) {
err = set_memory_decrypted((unsigned long)ret,
1 << get_order(size));
if (err)
goto out_free_pages;
}

memset(ret, 0, size);

@@ -217,7 +231,7 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size,
arch_dma_prep_coherent(page, size);
ret = arch_dma_set_uncached(ret, size);
if (IS_ERR(ret))
goto out_free_pages;
goto out_encrypt_pages;
}
done:
if (force_dma_unencrypted(dev))
@@ -225,6 +239,15 @@ done:
else
*dma_handle = phys_to_dma(dev, page_to_phys(page));
return ret;

out_encrypt_pages:
if (force_dma_unencrypted(dev)) {
err = set_memory_encrypted((unsigned long)page_address(page),
1 << get_order(size));
/* If memory cannot be re-encrypted, it must be leaked */
if (err)
return NULL;
}
out_free_pages:
dma_free_contiguous(dev, page, size);
return NULL;
@@ -459,7 +482,6 @@ int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt,
return ret;
}

#ifdef CONFIG_MMU
bool dma_direct_can_mmap(struct device *dev)
{
return dev_is_dma_coherent(dev) ||
@@ -485,19 +507,6 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
return remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
user_count << PAGE_SHIFT, vma->vm_page_prot);
}
#else /* CONFIG_MMU */
bool dma_direct_can_mmap(struct device *dev)
{
return false;
}

int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs)
{
return -ENXIO;
}
#endif /* CONFIG_MMU */

int dma_direct_supported(struct device *dev, u64 mask)
{

@@ -175,10 +175,9 @@ static int __init dma_atomic_pool_init(void)
* sizes to 128KB per 1GB of memory, min 128KB, max MAX_ORDER-1.
*/
if (!atomic_pool_size) {
atomic_pool_size = max(totalram_pages() >> PAGE_SHIFT, 1UL) *
SZ_128K;
atomic_pool_size = min_t(size_t, atomic_pool_size,
1 << (PAGE_SHIFT + MAX_ORDER-1));
unsigned long pages = totalram_pages() / (SZ_1G / SZ_128K);
pages = min_t(unsigned long, pages, MAX_ORDER_NR_PAGES);
atomic_pool_size = max_t(size_t, pages << PAGE_SHIFT, SZ_128K);
}
INIT_WORK(&atomic_pool_work, atomic_pool_work_fn);

@@ -24,7 +24,8 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
{
void *vaddr;

vaddr = vmap(pages, size >> PAGE_SHIFT, VM_DMA_COHERENT, prot);
vaddr = vmap(pages, PAGE_ALIGN(size) >> PAGE_SHIFT,
VM_DMA_COHERENT, prot);
if (vaddr)
find_vm_area(vaddr)->pages = pages;
return vaddr;
@@ -37,7 +38,7 @@ void *dma_common_pages_remap(struct page **pages, size_t size,
void *dma_common_contiguous_remap(struct page *page, size_t size,
pgprot_t prot, const void *caller)
{
int count = size >> PAGE_SHIFT;
int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
struct page **pages;
void *vaddr;
int i;

@@ -1977,7 +1977,7 @@ static __latent_entropy struct task_struct *copy_process(
* to stop root fork bombs.
*/
retval = -EAGAIN;
if (nr_threads >= max_threads)
if (data_race(nr_threads >= max_threads))
goto bad_fork_cleanup_count;

delayacct_tsk_init(p); /* Must remain after dup_task_struct() */

@@ -181,34 +181,19 @@ void kimage_file_post_load_cleanup(struct kimage *image)
static int
kimage_validate_signature(struct kimage *image)
{
const char *reason;
int ret;

ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
image->kernel_buf_len);
switch (ret) {
case 0:
break;
if (ret) {

/* Certain verification errors are non-fatal if we're not
* checking errors, provided we aren't mandating that there
* must be a valid signature.
*/
case -ENODATA:
reason = "kexec of unsigned image";
goto decide;
case -ENOPKG:
reason = "kexec of image with unsupported crypto";
goto decide;
case -ENOKEY:
reason = "kexec of image with unavailable key";
decide:
if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) {
pr_notice("%s rejected\n", reason);
pr_notice("Enforced kernel signature verification failed (%d).\n", ret);
return ret;
}

/* If IMA is guaranteed to appraise a signature on the kexec
/*
* If IMA is guaranteed to appraise a signature on the kexec
* image, permit it even if the kernel is otherwise locked
* down.
*/
@@ -216,17 +201,10 @@ kimage_validate_signature(struct kimage *image)
security_locked_down(LOCKDOWN_KEXEC))
return -EPERM;

return 0;

/* All other errors are fatal, including nomem, unparseable
* signatures and signature check failures - even if signatures
* aren't required.
*/
default:
pr_notice("kernel signature verification failed (%d).\n", ret);
pr_debug("kernel signature verification failed (%d).\n", ret);
}

return ret;
return 0;
}
#endif

@@ -46,6 +46,11 @@


static int kprobes_initialized;
/* kprobe_table can be accessed by
* - Normal hlist traversal and RCU add/del under kprobe_mutex is held.
* Or
* - RCU hlist traversal under disabling preempt (breakpoint handlers)
*/
static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];

@@ -326,7 +331,8 @@ struct kprobe *get_kprobe(void *addr)
struct kprobe *p;

head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)];
hlist_for_each_entry_rcu(p, head, hlist) {
hlist_for_each_entry_rcu(p, head, hlist,
lockdep_is_held(&kprobe_mutex)) {
if (p->addr == addr)
return p;
}
@@ -586,11 +592,12 @@ static void kprobe_optimizer(struct work_struct *work)
mutex_unlock(&module_mutex);
mutex_unlock(&text_mutex);
cpus_read_unlock();
mutex_unlock(&kprobe_mutex);

/* Step 5: Kick optimizer again if needed */
if (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list))
kick_kprobe_optimizer();

mutex_unlock(&kprobe_mutex);
}

/* Wait for completing optimization and unoptimization */
@@ -668,8 +675,6 @@ static void force_unoptimize_kprobe(struct optimized_kprobe *op)
lockdep_assert_cpus_held();
arch_unoptimize_kprobe(op);
op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED;
if (kprobe_disabled(&op->kp))
arch_disarm_kprobe(&op->kp);
}

/* Unoptimize a kprobe if p is optimized */
@@ -849,7 +854,7 @@ static void optimize_all_kprobes(void)
kprobes_allow_optimization = true;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, head, hlist)
hlist_for_each_entry(p, head, hlist)
if (!kprobe_disabled(p))
optimize_kprobe(p);
}
@@ -876,7 +881,7 @@ static void unoptimize_all_kprobes(void)
kprobes_allow_optimization = false;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, head, hlist) {
hlist_for_each_entry(p, head, hlist) {
if (!kprobe_disabled(p))
unoptimize_kprobe(p, false);
}
@@ -1236,6 +1241,26 @@ __releases(hlist_lock)
}
NOKPROBE_SYMBOL(kretprobe_table_unlock);

struct kprobe kprobe_busy = {
.addr = (void *) get_kprobe,
};

void kprobe_busy_begin(void)
{
struct kprobe_ctlblk *kcb;

preempt_disable();
__this_cpu_write(current_kprobe, &kprobe_busy);
kcb = get_kprobe_ctlblk();
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
}

void kprobe_busy_end(void)
{
__this_cpu_write(current_kprobe, NULL);
preempt_enable();
}

/*
* This function is called from finish_task_switch when task tk becomes dead,
* so that we can recycle any function-return probe instances associated
@@ -1253,6 +1278,8 @@ void kprobe_flush_task(struct task_struct *tk)
/* Early boot. kretprobe_table_locks not yet initialized. */
return;

kprobe_busy_begin();

INIT_HLIST_HEAD(&empty_rp);
hash = hash_ptr(tk, KPROBE_HASH_BITS);
head = &kretprobe_inst_table[hash];
@@ -1266,6 +1293,8 @@ void kprobe_flush_task(struct task_struct *tk)
hlist_del(&ri->hlist);
kfree(ri);
}

kprobe_busy_end();
}
NOKPROBE_SYMBOL(kprobe_flush_task);

@@ -1499,12 +1528,14 @@ static struct kprobe *__get_valid_kprobe(struct kprobe *p)
{
struct kprobe *ap, *list_p;

lockdep_assert_held(&kprobe_mutex);

ap = get_kprobe(p->addr);
if (unlikely(!ap))
return NULL;

if (p != ap) {
list_for_each_entry_rcu(list_p, &ap->list, list)
list_for_each_entry(list_p, &ap->list, list)
if (list_p == p)
/* kprobe p is a valid probe */
goto valid;
@@ -1669,7 +1700,9 @@ static int aggr_kprobe_disabled(struct kprobe *ap)
{
struct kprobe *kp;

list_for_each_entry_rcu(kp, &ap->list, list)
lockdep_assert_held(&kprobe_mutex);

list_for_each_entry(kp, &ap->list, list)
if (!kprobe_disabled(kp))
/*
* There is an active probe on the list.
@@ -1748,7 +1781,7 @@ static int __unregister_kprobe_top(struct kprobe *p)
else {
/* If disabling probe has special handlers, update aggrprobe */
if (p->post_handler && !kprobe_gone(p)) {
list_for_each_entry_rcu(list_p, &ap->list, list) {
list_for_each_entry(list_p, &ap->list, list) {
if ((list_p != p) && (list_p->post_handler))
goto noclean;
}
@@ -2062,13 +2095,15 @@ static void kill_kprobe(struct kprobe *p)
{
struct kprobe *kp;

lockdep_assert_held(&kprobe_mutex);

p->flags |= KPROBE_FLAG_GONE;
if (kprobe_aggrprobe(p)) {
/*
* If this is an aggr_kprobe, we have to list all the
* chained probes and mark them GONE.
*/
list_for_each_entry_rcu(kp, &p->list, list)
list_for_each_entry(kp, &p->list, list)
kp->flags |= KPROBE_FLAG_GONE;
p->post_handler = NULL;
kill_optimized_kprobe(p);
@@ -2312,7 +2347,7 @@ static int kprobes_module_callback(struct notifier_block *nb,
mutex_lock(&kprobe_mutex);
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
hlist_for_each_entry_rcu(p, head, hlist)
hlist_for_each_entry(p, head, hlist)
if (within_module_init((unsigned long)p->addr, mod) ||
(checkcore &&
within_module_core((unsigned long)p->addr, mod))) {
@@ -2550,7 +2585,7 @@ static int arm_all_kprobes(void)
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
/* Arm all kprobes on a best-effort basis */
hlist_for_each_entry_rcu(p, head, hlist) {
hlist_for_each_entry(p, head, hlist) {
if (!kprobe_disabled(p)) {
err = arm_kprobe(p);
if (err) {
@@ -2593,7 +2628,7 @@ static int disarm_all_kprobes(void)
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
/* Disarm all kprobes on a best-effort basis */
hlist_for_each_entry_rcu(p, head, hlist) {
hlist_for_each_entry(p, head, hlist) {
if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) {
err = disarm_kprobe(p, false);
if (err) {

@@ -202,7 +202,7 @@ void *kthread_probe_data(struct task_struct *task)
struct kthread *kthread = to_kthread(task);
void *data = NULL;

probe_kernel_read(&data, &kthread->data, sizeof(data));
copy_from_kernel_nofault(&data, &kthread->data, sizeof(data));
return data;
}

@@ -2783,7 +2783,9 @@ static void dynamic_debug_remove(struct module *mod, struct _ddebug *debug)

void * __weak module_alloc(unsigned long size)
{
return vmalloc_exec(size);
return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
NUMA_NO_NODE, __builtin_return_address(0));
}

bool __weak module_init_section(const char *name)

@@ -531,7 +531,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, flags)
} else if (!IS_ERR(pidfd_pid(file))) {
err = check_setns_flags(flags);
} else {
err = -EBADF;
err = -EINVAL;
}
if (err)
goto out;

@@ -335,7 +335,7 @@ static void padata_reorder(struct parallel_data *pd)
*
* Ensure reorder queue is read after pd->lock is dropped so we see
* new objects from another task in padata_do_serial. Pairs with
* smp_mb__after_atomic in padata_do_serial.
* smp_mb in padata_do_serial.
*/
smp_mb();

@@ -418,7 +418,7 @@ void padata_do_serial(struct padata_priv *padata)
* with the trylock of pd->lock in padata_reorder. Pairs with smp_mb
* in padata_reorder.
*/
smp_mb__after_atomic();
smp_mb();

padata_reorder(pd);
}

@@ -974,16 +974,6 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
user->idx = log_next_idx;
user->seq = log_next_seq;
break;
case SEEK_CUR:
/*
* It isn't supported due to the record nature of this
* interface: _SET _DATA and _END point to very specific
* record positions, while _CUR would be more useful in case
* of a byte-based log. Because of that, return the default
* errno value for invalid seek operation.
*/
ret = -ESPIPE;
break;
default:
ret = -EINVAL;
}

@@ -723,7 +723,7 @@ kfree_perf_init(void)
schedule_timeout_uninterruptible(1);
}

pr_alert("kfree object size=%lu\n", kfree_mult * sizeof(struct kfree_obj));
pr_alert("kfree object size=%zu\n", kfree_mult * sizeof(struct kfree_obj));

kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
GFP_KERNEL);

@@ -250,7 +250,7 @@ static noinstr void rcu_dynticks_eqs_enter(void)
* next idle sojourn.
*/
rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
// RCU is no longer watching. Better be in extended quiescent state!
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
(seq & RCU_DYNTICK_CTRL_CTR));
@@ -274,13 +274,13 @@ static noinstr void rcu_dynticks_eqs_exit(void)
* and we also must force ordering with the next RCU read-side
* critical section.
*/
seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
seq = arch_atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
// RCU is now watching. Better not be in an extended quiescent state!
rcu_dynticks_task_trace_exit(); // After ->dynticks update!
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
!(seq & RCU_DYNTICK_CTRL_CTR));
if (seq & RCU_DYNTICK_CTRL_MASK) {
atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
arch_atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
smp_mb__after_atomic(); /* _exit after clearing mask. */
}
}
@@ -313,7 +313,7 @@ static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

return !(atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR);
return !(arch_atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR);
}

/*
@@ -633,6 +633,10 @@ static noinstr void rcu_eqs_enter(bool user)
do_nocb_deferred_wakeup(rdp);
rcu_prepare_for_idle();
rcu_preempt_deferred_qs(current);

// instrumentation for the noinstr rcu_dynticks_eqs_enter()
instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));

instrumentation_end();
WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
// RCU is watching here ...
@@ -692,6 +696,7 @@ noinstr void rcu_nmi_exit(void)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);

instrumentation_begin();
/*
* Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
* (We are exiting an NMI handler, so RCU better be paying attention
@@ -705,7 +710,6 @@ noinstr void rcu_nmi_exit(void)
* leave it in non-RCU-idle state.
*/
if (rdp->dynticks_nmi_nesting != 1) {
instrumentation_begin();
trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2,
atomic_read(&rdp->dynticks));
WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
@@ -714,13 +718,15 @@ noinstr void rcu_nmi_exit(void)
return;
}

instrumentation_begin();
/* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, atomic_read(&rdp->dynticks));
WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */

if (!in_nmi())
rcu_prepare_for_idle();

// instrumentation for the noinstr rcu_dynticks_eqs_enter()
instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));
instrumentation_end();

// RCU is watching here ...
@@ -838,6 +844,10 @@ static void noinstr rcu_eqs_exit(bool user)
rcu_dynticks_eqs_exit();
// ... but is watching here.
instrumentation_begin();

// instrumentation for the noinstr rcu_dynticks_eqs_exit()
instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));

rcu_cleanup_after_idle();
trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, atomic_read(&rdp->dynticks));
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
@@ -983,13 +993,21 @@ noinstr void rcu_nmi_enter(void)
if (!in_nmi())
rcu_cleanup_after_idle();

instrumentation_begin();
// instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
instrument_atomic_read(&rdp->dynticks, sizeof(rdp->dynticks));
// instrumentation for the noinstr rcu_dynticks_eqs_exit()
instrument_atomic_write(&rdp->dynticks, sizeof(rdp->dynticks));

incby = 1;
} else if (!in_nmi()) {
instrumentation_begin();
rcu_irq_enter_check_tick();
instrumentation_end();
} else {
instrumentation_begin();
}
instrumentation_begin();

trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
rdp->dynticks_nmi_nesting,
rdp->dynticks_nmi_nesting + incby, atomic_read(&rdp->dynticks));

@@ -1313,9 +1313,6 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)

void activate_task(struct rq *rq, struct task_struct *p, int flags)
{
if (task_contributes_to_load(p))
rq->nr_uninterruptible--;

enqueue_task(rq, p, flags);

p->on_rq = TASK_ON_RQ_QUEUED;
@@ -1325,9 +1322,6 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
{
p->on_rq = (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING;

if (task_contributes_to_load(p))
rq->nr_uninterruptible++;

dequeue_task(rq, p, flags);
}

@@ -1629,7 +1623,7 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
goto out;
}

if (cpumask_equal(p->cpus_ptr, new_mask))
if (cpumask_equal(&p->cpus_mask, new_mask))
goto out;

/*
@@ -2228,10 +2222,10 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,

lockdep_assert_held(&rq->lock);

#ifdef CONFIG_SMP
if (p->sched_contributes_to_load)
rq->nr_uninterruptible--;

#ifdef CONFIG_SMP
if (wake_flags & WF_MIGRATED)
en_flags |= ENQUEUE_MIGRATED;
#endif
@@ -2285,8 +2279,15 @@ void sched_ttwu_pending(void *arg)
rq_lock_irqsave(rq, &rf);
update_rq_clock(rq);

llist_for_each_entry_safe(p, t, llist, wake_entry)
llist_for_each_entry_safe(p, t, llist, wake_entry.llist) {
if (WARN_ON_ONCE(p->on_cpu))
smp_cond_load_acquire(&p->on_cpu, !VAL);

if (WARN_ON_ONCE(task_cpu(p) != cpu_of(rq)))
set_task_cpu(p, cpu_of(rq));

ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf);
}

rq_unlock_irqrestore(rq, &rf);
}
@@ -2314,7 +2315,7 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags
p->sched_remote_wakeup = !!(wake_flags & WF_MIGRATED);

WRITE_ONCE(rq->ttwu_pending, 1);
__smp_call_single_queue(cpu, &p->wake_entry);
__smp_call_single_queue(cpu, &p->wake_entry.llist);
}

void wake_up_if_idle(int cpu)
@@ -2361,7 +2362,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
* the soon-to-be-idle CPU as the current CPU is likely busy.
* nr_running is checked to avoid unnecessary task stacking.
*/
if ((wake_flags & WF_ON_RQ) && cpu_rq(cpu)->nr_running <= 1)
if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1)
return true;

return false;
@@ -2370,6 +2371,9 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
{
if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) {
if (WARN_ON_ONCE(cpu == smp_processor_id()))
return false;

sched_clock_cpu(cpu); /* Sync clocks across CPUs */
__ttwu_queue_wakelist(p, cpu, wake_flags);
return true;
@@ -2520,7 +2524,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
goto out;

success = 1;
cpu = task_cpu(p);
trace_sched_waking(p);
p->state = TASK_RUNNING;
trace_sched_wakeup(p);
@@ -2542,7 +2545,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)

/* We're going to change ->state: */
success = 1;
cpu = task_cpu(p);

/*
* Ensure we load p->on_rq _after_ p->state, otherwise it would
@@ -2567,7 +2569,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
* A similar smb_rmb() lives in try_invoke_on_locked_down_task().
*/
smp_rmb();
if (p->on_rq && ttwu_remote(p, wake_flags))
if (READ_ONCE(p->on_rq) && ttwu_remote(p, wake_flags))
goto unlock;

if (p->in_iowait) {
@@ -2576,9 +2578,6 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
}

#ifdef CONFIG_SMP
p->sched_contributes_to_load = !!task_contributes_to_load(p);
p->state = TASK_WAKING;

/*
* Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
* possible to, falsely, observe p->on_cpu == 0.
@@ -2597,8 +2596,20 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
*
* Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
* __schedule(). See the comment for smp_mb__after_spinlock().
*
* Form a control-dep-acquire with p->on_rq == 0 above, to ensure
* schedule()'s deactivate_task() has 'happened' and p will no longer
* care about it's own p->state. See the comment in __schedule().
*/
smp_rmb();
smp_acquire__after_ctrl_dep();

/*
* We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq
* == 0), which means we need to do an enqueue, change p->state to
* TASK_WAKING such that we can unlock p->pi_lock before doing the
* enqueue, such as ttwu_queue_wakelist().
*/
p->state = TASK_WAKING;

/*
* If the owning (remote) CPU is still in the middle of schedule() with
@@ -2606,8 +2617,21 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
* which potentially sends an IPI instead of spinning on p->on_cpu to
* let the waker make forward progress. This is safe because IRQs are
* disabled and the IPI will deliver after on_cpu is cleared.
*
* Ensure we load task_cpu(p) after p->on_cpu:
*
* set_task_cpu(p, cpu);
* STORE p->cpu = @cpu
* __schedule() (switch to task 'p')
* LOCK rq->lock
* smp_mb__after_spin_lock() smp_cond_load_acquire(&p->on_cpu)
* STORE p->on_cpu = 1 LOAD p->cpu
*
* to ensure we observe the correct CPU on which the task is currently
* scheduling.
*/
if (READ_ONCE(p->on_cpu) && ttwu_queue_wakelist(p, cpu, wake_flags | WF_ON_RQ))
if (smp_load_acquire(&p->on_cpu) &&
ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
goto unlock;

/*
@@ -2627,6 +2651,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
psi_ttwu_dequeue(p);
set_task_cpu(p, cpu);
}
#else
cpu = task_cpu(p);
#endif /* CONFIG_SMP */

ttwu_queue(p, cpu, wake_flags);
@@ -2634,7 +2660,7 @@ unlock:
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
out:
if (success)
ttwu_stat(p, cpu, wake_flags);
ttwu_stat(p, task_cpu(p), wake_flags);
preempt_enable();

return success;
@@ -2755,7 +2781,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
#endif
init_numa_balancing(clone_flags, p);
#ifdef CONFIG_SMP
p->wake_entry_type = CSD_TYPE_TTWU;
p->wake_entry.u_flags = CSD_TYPE_TTWU;
#endif
}

@@ -2931,6 +2957,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
* Silence PROVE_RCU.
*/
raw_spin_lock_irqsave(&p->pi_lock, flags);
rseq_migrate(p);
/*
* We're setting the CPU for the first time, we don't migrate,
* so use __set_task_cpu().
@@ -2995,6 +3022,7 @@ void wake_up_new_task(struct task_struct *p)
* as we're not fully set-up yet.
*/
p->recent_used_cpu = task_cpu(p);
rseq_migrate(p);
__set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0));
#endif
rq = __task_rq_lock(p, &rf);
@@ -4065,6 +4093,7 @@ static void __sched notrace __schedule(bool preempt)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
unsigned long prev_state;
struct rq_flags rf;
struct rq *rq;
int cpu;
@@ -4081,12 +4110,22 @@ static void __sched notrace __schedule(bool preempt)
local_irq_disable();
rcu_note_context_switch(preempt);

/* See deactivate_task() below. */
prev_state = prev->state;

/*
* Make sure that signal_pending_state()->signal_pending() below
* can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
* done by the caller to avoid the race with signal_wake_up().
* done by the caller to avoid the race with signal_wake_up():
*
* The membarrier system call requires a full memory barrier
* __set_current_state(@state) signal_wake_up()
* schedule() set_tsk_thread_flag(p, TIF_SIGPENDING)
* wake_up_state(p, state)
* LOCK rq->lock LOCK p->pi_state
* smp_mb__after_spinlock() smp_mb__after_spinlock()
* if (signal_pending_state()) if (p->state & @state)
*
* Also, the membarrier system call requires a full memory barrier
* after coming from user-space, before storing to rq->curr.
*/
rq_lock(rq, &rf);
@@ -4097,10 +4136,31 @@ static void __sched notrace __schedule(bool preempt)
update_rq_clock(rq);

switch_count = &prev->nivcsw;
if (!preempt && prev->state) {
if (signal_pending_state(prev->state, prev)) {
/*
* We must re-load prev->state in case ttwu_remote() changed it
* before we acquired rq->lock.
*/
if (!preempt && prev_state && prev_state == prev->state) {
if (signal_pending_state(prev_state, prev)) {
prev->state = TASK_RUNNING;
} else {
prev->sched_contributes_to_load =
(prev_state & TASK_UNINTERRUPTIBLE) &&
!(prev_state & TASK_NOLOAD) &&
!(prev->flags & PF_FROZEN);

if (prev->sched_contributes_to_load)
rq->nr_uninterruptible++;

/*
* __schedule() ttwu()
* prev_state = prev->state; if (READ_ONCE(p->on_rq) && ...)
* LOCK rq->lock goto out;
* smp_mb__after_spinlock(); smp_acquire__after_ctrl_dep();
* p->on_rq = 0; p->state = TASK_WAKING;
*
* After this, schedule() must not care about p->state any more.
*/
deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);

if (prev->in_iowait) {
@@ -4524,7 +4584,8 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
*/
if (dl_prio(prio)) {
if (!dl_prio(p->normal_prio) ||
(pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
(pi_task && dl_prio(pi_task->prio) &&
dl_entity_preempt(&pi_task->dl, &p->dl))) {
p->dl.dl_boosted = 1;
queue_flag |= ENQUEUE_REPLENISH;
} else

@@ -2763,6 +2763,7 @@ void __dl_clear_params(struct task_struct *p)
dl_se->dl_bw = 0;
dl_se->dl_density = 0;

dl_se->dl_boosted = 0;
dl_se->dl_throttled = 0;
dl_se->dl_yielded = 0;
dl_se->dl_non_contending = 0;

@@ -806,7 +806,7 @@ void post_init_entity_util_avg(struct task_struct *p)
}
}

sa->runnable_avg = cpu_scale;
sa->runnable_avg = sa->util_avg;

if (p->sched_class != &fair_sched_class) {
/*

@@ -96,6 +96,15 @@ void __cpuidle default_idle_call(void)
}
}

static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{
if (current_clr_polling_and_test())
return -EBUSY;

return cpuidle_enter_s2idle(drv, dev);
}

static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
int next_state)
{
@@ -171,11 +180,9 @@ static void cpuidle_idle_call(void)
if (idle_should_enter_s2idle()) {
rcu_idle_enter();

entered_state = cpuidle_enter_s2idle(drv, dev);
if (entered_state > 0) {
local_irq_enable();
entered_state = call_cpuidle_s2idle(drv, dev);
if (entered_state > 0)
goto exit_idle;
}

rcu_idle_exit();

@@ -1697,7 +1697,7 @@ static inline int task_on_rq_migrating(struct task_struct *p)
#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */
#define WF_FORK 0x02 /* Child wakeup after fork */
#define WF_MIGRATED 0x04 /* Internal use, task got migrated */
#define WF_ON_RQ 0x08 /* Wakee is on_rq */
#define WF_ON_CPU 0x08 /* Wakee is on_cpu */

/*
* To aid in avoiding the subversion of "niceness" due to uneven distribution

@@ -2529,9 +2529,6 @@ bool get_signal(struct ksignal *ksig)
struct signal_struct *signal = current->signal;
int signr;

if (unlikely(current->task_works))
task_work_run();

if (unlikely(uprobe_deny_signal()))
return false;

@@ -2544,6 +2541,13 @@ bool get_signal(struct ksignal *ksig)

relock:
spin_lock_irq(&sighand->siglock);
current->jobctl &= ~JOBCTL_TASK_WORK;
if (unlikely(current->task_works)) {
spin_unlock_irq(&sighand->siglock);
task_work_run();
goto relock;
}

/*
* Every stopped thread goes here after wakeup. Check to see if
* we should notify the parent, prepare_signal(SIGCONT) encodes

kernel/smp.c
@@ -669,24 +669,6 @@ void __init smp_init(void)
{
int num_nodes, num_cpus;

/*
* Ensure struct irq_work layout matches so that
* flush_smp_call_function_queue() can do horrible things.
*/
BUILD_BUG_ON(offsetof(struct irq_work, llnode) !=
offsetof(struct __call_single_data, llist));
BUILD_BUG_ON(offsetof(struct irq_work, func) !=
offsetof(struct __call_single_data, func));
BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
offsetof(struct __call_single_data, flags));

/*
* Assert the CSD_TYPE_TTWU layout is similar enough
* for task_struct to be on the @call_single_queue.
*/
BUILD_BUG_ON(offsetof(struct task_struct, wake_entry_type) - offsetof(struct task_struct, wake_entry) !=
offsetof(struct __call_single_data, flags) - offsetof(struct __call_single_data, llist));

idle_threads_init();
cpuhp_threads_init();

@@ -25,9 +25,10 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */
* 0 if succeeds or -ESRCH.
*/
int
task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
task_work_add(struct task_struct *task, struct callback_head *work, int notify)
{
struct callback_head *head;
unsigned long flags;

do {
head = READ_ONCE(task->task_works);
@@ -36,8 +37,19 @@ task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
work->next = head;
} while (cmpxchg(&task->task_works, head, work) != head);

if (notify)
switch (notify) {
case TWA_RESUME:
set_notify_resume(task);
break;
case TWA_SIGNAL:
if (lock_task_sighand(task, &flags)) {
task->jobctl |= JOBCTL_TASK_WORK;
signal_wake_up(task, 0);
unlock_task_sighand(task, &flags);
}
break;
}

return 0;
}

@@ -3,6 +3,9 @@
* Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
*
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/blktrace_api.h>
@@ -344,7 +347,8 @@ static int __blk_trace_remove(struct request_queue *q)
{
struct blk_trace *bt;

bt = xchg(&q->blk_trace, NULL);
bt = rcu_replace_pointer(q->blk_trace, NULL,
lockdep_is_held(&q->blk_trace_mutex));
if (!bt)
return -EINVAL;

@@ -494,6 +498,17 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
*/
strreplace(buts->name, '/', '_');

/*
* bdev can be NULL, as with scsi-generic, this is a helpful as
* we can be.
*/
if (rcu_dereference_protected(q->blk_trace,
lockdep_is_held(&q->blk_trace_mutex))) {
pr_warn("Concurrent blktraces are not allowed on %s\n",
buts->name);
return -EBUSY;
}

bt = kzalloc(sizeof(*bt), GFP_KERNEL);
if (!bt)
return -ENOMEM;
@@ -543,10 +558,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
bt->pid = buts->pid;
bt->trace_state = Blktrace_setup;

ret = -EBUSY;
if (cmpxchg(&q->blk_trace, NULL, bt))
goto err;

rcu_assign_pointer(q->blk_trace, bt);
get_probe_ref();

ret = 0;
@@ -1629,7 +1641,8 @@ static int blk_trace_remove_queue(struct request_queue *q)
{
struct blk_trace *bt;

bt = xchg(&q->blk_trace, NULL);
bt = rcu_replace_pointer(q->blk_trace, NULL,
lockdep_is_held(&q->blk_trace_mutex));
if (bt == NULL)
return -EINVAL;

@@ -1661,10 +1674,7 @@ static int blk_trace_setup_queue(struct request_queue *q,

blk_trace_setup_lba(bt, bdev);

ret = -EBUSY;
if (cmpxchg(&q->blk_trace, NULL, bt))
goto free_bt;

rcu_assign_pointer(q->blk_trace, bt);
get_probe_ref();
return 0;

@@ -141,7 +141,7 @@ bpf_probe_read_user_common(void *dst, u32 size, const void __user *unsafe_ptr)
{
int ret;

ret = probe_user_read(dst, unsafe_ptr, size);
ret = copy_from_user_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
memset(dst, 0, size);
return ret;
@@ -196,7 +196,7 @@ bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr)

if (unlikely(ret < 0))
goto fail;
ret = probe_kernel_read(dst, unsafe_ptr, size);
ret = copy_from_kernel_nofault(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
goto fail;
return ret;
@@ -241,7 +241,7 @@ bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
if (unlikely(ret < 0))
goto fail;

return 0;
return ret;
fail:
memset(dst, 0, size);
return ret;
@@ -326,7 +326,7 @@ BPF_CALL_3(bpf_probe_write_user, void __user *, unsafe_ptr, const void *, src,
if (unlikely(!nmi_uaccess_okay()))
return -EPERM;

return probe_user_write(unsafe_ptr, src, size);
return copy_to_user_nofault(unsafe_ptr, src, size);
}

static const struct bpf_func_proto bpf_probe_write_user_proto = {
@@ -661,7 +661,7 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,

copy_size = (fmt[i + 2] == '4') ? 4 : 16;

err = probe_kernel_read(bufs->buf[memcpy_cnt],
err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt],
(void *) (long) args[fmt_cnt],
copy_size);
if (err < 0)

@@ -2260,7 +2260,7 @@ ftrace_find_tramp_ops_next(struct dyn_ftrace *rec,

if (hash_contains_ip(ip, op->func_hash))
return op;
}
}

return NULL;
}
@@ -3599,7 +3599,7 @@ static int t_show(struct seq_file *m, void *v)
if (direct)
seq_printf(m, "\n\tdirect-->%pS", (void *)direct);
}
}
}

seq_putc(m, '\n');

@@ -7151,6 +7151,10 @@ static int pid_open(struct inode *inode, struct file *file, int type)
case TRACE_NO_PIDS:
seq_ops = &ftrace_no_pid_sops;
break;
default:
trace_array_put(tr);
WARN_ON_ONCE(1);
return -EINVAL;
}

ret = seq_open(file, seq_ops);
@@ -7229,6 +7233,10 @@ pid_write(struct file *filp, const char __user *ubuf,
other_pids = rcu_dereference_protected(tr->function_pids,
lockdep_is_held(&ftrace_lock));
break;
default:
ret = -EINVAL;
WARN_ON_ONCE(1);
goto out;
}

ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);

@@ -2427,7 +2427,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
if (unlikely(info->add_timestamp)) {
bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer);

event = rb_add_time_stamp(event, info->delta, abs);
event = rb_add_time_stamp(event, abs ? info->delta : delta, abs);
length -= RB_LEN_TIME_EXTEND;
delta = 0;
}

@@ -3570,7 +3570,6 @@ static void *s_next(struct seq_file *m, void *v, loff_t *pos)

void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
struct ring_buffer_event *event;
struct ring_buffer_iter *buf_iter;
unsigned long entries = 0;
u64 ts;
@@ -3588,7 +3587,7 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
* that a reset never took place on a cpu. This is evident
* by the timestamp being before the start of the buffer.
*/
while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
while (ring_buffer_iter_peek(buf_iter, &ts)) {
if (ts >= iter->array_buffer->time_start)
break;
entries++;

@@ -61,6 +61,9 @@ enum trace_type {
#undef __field_desc
#define __field_desc(type, container, item)

#undef __field_packed
#define __field_packed(type, container, item)

#undef __array
#define __array(type, item, size) type item[size];

@@ -101,12 +101,16 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event)
		kprobe_event_cmd_init(&cmd, buf, MAX_BUF_LEN);

		ret = kprobe_event_gen_cmd_start(&cmd, event, val);
		if (ret)
		if (ret) {
			pr_err("Failed to generate probe: %s\n", buf);
			break;
		}

		ret = kprobe_event_gen_cmd_end(&cmd);
		if (ret)
		if (ret) {
			pr_err("Failed to add probe: %s\n", buf);
			break;
		}
	}

	return ret;
@@ -120,7 +124,7 @@ trace_boot_add_kprobe_event(struct xbc_node *node, const char *event)
}
#endif

#ifdef CONFIG_HIST_TRIGGERS
#ifdef CONFIG_SYNTH_EVENTS
static int __init
trace_boot_add_synth_event(struct xbc_node *node, const char *event)
{
@@ -78,8 +78,8 @@ FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,

	F_STRUCT(
		__field_struct( struct ftrace_graph_ent, graph_ent )
		__field_desc( unsigned long, graph_ent, func )
		__field_desc( int, graph_ent, depth )
		__field_packed( unsigned long, graph_ent, func )
		__field_packed( int, graph_ent, depth )
	),

	F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
@@ -92,11 +92,11 @@ FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,

	F_STRUCT(
		__field_struct( struct ftrace_graph_ret, ret )
		__field_desc( unsigned long, ret, func )
		__field_desc( unsigned long, ret, overrun )
		__field_desc( unsigned long long, ret, calltime)
		__field_desc( unsigned long long, ret, rettime )
		__field_desc( int, ret, depth )
		__field_packed( unsigned long, ret, func )
		__field_packed( unsigned long, ret, overrun )
		__field_packed( unsigned long long, ret, calltime)
		__field_packed( unsigned long long, ret, rettime )
		__field_packed( int, ret, depth )
	),

	F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d",
@@ -216,11 +216,17 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file)

int trigger_process_regex(struct trace_event_file *file, char *buff)
{
	char *command, *next = buff;
	char *command, *next;
	struct event_command *p;
	int ret = -EINVAL;

	next = buff = skip_spaces(buff);
	command = strsep(&next, ": \t");
	if (next) {
		next = skip_spaces(next);
		if (!*next)
			next = NULL;
	}
	command = (command[0] != '!') ? command : command + 1;

	mutex_lock(&trigger_cmd_mutex);
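Note: the whitespace handling introduced above boils down to "skip leading spaces, take the first token as the command, and treat a remainder that is empty or all whitespace as no parameter". A minimal sketch (not part of this commit; split_trigger_cmd() is a hypothetical helper name) using the kernel's skip_spaces() and strsep():

#include <linux/string.h>

static void split_trigger_cmd(char *buff, char **command, char **param)
{
	char *next = skip_spaces(buff);		/* drop leading whitespace */

	*command = strsep(&next, ": \t");	/* first token is the command */
	if (next) {
		next = skip_spaces(next);
		if (!*next)			/* only whitespace left */
			next = NULL;
	}
	*param = next;				/* NULL means "no parameter" */
}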
@@ -630,8 +636,14 @@ event_trigger_callback(struct event_command *cmd_ops,
	int ret;

	/* separate the trigger from the filter (t:n [if filter]) */
	if (param && isdigit(param[0]))
	if (param && isdigit(param[0])) {
		trigger = strsep(&param, " \t");
		if (param) {
			param = skip_spaces(param);
			if (!*param)
				param = NULL;
		}
	}

	trigger_ops = cmd_ops->get_trigger_ops(cmd, trigger);

@@ -1368,6 +1380,11 @@ int event_enable_trigger_func(struct event_command *cmd_ops,
	trigger = strsep(&param, " \t");
	if (!trigger)
		return -EINVAL;
	if (param) {
		param = skip_spaces(param);
		if (!*param)
			param = NULL;
	}

	system = strsep(&trigger, ":");
	if (!trigger)
@@ -45,6 +45,9 @@ static int ftrace_event_register(struct trace_event_call *call,
#undef __field_desc
#define __field_desc(type, container, item) type item;

#undef __field_packed
#define __field_packed(type, container, item) type item;

#undef __array
#define __array(type, item, size) type item[size];

@@ -85,6 +88,13 @@ static void __always_unused ____ftrace_check_##name(void) \
	 .size = sizeof(_type), .align = __alignof__(_type), \
	 is_signed_type(_type), .filter_type = _filter_type },


#undef __field_ext_packed
#define __field_ext_packed(_type, _item, _filter_type) { \
	.type = #_type, .name = #_item, \
	.size = sizeof(_type), .align = 1, \
	is_signed_type(_type), .filter_type = _filter_type },

#undef __field
#define __field(_type, _item) __field_ext(_type, _item, FILTER_OTHER)

@@ -94,6 +104,9 @@ static void __always_unused ____ftrace_check_##name(void) \
#undef __field_desc
#define __field_desc(_type, _container, _item) __field_ext(_type, _item, FILTER_OTHER)

#undef __field_packed
#define __field_packed(_type, _container, _item) __field_ext_packed(_type, _item, FILTER_OTHER)

#undef __array
#define __array(_type, _item, _len) { \
	.type = #_type"["__stringify(_len)"]", .name = #_item, \
@@ -129,6 +142,9 @@ static struct trace_event_fields ftrace_event_fields_##name[] = { \
#undef __field_desc
#define __field_desc(type, container, item)

#undef __field_packed
#define __field_packed(type, container, item)

#undef __array
#define __array(type, item, len)

@@ -42,7 +42,7 @@ static int allocate_ftrace_ops(struct trace_array *tr)
	if (!ops)
		return -ENOMEM;

	/* Currently only the non stack verision is supported */
	/* Currently only the non stack version is supported */
	ops->func = function_trace_call;
	ops->flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_PID;

@@ -1222,7 +1222,7 @@ fetch_store_strlen(unsigned long addr)
#endif

	do {
		ret = probe_kernel_read(&c, (u8 *)addr + len, 1);
		ret = copy_from_kernel_nofault(&c, (u8 *)addr + len, 1);
		len++;
	} while (c && ret == 0 && len < MAX_STRING_SIZE);

@@ -1290,7 +1290,7 @@ probe_mem_read_user(void *dest, void *src, size_t size)
{
	const void __user *uaddr = (__force const void __user *)src;

	return probe_user_read(dest, uaddr, size);
	return copy_from_user_nofault(dest, uaddr, size);
}

static nokprobe_inline int
@@ -1300,7 +1300,7 @@ probe_mem_read(void *dest, void *src, size_t size)
	if ((unsigned long)src < TASK_SIZE)
		return probe_mem_read_user(dest, src, size);
#endif
	return probe_kernel_read(dest, src, size);
	return copy_from_kernel_nofault(dest, src, size);
}

/* Note that we don't verify it, since the code does not come from user space */
@@ -639,8 +639,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
			ret = -EINVAL;
			goto fail;
		}
		if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) ||
		     parg->count) {
		if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM ||
		     code->op == FETCH_OP_DATA) || parg->count) {
			/*
			 * IMM, DATA and COMM is pointing actual address, those
			 * must be kept, and if parg->count != 0, this is an
@@ -236,7 +236,7 @@ struct trace_probe_event {
	struct trace_event_call call;
	struct list_head files;
	struct list_head probes;
	struct trace_uprobe_filter filter[0];
	struct trace_uprobe_filter filter[];
};

struct trace_probe {
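Note: the hunk above converts the old zero-length array idiom (filter[0]) into a C99 flexible array member (filter[]); the trailing storage is still sized at allocation time. A minimal sketch under that assumption (demo_filter, demo_event and demo_alloc() are hypothetical names, not from this commit):

#include <linux/overflow.h>
#include <linux/slab.h>

struct demo_filter {			/* hypothetical element type */
	unsigned long mask;
};

struct demo_event {
	int nfilters;
	struct demo_filter filter[];	/* was: struct demo_filter filter[0]; */
};

static struct demo_event *demo_alloc(int n)
{
	/* struct_size() sizes the struct plus n trailing filter elements */
	struct demo_event *ev = kzalloc(struct_size(ev, filter, n), GFP_KERNEL);

	if (ev)
		ev->nfilters = n;
	return ev;
}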
@@ -4638,11 +4638,11 @@ void print_worker_info(const char *log_lvl, struct task_struct *task)
	 * Carefully copy the associated workqueue's workfn, name and desc.
	 * Keep the original last '\0' in case the original is garbage.
	 */
	probe_kernel_read(&fn, &worker->current_func, sizeof(fn));
	probe_kernel_read(&pwq, &worker->current_pwq, sizeof(pwq));
	probe_kernel_read(&wq, &pwq->wq, sizeof(wq));
	probe_kernel_read(name, wq->name, sizeof(name) - 1);
	probe_kernel_read(desc, worker->desc, sizeof(desc) - 1);
	copy_from_kernel_nofault(&fn, &worker->current_func, sizeof(fn));
	copy_from_kernel_nofault(&pwq, &worker->current_pwq, sizeof(pwq));
	copy_from_kernel_nofault(&wq, &pwq->wq, sizeof(wq));
	copy_from_kernel_nofault(name, wq->name, sizeof(name) - 1);
	copy_from_kernel_nofault(desc, worker->desc, sizeof(desc) - 1);

	if (fn || name[0] || desc[0]) {
		printk("%sWorkqueue: %s %ps", log_lvl, name, fn);