Merge branch 'perf/urgent' into perf/core, to pick up fixed and resolve conflicts

Conflicts:
	kernel/events/core.c

Signed-off-by: Ingo Molnar <mingo@kernel.org>
此提交包含在:
Ingo Molnar
2016-09-05 12:09:59 +02:00
當前提交 2cc538412a
共有 792 個檔案被更改,包括 8737 行新增4545 行删除

查看文件

@@ -19,6 +19,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/file.h>
#include <linux/kernel.h>
#include <linux/audit.h>
#include <linux/kthread.h>
@@ -544,10 +545,11 @@ int audit_exe_compare(struct task_struct *tsk, struct audit_fsnotify_mark *mark)
unsigned long ino;
dev_t dev;
rcu_read_lock();
exe_file = rcu_dereference(tsk->mm->exe_file);
exe_file = get_task_exe_file(tsk);
if (!exe_file)
return 0;
ino = exe_file->f_inode->i_ino;
dev = exe_file->f_inode->i_sb->s_dev;
rcu_read_unlock();
fput(exe_file);
return audit_mark_compare(mark, ino, dev);
}

查看文件

@@ -26,11 +26,18 @@ struct bpf_htab {
struct bucket *buckets;
void *elems;
struct pcpu_freelist freelist;
void __percpu *extra_elems;
atomic_t count; /* number of elements in this hashtable */
u32 n_buckets; /* number of hash buckets */
u32 elem_size; /* size of each element in bytes */
};
enum extra_elem_state {
HTAB_NOT_AN_EXTRA_ELEM = 0,
HTAB_EXTRA_ELEM_FREE,
HTAB_EXTRA_ELEM_USED
};
/* each htab element is struct htab_elem + key + value */
struct htab_elem {
union {
@@ -38,7 +45,10 @@ struct htab_elem {
struct bpf_htab *htab;
struct pcpu_freelist_node fnode;
};
struct rcu_head rcu;
union {
struct rcu_head rcu;
enum extra_elem_state state;
};
u32 hash;
char key[0] __aligned(8);
};
@@ -113,6 +123,23 @@ free_elems:
return err;
}
static int alloc_extra_elems(struct bpf_htab *htab)
{
void __percpu *pptr;
int cpu;
pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
if (!pptr)
return -ENOMEM;
for_each_possible_cpu(cpu) {
((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
HTAB_EXTRA_ELEM_FREE;
}
htab->extra_elems = pptr;
return 0;
}
/* Called from syscall */
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
{
@@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
if (percpu)
cost += (u64) round_up(htab->map.value_size, 8) *
num_possible_cpus() * htab->map.max_entries;
else
cost += (u64) htab->elem_size * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
/* make sure page count doesn't overflow */
@@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
raw_spin_lock_init(&htab->buckets[i].lock);
}
if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
err = prealloc_elems_and_freelist(htab);
if (!percpu) {
err = alloc_extra_elems(htab);
if (err)
goto free_buckets;
}
if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
err = prealloc_elems_and_freelist(htab);
if (err)
goto free_extra_elems;
}
return &htab->map;
free_extra_elems:
free_percpu(htab->extra_elems);
free_buckets:
kvfree(htab->buckets);
free_htab:
@@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
kfree(l);
}
static void htab_elem_free_rcu(struct rcu_head *head)
@@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head)
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{
if (l->state == HTAB_EXTRA_ELEM_USED) {
l->state = HTAB_EXTRA_ELEM_FREE;
return;
}
if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
@@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
void *value, u32 key_size, u32 hash,
bool percpu, bool onallcpus)
bool percpu, bool onallcpus,
bool old_elem_exists)
{
u32 size = htab->map.value_size;
bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
struct htab_elem *l_new;
void __percpu *pptr;
int err = 0;
if (prealloc) {
l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
if (!l_new)
return ERR_PTR(-E2BIG);
err = -E2BIG;
} else {
if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
atomic_dec(&htab->count);
return ERR_PTR(-E2BIG);
err = -E2BIG;
} else {
l_new = kmalloc(htab->elem_size,
GFP_ATOMIC | __GFP_NOWARN);
if (!l_new)
return ERR_PTR(-ENOMEM);
}
l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
if (!l_new)
return ERR_PTR(-ENOMEM);
}
if (err) {
if (!old_elem_exists)
return ERR_PTR(err);
/* if we're updating the existing element and the hash table
* is full, use per-cpu extra elems
*/
l_new = this_cpu_ptr(htab->extra_elems);
if (l_new->state != HTAB_EXTRA_ELEM_FREE)
return ERR_PTR(-E2BIG);
l_new->state = HTAB_EXTRA_ELEM_USED;
} else {
l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
}
memcpy(l_new->key, key, key_size);
@@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
if (ret)
goto err;
l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
!!l_old);
if (IS_ERR(l_new)) {
/* all pre-allocated elements are in use or memory exhausted */
ret = PTR_ERR(l_new);
@@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
}
} else {
l_new = alloc_htab_elem(htab, key, value, key_size,
hash, true, onallcpus);
hash, true, onallcpus, false);
if (IS_ERR(l_new)) {
ret = PTR_ERR(l_new);
goto err;
@@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map)
htab_free_elems(htab);
pcpu_freelist_destroy(&htab->freelist);
}
free_percpu(htab->extra_elems);
kvfree(htab->buckets);
kfree(htab);
}

查看文件

@@ -194,6 +194,7 @@ struct verifier_env {
struct verifier_state_list **explored_states; /* search pruning optimization */
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
u32 used_map_cnt; /* number of used maps */
u32 id_gen; /* used to generate unique reg IDs */
bool allow_ptr_leaks;
};
@@ -1052,7 +1053,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
goto error;
break;
case BPF_MAP_TYPE_CGROUP_ARRAY:
if (func_id != BPF_FUNC_skb_in_cgroup)
if (func_id != BPF_FUNC_skb_under_cgroup)
goto error;
break;
default:
@@ -1074,7 +1075,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
goto error;
break;
case BPF_FUNC_skb_in_cgroup:
case BPF_FUNC_skb_under_cgroup:
if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
goto error;
break;
@@ -1301,7 +1302,7 @@ add_imm:
/* dst_reg stays as pkt_ptr type and since some positive
* integer value was added to the pointer, increment its 'id'
*/
dst_reg->id++;
dst_reg->id = ++env->id_gen;
/* something was added to pkt_ptr, set range and off to zero */
dst_reg->off = 0;

查看文件

@@ -1,4 +1,12 @@
# CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE is not set
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
# CONFIG_KERNEL_GZIP is not set
# CONFIG_KERNEL_BZIP2 is not set
# CONFIG_KERNEL_LZMA is not set
CONFIG_KERNEL_XZ=y
# CONFIG_KERNEL_LZO is not set
# CONFIG_KERNEL_LZ4 is not set
CONFIG_OPTIMIZE_INLINING=y
# CONFIG_SLAB is not set
# CONFIG_SLUB is not set
CONFIG_SLOB=y

查看文件

@@ -2069,6 +2069,20 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
mutex_unlock(&cpuset_mutex);
}
/*
* Make sure the new task conform to the current state of its parent,
* which could have been changed by cpuset just after it inherits the
* state from the parent and before it sits on the cgroup's task list.
*/
void cpuset_fork(struct task_struct *task)
{
if (task_css_is_root(task, cpuset_cgrp_id))
return;
set_cpus_allowed_ptr(task, &current->cpus_allowed);
task->mems_allowed = current->mems_allowed;
}
struct cgroup_subsys cpuset_cgrp_subsys = {
.css_alloc = cpuset_css_alloc,
.css_online = cpuset_css_online,
@@ -2079,6 +2093,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
.attach = cpuset_attach,
.post_attach = cpuset_post_attach,
.bind = cpuset_bind,
.fork = cpuset_fork,
.legacy_cftypes = files,
.early_init = true,
};

查看文件

@@ -3578,10 +3578,18 @@ static int perf_event_read(struct perf_event *event, bool group)
cpu_to_read = find_cpu_to_read(event, local_cpu);
put_cpu();
ret = smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1);
/* The event must have been read from an online CPU: */
WARN_ON_ONCE(ret);
ret = ret ? : data.ret;
/*
* Purposely ignore the smp_call_function_single() return
* value.
*
* If event->oncpu isn't a valid CPU it means the event got
* scheduled out and that will have updated the event count.
*
* Therefore, either way, we'll have an up-to-date event count
* after this.
*/
(void)smp_call_function_single(cpu_to_read, __perf_event_read, &data, 1);
ret = data.ret;
} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
struct perf_event_context *ctx = event->ctx;
unsigned long flags;
@@ -6196,7 +6204,7 @@ static int __perf_pmu_output_stop(void *info)
{
struct perf_event *event = info;
struct pmu *pmu = event->pmu;
struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
struct remote_output ro = {
.rb = event->rb,
};

查看文件

@@ -848,12 +848,7 @@ void do_exit(long code)
TASKS_RCU(preempt_enable());
exit_notify(tsk, group_dead);
proc_exit_connector(tsk);
#ifdef CONFIG_NUMA
task_lock(tsk);
mpol_put(tsk->mempolicy);
tsk->mempolicy = NULL;
task_unlock(tsk);
#endif
mpol_put_task_policy(tsk);
#ifdef CONFIG_FUTEX
if (unlikely(current->pi_state_cache))
kfree(current->pi_state_cache);

查看文件

@@ -798,6 +798,29 @@ struct file *get_mm_exe_file(struct mm_struct *mm)
}
EXPORT_SYMBOL(get_mm_exe_file);
/**
* get_task_exe_file - acquire a reference to the task's executable file
*
* Returns %NULL if task's mm (if any) has no associated executable file or
* this is a kernel thread with borrowed mm (see the comment above get_task_mm).
* User must release file via fput().
*/
struct file *get_task_exe_file(struct task_struct *task)
{
struct file *exe_file = NULL;
struct mm_struct *mm;
task_lock(task);
mm = task->mm;
if (mm) {
if (!(task->flags & PF_KTHREAD))
exe_file = get_mm_exe_file(mm);
}
task_unlock(task);
return exe_file;
}
EXPORT_SYMBOL(get_task_exe_file);
/**
* get_task_mm - acquire a reference to the task's mm
*
@@ -913,14 +936,12 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
deactivate_mm(tsk, mm);
/*
* If we're exiting normally, clear a user-space tid field if
* requested. We leave this alone when dying by signal, to leave
* the value intact in a core dump, and to save the unnecessary
* trouble, say, a killed vfork parent shouldn't touch this mm.
* Userland only wants this done for a sys_exit.
* Signal userspace if we're not exiting with a core dump
* because we want to leave the value intact for debugging
* purposes.
*/
if (tsk->clear_child_tid) {
if (!(tsk->flags & PF_SIGNALED) &&
if (!(tsk->signal->flags & SIGNAL_GROUP_COREDUMP) &&
atomic_read(&mm->mm_users) > 1) {
/*
* We don't check the error code - if userspace has
@@ -1404,7 +1425,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->real_start_time = ktime_get_boot_ns();
p->io_context = NULL;
p->audit_context = NULL;
threadgroup_change_begin(current);
cgroup_fork(p);
#ifdef CONFIG_NUMA
p->mempolicy = mpol_dup(p->mempolicy);
@@ -1556,6 +1576,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
INIT_LIST_HEAD(&p->thread_group);
p->task_works = NULL;
threadgroup_change_begin(current);
/*
* Ensure that the cgroup subsystem policies allow the new process to be
* forked. It should be noted the the new process's css_set can be changed
@@ -1656,6 +1677,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
bad_fork_cancel_cgroup:
cgroup_cancel_fork(p);
bad_fork_free_pid:
threadgroup_change_end(current);
if (pid != &init_struct_pid)
free_pid(pid);
bad_fork_cleanup_thread:
@@ -1688,7 +1710,6 @@ bad_fork_cleanup_policy:
mpol_put(p->mempolicy);
bad_fork_cleanup_threadgroup_lock:
#endif
threadgroup_change_end(current);
delayacct_tsk_free(p);
bad_fork_cleanup_count:
atomic_dec(&p->cred->user->processes);

查看文件

@@ -39,6 +39,7 @@ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
return NULL;
}
get_online_cpus();
if (max_vecs >= num_online_cpus()) {
cpumask_copy(affinity_mask, cpu_online_mask);
*nr_vecs = num_online_cpus();
@@ -56,6 +57,7 @@ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs)
}
*nr_vecs = vecs;
}
put_online_cpus();
return affinity_mask;
}

查看文件

@@ -820,6 +820,17 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,
desc->name = name;
if (handle != handle_bad_irq && is_chained) {
/*
* We're about to start this interrupt immediately,
* hence the need to set the trigger configuration.
* But the .set_type callback may have overridden the
* flow handler, ignoring that we're dealing with a
* chained interrupt. Reset it immediately because we
* do know better.
*/
__irq_set_trigger(desc, irqd_get_trigger_type(&desc->irq_data));
desc->handle_irq = handle;
irq_settings_set_noprobe(desc);
irq_settings_set_norequest(desc);
irq_settings_set_nothread(desc);

查看文件

@@ -1681,8 +1681,10 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
action->dev_id = dev_id;
retval = irq_chip_pm_get(&desc->irq_data);
if (retval < 0)
if (retval < 0) {
kfree(action);
return retval;
}
chip_bus_lock(desc);
retval = __setup_irq(irq, desc, action);
@@ -1985,8 +1987,10 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler,
action->percpu_dev_id = dev_id;
retval = irq_chip_pm_get(&desc->irq_data);
if (retval < 0)
if (retval < 0) {
kfree(action);
return retval;
}
chip_bus_lock(desc);
retval = __setup_irq(irq, desc, action);

查看文件

@@ -887,7 +887,10 @@ int kexec_load_purgatory(struct kimage *image, unsigned long min,
return 0;
out:
vfree(pi->sechdrs);
pi->sechdrs = NULL;
vfree(pi->purgatory_buf);
pi->purgatory_buf = NULL;
return ret;
}

查看文件

@@ -835,9 +835,9 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
*/
static bool rtree_next_node(struct memory_bitmap *bm)
{
bm->cur.node = list_entry(bm->cur.node->list.next,
struct rtree_node, list);
if (&bm->cur.node->list != &bm->cur.zone->leaves) {
if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
bm->cur.node = list_entry(bm->cur.node->list.next,
struct rtree_node, list);
bm->cur.node_pfn += BM_BITS_PER_BLOCK;
bm->cur.node_bit = 0;
touch_softlockup_watchdog();
@@ -845,9 +845,9 @@ static bool rtree_next_node(struct memory_bitmap *bm)
}
/* No more nodes, goto next zone */
bm->cur.zone = list_entry(bm->cur.zone->list.next,
if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
bm->cur.zone = list_entry(bm->cur.zone->list.next,
struct mem_zone_bm_rtree, list);
if (&bm->cur.zone->list != &bm->zones) {
bm->cur.node = list_entry(bm->cur.zone->leaves.next,
struct rtree_node, list);
bm->cur.node_pfn = 0;

查看文件

@@ -9,10 +9,10 @@
char *_braille_console_setup(char **str, char **brl_options)
{
if (!memcmp(*str, "brl,", 4)) {
if (!strncmp(*str, "brl,", 4)) {
*brl_options = "";
*str += 4;
} else if (!memcmp(str, "brl=", 4)) {
} else if (!strncmp(*str, "brl=", 4)) {
*brl_options = *str + 4;
*str = strchr(*brl_options, ',');
if (!*str)

查看文件

@@ -99,26 +99,32 @@ again:
return add;
}
/*
* printk one line from the temporary buffer from @start index until
* and including the @end index.
*/
static void print_nmi_seq_line(struct nmi_seq_buf *s, int start, int end)
static void printk_nmi_flush_line(const char *text, int len)
{
const char *buf = s->buffer + start;
/*
* The buffers are flushed in NMI only on panic. The messages must
* go only into the ring buffer at this stage. Consoles will get
* explicitly called later when a crashdump is not generated.
*/
if (in_nmi())
printk_deferred("%.*s", (end - start) + 1, buf);
printk_deferred("%.*s", len, text);
else
printk("%.*s", (end - start) + 1, buf);
printk("%.*s", len, text);
}
/*
* printk one line from the temporary buffer from @start index until
* and including the @end index.
*/
static void printk_nmi_flush_seq_line(struct nmi_seq_buf *s,
int start, int end)
{
const char *buf = s->buffer + start;
printk_nmi_flush_line(buf, (end - start) + 1);
}
/*
* Flush data from the associated per_CPU buffer. The function
* can be called either via IRQ work or independently.
@@ -150,9 +156,11 @@ more:
* the buffer an unexpected way. If we printed something then
* @len must only increase.
*/
if (i && i >= len)
pr_err("printk_nmi_flush: internal error: i=%d >= len=%zu\n",
i, len);
if (i && i >= len) {
const char *msg = "printk_nmi_flush: internal error\n";
printk_nmi_flush_line(msg, strlen(msg));
}
if (!len)
goto out; /* Someone else has already flushed the buffer. */
@@ -166,14 +174,14 @@ more:
/* Print line by line. */
for (; i < size; i++) {
if (s->buffer[i] == '\n') {
print_nmi_seq_line(s, last_i, i);
printk_nmi_flush_seq_line(s, last_i, i);
last_i = i + 1;
}
}
/* Check if there was a partial line. */
if (last_i < size) {
print_nmi_seq_line(s, last_i, size - 1);
pr_cont("\n");
printk_nmi_flush_seq_line(s, last_i, size - 1);
printk_nmi_flush_line("\n", strlen("\n"));
}
/*

查看文件

@@ -263,6 +263,11 @@ void account_idle_time(cputime_t cputime)
cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}
/*
* When a guest is interrupted for a longer amount of time, missed clock
* ticks are not redelivered later. Due to that, this function may on
* occasion account more time than the calling functions think elapsed.
*/
static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
{
#ifdef CONFIG_PARAVIRT
@@ -371,7 +376,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
* idle, or potentially user or system time. Due to rounding,
* other time can exceed ticks occasionally.
*/
other = account_other_time(cputime);
other = account_other_time(ULONG_MAX);
if (other >= cputime)
return;
cputime -= other;
@@ -486,7 +491,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
}
cputime = cputime_one_jiffy;
steal = steal_account_process_time(cputime);
steal = steal_account_process_time(ULONG_MAX);
if (steal >= cputime)
return;
@@ -516,7 +521,7 @@ void account_idle_ticks(unsigned long ticks)
}
cputime = jiffies_to_cputime(ticks);
steal = steal_account_process_time(cputime);
steal = steal_account_process_time(ULONG_MAX);
if (steal >= cputime)
return;
@@ -614,19 +619,25 @@ static void cputime_adjust(struct task_cputime *curr,
stime = curr->stime;
utime = curr->utime;
if (utime == 0) {
stime = rtime;
/*
* If either stime or both stime and utime are 0, assume all runtime is
* userspace. Once a task gets some ticks, the monotonicy code at
* 'update' will ensure things converge to the observed ratio.
*/
if (stime == 0) {
utime = rtime;
goto update;
}
if (stime == 0) {
utime = rtime;
if (utime == 0) {
stime = rtime;
goto update;
}
stime = scale_stime((__force u64)stime, (__force u64)rtime,
(__force u64)(stime + utime));
update:
/*
* Make sure stime doesn't go backwards; this preserves monotonicity
* for utime because rtime is monotonic.
@@ -649,7 +660,6 @@ static void cputime_adjust(struct task_cputime *curr,
stime = rtime - utime;
}
update:
prev->stime = stime;
prev->utime = utime;
out:
@@ -694,6 +704,13 @@ static cputime_t get_vtime_delta(struct task_struct *tsk)
unsigned long now = READ_ONCE(jiffies);
cputime_t delta, other;
/*
* Unlike tick based timing, vtime based timing never has lost
* ticks, and no need for steal time accounting to make up for
* lost ticks. Vtime accounts a rounded version of actual
* elapsed time. Limit account_other_time to prevent rounding
* errors from causing elapsed vtime to go negative.
*/
delta = jiffies_to_cputime(now - tsk->vtime_snap);
other = account_other_time(delta);
WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);

查看文件

@@ -605,12 +605,16 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
ptrace_event(PTRACE_EVENT_SECCOMP, data);
/*
* The delivery of a fatal signal during event
* notification may silently skip tracer notification.
* Terminating the task now avoids executing a system
* call that may not be intended.
* notification may silently skip tracer notification,
* which could leave us with a potentially unmodified
* syscall that the tracer would have liked to have
* changed. Since the process is about to die, we just
* force the syscall to be skipped and let the signal
* kill the process and correctly handle any tracer exit
* notifications.
*/
if (fatal_signal_pending(current))
do_exit(SIGSYS);
goto skip;
/* Check if the tracer forced the syscall to be skipped. */
this_syscall = syscall_get_nr(current, task_pt_regs(current));
if (this_syscall < 0)

查看文件

@@ -2140,6 +2140,21 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
return 0;
}
static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
int *valp,
int write, void *data)
{
if (write) {
if (*negp)
return -EINVAL;
*valp = *lvalp;
} else {
unsigned int val = *valp;
*lvalp = (unsigned long)val;
}
return 0;
}
static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
@@ -2259,8 +2274,27 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
int proc_dointvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return do_proc_dointvec(table,write,buffer,lenp,ppos,
NULL,NULL);
return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
}
/**
* proc_douintvec - read a vector of unsigned integers
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
* @buffer: the user buffer
* @lenp: the size of the user buffer
* @ppos: file position
*
* Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer
* values from/to the user buffer, treated as an ASCII string.
*
* Returns 0 on success.
*/
int proc_douintvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return do_proc_dointvec(table, write, buffer, lenp, ppos,
do_proc_douintvec_conv, NULL);
}
/*
@@ -2858,6 +2892,12 @@ int proc_dointvec(struct ctl_table *table, int write,
return -ENOSYS;
}
int proc_douintvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
return -ENOSYS;
}
int proc_dointvec_minmax(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -2903,6 +2943,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
* exception granted :-)
*/
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);

查看文件

@@ -908,10 +908,11 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts)
ktime_t now, expires;
int cpu = smp_processor_id();
now = tick_nohz_start_idle(ts);
if (can_stop_idle_tick(cpu, ts)) {
int was_stopped = ts->tick_stopped;
now = tick_nohz_start_idle(ts);
ts->idle_calls++;
expires = tick_nohz_stop_sched_tick(ts, now, cpu);

查看文件

@@ -401,7 +401,10 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
do {
seq = raw_read_seqcount_latch(&tkf->seq);
tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
now = ktime_to_ns(tkr->base);
now += clocksource_delta(tkr->read(tkr->clock),
tkr->cycle_last, tkr->mask);
} while (read_seqcount_retry(&tkf->seq, seq));
return now;

查看文件

@@ -23,7 +23,9 @@
#include "timekeeping_internal.h"
static unsigned int sleep_time_bin[32] = {0};
#define NUM_BINS 32
static unsigned int sleep_time_bin[NUM_BINS] = {0};
static int tk_debug_show_sleep_time(struct seq_file *s, void *data)
{
@@ -69,6 +71,9 @@ late_initcall(tk_debug_sleep_time_init);
void tk_debug_account_sleep_time(struct timespec64 *t)
{
sleep_time_bin[fls(t->tv_sec)]++;
/* Cap bin index so we don't overflow the array */
int bin = min(fls(t->tv_sec), NUM_BINS-1);
sleep_time_bin[bin]++;
}

查看文件

@@ -223,7 +223,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
what |= MASK_TC_BIT(op_flags, META);
what |= MASK_TC_BIT(op_flags, PREFLUSH);
what |= MASK_TC_BIT(op_flags, FUA);
if (op == REQ_OP_DISCARD)
if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)
what |= BLK_TC_ACT(BLK_TC_DISCARD);
if (op == REQ_OP_FLUSH)
what |= BLK_TC_ACT(BLK_TC_FLUSH);