Merge branch 'master' into for-3.9-async

To receive f56c3196f2 ("async: fix
__lowest_in_progress()").

Signed-off-by: Tejun Heo <tj@kernel.org>
Цей коміт міститься в:
Tejun Heo
2013-01-23 09:31:01 -08:00
джерело 0fdff3ec6d 1d85490853
коміт c14afb82ff
3468 змінених файлів з 74250 додано та 31438 видалено

Переглянути файл

@@ -54,7 +54,7 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o
obj-$(CONFIG_MODULE_SIG) += module_signing.o modsign_pubkey.o modsign_certificate.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_KEXEC) += kexec.o
@@ -137,10 +137,14 @@ ifeq ($(CONFIG_MODULE_SIG),y)
#
# Pull the signing certificate and any extra certificates into the kernel
#
extra_certificates:
touch $@
kernel/modsign_pubkey.o: signing_key.x509 extra_certificates
quiet_cmd_touch = TOUCH $@
cmd_touch = touch $@
extra_certificates:
$(call cmd,touch)
kernel/modsign_certificate.o: signing_key.x509 extra_certificates
###############################################################################
#

Переглянути файл

@@ -88,18 +88,27 @@ static atomic_t entry_count;
*/
static async_cookie_t __lowest_in_progress(struct async_domain *running)
{
async_cookie_t first_running = next_cookie; /* infinity value */
async_cookie_t first_pending = next_cookie; /* ditto */
struct async_entry *entry;
/*
* Both running and pending lists are sorted but not disjoint.
* Take the first cookies from both and return the min.
*/
if (!list_empty(&running->domain)) {
entry = list_first_entry(&running->domain, typeof(*entry), list);
return entry->cookie;
first_running = entry->cookie;
}
list_for_each_entry(entry, &async_pending, list)
if (entry->running == running)
return entry->cookie;
list_for_each_entry(entry, &async_pending, list) {
if (entry->running == running) {
first_pending = entry->cookie;
break;
}
}
return next_cookie; /* "infinity" value */
return min(first_running, first_pending);
}
static async_cookie_t lowest_in_progress(struct async_domain *running)
@@ -120,13 +129,17 @@ static void async_run_entry_fn(struct work_struct *work)
{
struct async_entry *entry =
container_of(work, struct async_entry, work);
struct async_entry *pos;
unsigned long flags;
ktime_t uninitialized_var(calltime), delta, rettime;
struct async_domain *running = entry->running;
/* 1) move self to the running queue */
/* 1) move self to the running queue, make sure it stays sorted */
spin_lock_irqsave(&async_lock, flags);
list_move_tail(&entry->list, &running->domain);
list_for_each_entry_reverse(pos, &running->domain, list)
if (entry->cookie < pos->cookie)
break;
list_move_tail(&entry->list, &pos->list);
spin_unlock_irqrestore(&async_lock, flags);
/* 2) run (and print duration) */
@@ -198,6 +211,9 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct a
atomic_inc(&entry_count);
spin_unlock_irqrestore(&async_lock, flags);
/* mark that this task has queued an async job, used by module init */
current->flags |= PF_USED_ASYNC;
/* schedule for execution */
queue_work(system_unbound_wq, &entry->work);

Переглянути файл

@@ -272,6 +272,8 @@ static int audit_log_config_change(char *function_name, int new, int old,
int rc = 0;
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return rc;
audit_log_format(ab, "%s=%d old=%d auid=%u ses=%u", function_name, new,
old, from_kuid(&init_user_ns, loginuid), sessionid);
if (sid) {
@@ -619,6 +621,8 @@ static int audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type,
}
*ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
if (unlikely(!*ab))
return rc;
audit_log_format(*ab, "pid=%d uid=%u auid=%u ses=%u",
task_tgid_vnr(current),
from_kuid(&init_user_ns, current_uid()),
@@ -1097,6 +1101,23 @@ static inline void audit_get_stamp(struct audit_context *ctx,
}
}
/*
* Wait for auditd to drain the queue a little
*/
static void wait_for_auditd(unsigned long sleep_time)
{
DECLARE_WAITQUEUE(wait, current);
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&audit_backlog_wait, &wait);
if (audit_backlog_limit &&
skb_queue_len(&audit_skb_queue) > audit_backlog_limit)
schedule_timeout(sleep_time);
__set_current_state(TASK_RUNNING);
remove_wait_queue(&audit_backlog_wait, &wait);
}
/* Obtain an audit buffer. This routine does locking to obtain the
* audit buffer, but then no locking is required for calls to
* audit_log_*format. If the tsk is a task that is currently in a
@@ -1142,20 +1163,13 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
while (audit_backlog_limit
&& skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) {
if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time
&& time_before(jiffies, timeout_start + audit_backlog_wait_time)) {
if (gfp_mask & __GFP_WAIT && audit_backlog_wait_time) {
unsigned long sleep_time;
/* Wait for auditd to drain the queue a little */
DECLARE_WAITQUEUE(wait, current);
set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&audit_backlog_wait, &wait);
if (audit_backlog_limit &&
skb_queue_len(&audit_skb_queue) > audit_backlog_limit)
schedule_timeout(timeout_start + audit_backlog_wait_time - jiffies);
__set_current_state(TASK_RUNNING);
remove_wait_queue(&audit_backlog_wait, &wait);
sleep_time = timeout_start + audit_backlog_wait_time -
jiffies;
if ((long)sleep_time > 0)
wait_for_auditd(sleep_time);
continue;
}
if (audit_rate_check() && printk_ratelimit())

Переглянути файл

@@ -249,7 +249,7 @@ static void untag_chunk(struct node *p)
list_del_rcu(&chunk->hash);
spin_unlock(&hash_lock);
spin_unlock(&entry->lock);
fsnotify_destroy_mark(entry);
fsnotify_destroy_mark(entry, audit_tree_group);
goto out;
}
@@ -291,7 +291,7 @@ static void untag_chunk(struct node *p)
owner->root = new;
spin_unlock(&hash_lock);
spin_unlock(&entry->lock);
fsnotify_destroy_mark(entry);
fsnotify_destroy_mark(entry, audit_tree_group);
fsnotify_put_mark(&new->mark); /* drop initial reference */
goto out;
@@ -331,7 +331,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
spin_unlock(&hash_lock);
chunk->dead = 1;
spin_unlock(&entry->lock);
fsnotify_destroy_mark(entry);
fsnotify_destroy_mark(entry, audit_tree_group);
fsnotify_put_mark(entry);
return 0;
}
@@ -412,7 +412,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
spin_unlock(&chunk_entry->lock);
spin_unlock(&old_entry->lock);
fsnotify_destroy_mark(chunk_entry);
fsnotify_destroy_mark(chunk_entry, audit_tree_group);
fsnotify_put_mark(chunk_entry);
fsnotify_put_mark(old_entry);
@@ -443,17 +443,32 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
spin_unlock(&hash_lock);
spin_unlock(&chunk_entry->lock);
spin_unlock(&old_entry->lock);
fsnotify_destroy_mark(old_entry);
fsnotify_destroy_mark(old_entry, audit_tree_group);
fsnotify_put_mark(chunk_entry); /* drop initial reference */
fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
return 0;
}
/*
 * Emit an AUDIT_CONFIG_CHANGE record describing the removal of @rule.
 *
 * Reads rule->tree->pathname, rule->filterkey and rule->listnr, so the
 * caller must pass a rule whose ->tree is non-NULL.  If no audit buffer
 * can be obtained the record is silently dropped.
 */
static void audit_log_remove_rule(struct audit_krule *rule)
{
	struct audit_buffer *buf =
		audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);

	/* audit_log_start() may hand back nothing; then there is nothing to log. */
	if (unlikely(buf == NULL))
		return;

	/* op="remove rule" */
	audit_log_format(buf, "op=");
	audit_log_string(buf, "remove rule");

	/* dir=<watched tree path>, logged as an untrusted string */
	audit_log_format(buf, " dir=");
	audit_log_untrustedstring(buf, rule->tree->pathname);

	audit_log_key(buf, rule->filterkey);
	audit_log_format(buf, " list=%d res=1", rule->listnr);

	audit_log_end(buf);
}
static void kill_rules(struct audit_tree *tree)
{
struct audit_krule *rule, *next;
struct audit_entry *entry;
struct audit_buffer *ab;
list_for_each_entry_safe(rule, next, &tree->rules, rlist) {
entry = container_of(rule, struct audit_entry, rule);
@@ -461,14 +476,7 @@ static void kill_rules(struct audit_tree *tree)
list_del_init(&rule->rlist);
if (rule->tree) {
/* not a half-baked one */
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
audit_log_format(ab, "op=");
audit_log_string(ab, "remove rule");
audit_log_format(ab, " dir=");
audit_log_untrustedstring(ab, rule->tree->pathname);
audit_log_key(ab, rule->filterkey);
audit_log_format(ab, " list=%d res=1", rule->listnr);
audit_log_end(ab);
audit_log_remove_rule(rule);
rule->tree = NULL;
list_del_rcu(&entry->list);
list_del(&entry->rule.list);

Переглянути файл

@@ -240,6 +240,8 @@ static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watc
if (audit_enabled) {
struct audit_buffer *ab;
ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
audit_log_format(ab, "auid=%u ses=%u op=",
from_kuid(&init_user_ns, audit_get_loginuid(current)),
audit_get_sessionid(current));
@@ -350,7 +352,7 @@ static void audit_remove_parent_watches(struct audit_parent *parent)
}
mutex_unlock(&audit_filter_mutex);
fsnotify_destroy_mark(&parent->mark);
fsnotify_destroy_mark(&parent->mark, audit_watch_group);
}
/* Get path information necessary for adding watches. */
@@ -457,7 +459,7 @@ void audit_remove_watch_rule(struct audit_krule *krule)
if (list_empty(&parent->watches)) {
audit_get_parent(parent);
fsnotify_destroy_mark(&parent->mark);
fsnotify_destroy_mark(&parent->mark, audit_watch_group);
audit_put_parent(parent);
}
}

Переглянути файл

@@ -1144,7 +1144,6 @@ static void audit_log_rule_change(kuid_t loginuid, u32 sessionid, u32 sid,
* audit_receive_filter - apply all rules to the specified message type
* @type: audit message type
* @pid: target pid for netlink audit messages
* @uid: target uid for netlink audit messages
* @seq: netlink audit message sequence (serial) number
* @data: payload data
* @datasz: size of payload data

Переглянути файл

@@ -1464,14 +1464,14 @@ static void show_special(struct audit_context *context, int *call_panic)
audit_log_end(ab);
ab = audit_log_start(context, GFP_KERNEL,
AUDIT_IPC_SET_PERM);
if (unlikely(!ab))
return;
audit_log_format(ab,
"qbytes=%lx ouid=%u ogid=%u mode=%#ho",
context->ipc.qbytes,
context->ipc.perm_uid,
context->ipc.perm_gid,
context->ipc.perm_mode);
if (!ab)
return;
}
break; }
case AUDIT_MQ_OPEN: {
@@ -2675,7 +2675,7 @@ void __audit_mmap_fd(int fd, int flags)
context->type = AUDIT_MMAP;
}
static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
static void audit_log_task(struct audit_buffer *ab)
{
kuid_t auid, uid;
kgid_t gid;
@@ -2693,6 +2693,11 @@ static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
audit_log_task_context(ab);
audit_log_format(ab, " pid=%d comm=", current->pid);
audit_log_untrustedstring(ab, current->comm);
}
static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr)
{
audit_log_task(ab);
audit_log_format(ab, " reason=");
audit_log_string(ab, reason);
audit_log_format(ab, " sig=%ld", signr);
@@ -2715,6 +2720,8 @@ void audit_core_dumps(long signr)
return;
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
if (unlikely(!ab))
return;
audit_log_abend(ab, "memory violation", signr);
audit_log_end(ab);
}
@@ -2723,8 +2730,11 @@ void __audit_seccomp(unsigned long syscall, long signr, int code)
{
struct audit_buffer *ab;
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
audit_log_abend(ab, "seccomp", signr);
ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_SECCOMP);
if (unlikely(!ab))
return;
audit_log_task(ab);
audit_log_format(ab, " sig=%ld", signr);
audit_log_format(ab, " syscall=%ld", syscall);
audit_log_format(ab, " compat=%d", is_compat_task());
audit_log_format(ab, " ip=0x%lx", KSTK_EIP(current));

Переглянути файл

@@ -535,9 +535,11 @@ asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru)
return 0;
}
asmlinkage long
compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options,
struct compat_rusage __user *ru)
COMPAT_SYSCALL_DEFINE4(wait4,
compat_pid_t, pid,
compat_uint_t __user *, stat_addr,
int, options,
struct compat_rusage __user *, ru)
{
if (!ru) {
return sys_wait4(pid, stat_addr, options, NULL);
@@ -564,9 +566,10 @@ compat_sys_wait4(compat_pid_t pid, compat_uint_t __user *stat_addr, int options,
}
}
asmlinkage long compat_sys_waitid(int which, compat_pid_t pid,
struct compat_siginfo __user *uinfo, int options,
struct compat_rusage __user *uru)
COMPAT_SYSCALL_DEFINE5(waitid,
int, which, compat_pid_t, pid,
struct compat_siginfo __user *, uinfo, int, options,
struct compat_rusage __user *, uru)
{
siginfo_t info;
struct rusage ru;
@@ -584,7 +587,11 @@ asmlinkage long compat_sys_waitid(int which, compat_pid_t pid,
return ret;
if (uru) {
ret = put_compat_rusage(&ru, uru);
/* sys_waitid() overwrites everything in ru */
if (COMPAT_USE_64BIT_TIME)
ret = copy_to_user(uru, &ru, sizeof(ru));
else
ret = put_compat_rusage(&ru, uru);
if (ret)
return ret;
}
@@ -994,7 +1001,7 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
sigset_from_compat(&s, &s32);
if (uts) {
if (get_compat_timespec(&t, uts))
if (compat_get_timespec(&t, uts))
return -EFAULT;
}

Переглянути файл

@@ -372,6 +372,31 @@ error_put:
return ret;
}
/*
 * Decide whether the credentials @subset count as a subset of @set.
 *
 * Returns true when either
 *  - both credentials live in the same user namespace and
 *    subset->cap_permitted is a subset of set->cap_permitted, or
 *  - some namespace on the path from subset's user namespace up to (but
 *    not including) init_user_ns has set's namespace as its parent and
 *    is owned by set->euid.
 */
static bool cred_cap_issubset(const struct cred *set, const struct cred *subset)
{
	const struct user_namespace *target_ns = set->user_ns;
	const struct user_namespace *ns = subset->user_ns;

	/* Same user namespace: compare the permitted capability sets. */
	if (ns == target_ns)
		return cap_issubset(subset->cap_permitted, set->cap_permitted);

	/*
	 * Different user namespaces: climb from subset's namespace towards
	 * init_user_ns looking for a namespace that is a direct child of
	 * set's namespace and whose owner is set->euid.
	 */
	while (ns != &init_user_ns) {
		if (ns->parent == target_ns && uid_eq(ns->owner, set->euid))
			return true;
		ns = ns->parent;
	}

	return false;
}
/**
* commit_creds - Install new credentials upon the current task
* @new: The credentials to be assigned
@@ -410,7 +435,7 @@ int commit_creds(struct cred *new)
!gid_eq(old->egid, new->egid) ||
!uid_eq(old->fsuid, new->fsuid) ||
!gid_eq(old->fsgid, new->fsgid) ||
!cap_issubset(new->cap_permitted, old->cap_permitted)) {
!cred_cap_issubset(old, new)) {
if (task->mm)
set_dumpable(task->mm, suid_dumpable);
task->pdeath_signal = 0;

Переглянути файл

@@ -1970,6 +1970,8 @@ static int kdb_lsmod(int argc, const char **argv)
kdb_printf("Module Size modstruct Used by\n");
list_for_each_entry(mod, kdb_modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
kdb_printf("%-20s%8u 0x%p ", mod->name,
mod->core_size, (void *)mod);

Переглянути файл

@@ -146,7 +146,7 @@ void __weak arch_release_thread_info(struct thread_info *ti)
static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
int node)
{
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
THREAD_SIZE_ORDER);
return page ? page_address(page) : NULL;
@@ -154,7 +154,7 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
static inline void free_thread_info(struct thread_info *ti)
{
free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
# else
static struct kmem_cache *thread_info_cache;
@@ -1166,6 +1166,14 @@ static struct task_struct *copy_process(unsigned long clone_flags,
current->signal->flags & SIGNAL_UNKILLABLE)
return ERR_PTR(-EINVAL);
/*
* If the new process will be in a different pid namespace
* don't allow the creation of threads.
*/
if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) &&
(task_active_pid_ns(current) != current->nsproxy->pid_ns))
return ERR_PTR(-EINVAL);
retval = security_task_create(clone_flags);
if (retval)
goto fork_out;
@@ -1613,7 +1621,6 @@ long do_fork(unsigned long clone_flags,
return nr;
}
#ifdef CONFIG_GENERIC_KERNEL_THREAD
/*
* Create a kernel thread.
*/
@@ -1622,7 +1629,6 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
(unsigned long)arg, NULL, NULL);
}
#endif
#ifdef __ARCH_WANT_SYS_FORK
SYSCALL_DEFINE0(fork)
@@ -1662,8 +1668,10 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
int, tls_val)
#endif
{
return do_fork(clone_flags, newsp, 0,
parent_tidptr, child_tidptr);
long ret = do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
asmlinkage_protect(5, ret, clone_flags, newsp,
parent_tidptr, child_tidptr, tls_val);
return ret;
}
#endif

Переглянути файл

@@ -818,7 +818,7 @@ static void irq_thread_dtor(struct callback_head *unused)
action = kthread_data(tsk);
pr_err("exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
tsk->comm, tsk->pid, action->irq);
desc = irq_to_desc(action->irq);

Переглянути файл

@@ -4,6 +4,7 @@
#include <linux/string.h>
#include <linux/random.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/cache.h>

Переглянути файл

@@ -228,9 +228,9 @@ static int ____call_usermodehelper(void *data)
commit_creds(new);
retval = kernel_execve(sub_info->path,
(const char *const *)sub_info->argv,
(const char *const *)sub_info->envp);
retval = do_execve(sub_info->path,
(const char __user *const __user *)sub_info->argv,
(const char __user *const __user *)sub_info->envp);
if (!retval)
return 0;

19
kernel/modsign_certificate.S Звичайний файл
Переглянути файл

@@ -0,0 +1,19 @@
/*
 * Embed the module signing certificates into the kernel image.
 *
 * The raw contents of "signing_key.x509" followed by "extra_certificates"
 * are included between the global labels modsign_certificate_list and
 * modsign_certificate_list_end.
 */
/* SYMBOL_PREFIX defined on commandline from CONFIG_SYMBOL_PREFIX */
#ifndef SYMBOL_PREFIX
/* No prefix configured: symbol names are used as written. */
#define ASM_SYMBOL(sym) sym
#else
/* Two-level paste so SYMBOL_PREFIX is expanded before ## glues it to sym. */
#define PASTE2(x,y) x##y
#define PASTE(x,y) PASTE2(x,y)
#define ASM_SYMBOL(sym) PASTE(SYMBOL_PREFIX, sym)
#endif
/* Export the (possibly prefixed) symbol and define its label right here. */
#define GLOBAL(name) \
.globl ASM_SYMBOL(name); \
ASM_SYMBOL(name):
/* The blob goes into .init.data, flagged allocatable and writable ("aw"). */
.section ".init.data","aw"
/* Start label, then the two files back to back, then the end label. */
GLOBAL(modsign_certificate_list)
.incbin "signing_key.x509"
.incbin "extra_certificates"
GLOBAL(modsign_certificate_list_end)

Переглянути файл

@@ -20,12 +20,6 @@ struct key *modsign_keyring;
extern __initdata const u8 modsign_certificate_list[];
extern __initdata const u8 modsign_certificate_list_end[];
asm(".section .init.data,\"aw\"\n"
SYMBOL_PREFIX "modsign_certificate_list:\n"
".incbin \"signing_key.x509\"\n"
".incbin \"extra_certificates\"\n"
SYMBOL_PREFIX "modsign_certificate_list_end:"
);
/*
* We need to make sure ccache doesn't cache the .o file as it doesn't notice
@@ -40,18 +34,15 @@ static __init int module_verify_init(void)
{
pr_notice("Initialise module verification\n");
modsign_keyring = key_alloc(&key_type_keyring, ".module_sign",
KUIDT_INIT(0), KGIDT_INIT(0),
current_cred(),
(KEY_POS_ALL & ~KEY_POS_SETATTR) |
KEY_USR_VIEW | KEY_USR_READ,
KEY_ALLOC_NOT_IN_QUOTA);
modsign_keyring = keyring_alloc(".module_sign",
KUIDT_INIT(0), KGIDT_INIT(0),
current_cred(),
((KEY_POS_ALL & ~KEY_POS_SETATTR) |
KEY_USR_VIEW | KEY_USR_READ),
KEY_ALLOC_NOT_IN_QUOTA, NULL);
if (IS_ERR(modsign_keyring))
panic("Can't allocate module signing keyring\n");
if (key_instantiate_and_link(modsign_keyring, NULL, 0, NULL, NULL) < 0)
panic("Can't instantiate module signing keyring\n");
return 0;
}

Переглянути файл

@@ -21,6 +21,7 @@
#include <linux/ftrace_event.h>
#include <linux/init.h>
#include <linux/kallsyms.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/sysfs.h>
#include <linux/kernel.h>
@@ -28,6 +29,7 @@
#include <linux/vmalloc.h>
#include <linux/elf.h>
#include <linux/proc_fs.h>
#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
@@ -59,6 +61,7 @@
#include <linux/pfn.h>
#include <linux/bsearch.h>
#include <linux/fips.h>
#include <uapi/linux/module.h>
#include "module-internal.h"
#define CREATE_TRACE_POINTS
@@ -185,6 +188,7 @@ struct load_info {
ongoing or failed initialization etc. */
static inline int strong_try_module_get(struct module *mod)
{
BUG_ON(mod && mod->state == MODULE_STATE_UNFORMED);
if (mod && mod->state == MODULE_STATE_COMING)
return -EBUSY;
if (try_module_get(mod))
@@ -340,6 +344,9 @@ bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
#endif
};
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data))
return true;
}
@@ -447,16 +454,24 @@ const struct kernel_symbol *find_symbol(const char *name,
EXPORT_SYMBOL_GPL(find_symbol);
/* Search for module by name: must hold module_mutex. */
struct module *find_module(const char *name)
/*
 * Walk the global module list and return the first entry whose name equals
 * @name, or NULL if there is none.
 *
 * @even_unformed: when false, entries in MODULE_STATE_UNFORMED are skipped;
 *                 when true, they are eligible matches as well.
 */
static struct module *find_module_all(const char *name,
				      bool even_unformed)
{
	struct module *candidate;

	list_for_each_entry(candidate, &modules, list) {
		bool hidden = !even_unformed &&
			      candidate->state == MODULE_STATE_UNFORMED;

		if (!hidden && strcmp(candidate->name, name) == 0)
			return candidate;
	}

	return NULL;
}
/*
 * Look up a module by name, ignoring modules that are still in
 * MODULE_STATE_UNFORMED (delegates to find_module_all() with
 * even_unformed == false).  Returns the module, or NULL if no match.
 */
struct module *find_module(const char *name)
{
return find_module_all(name, false);
}
EXPORT_SYMBOL_GPL(find_module);
#ifdef CONFIG_SMP
@@ -522,6 +537,8 @@ bool is_module_percpu_address(unsigned long addr)
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if (!mod->percpu_size)
continue;
for_each_possible_cpu(cpu) {
@@ -1045,6 +1062,8 @@ static ssize_t show_initstate(struct module_attribute *mattr,
case MODULE_STATE_GOING:
state = "going";
break;
default:
BUG();
}
return sprintf(buffer, "%s\n", state);
}
@@ -1783,6 +1802,8 @@ void set_all_modules_text_rw(void)
mutex_lock(&module_mutex);
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if ((mod->module_core) && (mod->core_text_size)) {
set_page_attributes(mod->module_core,
mod->module_core + mod->core_text_size,
@@ -1804,6 +1825,8 @@ void set_all_modules_text_ro(void)
mutex_lock(&module_mutex);
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if ((mod->module_core) && (mod->core_text_size)) {
set_page_attributes(mod->module_core,
mod->module_core + mod->core_text_size,
@@ -2279,7 +2302,7 @@ static void layout_symtab(struct module *mod, struct load_info *info)
Elf_Shdr *symsect = info->sechdrs + info->index.sym;
Elf_Shdr *strsect = info->sechdrs + info->index.str;
const Elf_Sym *src;
unsigned int i, nsrc, ndst, strtab_size;
unsigned int i, nsrc, ndst, strtab_size = 0;
/* Put symbol section at end of init part of module. */
symsect->sh_flags |= SHF_ALLOC;
@@ -2290,9 +2313,6 @@ static void layout_symtab(struct module *mod, struct load_info *info)
src = (void *)info->hdr + symsect->sh_offset;
nsrc = symsect->sh_size / sizeof(*src);
/* strtab always starts with a nul, so offset 0 is the empty string. */
strtab_size = 1;
/* Compute total space required for the core symbols' strtab. */
for (ndst = i = 0; i < nsrc; i++) {
if (i == 0 ||
@@ -2334,7 +2354,6 @@ static void add_kallsyms(struct module *mod, const struct load_info *info)
mod->core_symtab = dst = mod->module_core + info->symoffs;
mod->core_strtab = s = mod->module_core + info->stroffs;
src = mod->symtab;
*s++ = 0;
for (ndst = i = 0; i < mod->num_symtab; i++) {
if (i == 0 ||
is_core_symbol(src+i, info->sechdrs, info->hdr->e_shnum)) {
@@ -2375,7 +2394,7 @@ static void dynamic_debug_remove(struct _ddebug *debug)
void * __weak module_alloc(unsigned long size)
{
return size == 0 ? NULL : vmalloc_exec(size);
return vmalloc_exec(size);
}
static void *module_alloc_update_bounds(unsigned long size)
@@ -2422,18 +2441,17 @@ static inline void kmemleak_load_module(const struct module *mod,
#endif
#ifdef CONFIG_MODULE_SIG
static int module_sig_check(struct load_info *info,
const void *mod, unsigned long *_len)
static int module_sig_check(struct load_info *info)
{
int err = -ENOKEY;
unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
unsigned long len = *_len;
const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
const void *mod = info->hdr;
if (len > markerlen &&
memcmp(mod + len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
if (info->len > markerlen &&
memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
/* We truncate the module to discard the signature */
*_len -= markerlen;
err = mod_verify_sig(mod, _len);
info->len -= markerlen;
err = mod_verify_sig(mod, &info->len);
}
if (!err) {
@@ -2451,59 +2469,114 @@ static int module_sig_check(struct load_info *info,
return err;
}
#else /* !CONFIG_MODULE_SIG */
static int module_sig_check(struct load_info *info,
void *mod, unsigned long *len)
static int module_sig_check(struct load_info *info)
{
return 0;
}
#endif /* !CONFIG_MODULE_SIG */
/* Sets info->hdr, info->len and info->sig_ok. */
static int copy_and_check(struct load_info *info,
const void __user *umod, unsigned long len,
const char __user *uargs)
/* Sanity checks against invalid binaries, wrong arch, weird elf version. */
static int elf_header_check(struct load_info *info)
{
int err;
Elf_Ehdr *hdr;
if (len < sizeof(*hdr))
if (info->len < sizeof(*(info->hdr)))
return -ENOEXEC;
if (memcmp(info->hdr->e_ident, ELFMAG, SELFMAG) != 0
|| info->hdr->e_type != ET_REL
|| !elf_check_arch(info->hdr)
|| info->hdr->e_shentsize != sizeof(Elf_Shdr))
return -ENOEXEC;
if (info->hdr->e_shoff >= info->len
|| (info->hdr->e_shnum * sizeof(Elf_Shdr) >
info->len - info->hdr->e_shoff))
return -ENOEXEC;
return 0;
}
/* Sets info->hdr and info->len. */
static int copy_module_from_user(const void __user *umod, unsigned long len,
struct load_info *info)
{
int err;
info->len = len;
if (info->len < sizeof(*(info->hdr)))
return -ENOEXEC;
err = security_kernel_module_from_file(NULL);
if (err)
return err;
/* Suck in entire file: we'll want most of it. */
if ((hdr = vmalloc(len)) == NULL)
info->hdr = vmalloc(info->len);
if (!info->hdr)
return -ENOMEM;
if (copy_from_user(hdr, umod, len) != 0) {
err = -EFAULT;
goto free_hdr;
if (copy_from_user(info->hdr, umod, info->len) != 0) {
vfree(info->hdr);
return -EFAULT;
}
err = module_sig_check(info, hdr, &len);
if (err)
goto free_hdr;
/* Sanity checks against insmoding binaries or wrong arch,
weird elf version */
if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
|| hdr->e_type != ET_REL
|| !elf_check_arch(hdr)
|| hdr->e_shentsize != sizeof(Elf_Shdr)) {
err = -ENOEXEC;
goto free_hdr;
}
if (hdr->e_shoff >= len ||
hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff) {
err = -ENOEXEC;
goto free_hdr;
}
info->hdr = hdr;
info->len = len;
return 0;
}
free_hdr:
vfree(hdr);
/* Sets info->hdr and info->len. */
static int copy_module_from_fd(int fd, struct load_info *info)
{
struct file *file;
int err;
struct kstat stat;
loff_t pos;
ssize_t bytes = 0;
file = fget(fd);
if (!file)
return -ENOEXEC;
err = security_kernel_module_from_file(file);
if (err)
goto out;
err = vfs_getattr(file->f_vfsmnt, file->f_dentry, &stat);
if (err)
goto out;
if (stat.size > INT_MAX) {
err = -EFBIG;
goto out;
}
/* Don't hand 0 to vmalloc, it whines. */
if (stat.size == 0) {
err = -EINVAL;
goto out;
}
info->hdr = vmalloc(stat.size);
if (!info->hdr) {
err = -ENOMEM;
goto out;
}
pos = 0;
while (pos < stat.size) {
bytes = kernel_read(file, pos, (char *)(info->hdr) + pos,
stat.size - pos);
if (bytes < 0) {
vfree(info->hdr);
err = bytes;
goto out;
}
if (bytes == 0)
break;
pos += bytes;
}
info->len = pos;
out:
fput(file);
return err;
}
@@ -2512,7 +2585,7 @@ static void free_copy(struct load_info *info)
vfree(info->hdr);
}
static int rewrite_section_headers(struct load_info *info)
static int rewrite_section_headers(struct load_info *info, int flags)
{
unsigned int i;
@@ -2540,7 +2613,10 @@ static int rewrite_section_headers(struct load_info *info)
}
/* Track but don't keep modinfo and version sections. */
info->index.vers = find_sec(info, "__versions");
if (flags & MODULE_INIT_IGNORE_MODVERSIONS)
info->index.vers = 0; /* Pretend no __versions section! */
else
info->index.vers = find_sec(info, "__versions");
info->index.info = find_sec(info, ".modinfo");
info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -2555,7 +2631,7 @@ static int rewrite_section_headers(struct load_info *info)
* Return the temporary module pointer (we'll replace it with the final
* one when we move the module sections around).
*/
static struct module *setup_load_info(struct load_info *info)
static struct module *setup_load_info(struct load_info *info, int flags)
{
unsigned int i;
int err;
@@ -2566,7 +2642,7 @@ static struct module *setup_load_info(struct load_info *info)
info->secstrings = (void *)info->hdr
+ info->sechdrs[info->hdr->e_shstrndx].sh_offset;
err = rewrite_section_headers(info);
err = rewrite_section_headers(info, flags);
if (err)
return ERR_PTR(err);
@@ -2604,11 +2680,14 @@ static struct module *setup_load_info(struct load_info *info)
return mod;
}
static int check_modinfo(struct module *mod, struct load_info *info)
static int check_modinfo(struct module *mod, struct load_info *info, int flags)
{
const char *modmagic = get_modinfo(info, "vermagic");
int err;
if (flags & MODULE_INIT_IGNORE_VERMAGIC)
modmagic = NULL;
/* This is allowed: modprobe --force will invalidate it. */
if (!modmagic) {
err = try_to_force_load(mod, "bad vermagic");
@@ -2738,20 +2817,23 @@ static int move_module(struct module *mod, struct load_info *info)
memset(ptr, 0, mod->core_size);
mod->module_core = ptr;
ptr = module_alloc_update_bounds(mod->init_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. This block doesn't need to be
* scanned as it contains data and code that will be freed
* after the module is initialized.
*/
kmemleak_ignore(ptr);
if (!ptr && mod->init_size) {
module_free(mod, mod->module_core);
return -ENOMEM;
}
memset(ptr, 0, mod->init_size);
mod->module_init = ptr;
if (mod->init_size) {
ptr = module_alloc_update_bounds(mod->init_size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. This block doesn't need to be
* scanned as it contains data and code that will be freed
* after the module is initialized.
*/
kmemleak_ignore(ptr);
if (!ptr) {
module_free(mod, mod->module_core);
return -ENOMEM;
}
memset(ptr, 0, mod->init_size);
mod->module_init = ptr;
} else
mod->module_init = NULL;
/* Transfer each section which specifies SHF_ALLOC */
pr_debug("final section addresses:\n");
@@ -2844,18 +2926,18 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr,
return 0;
}
static struct module *layout_and_allocate(struct load_info *info)
static struct module *layout_and_allocate(struct load_info *info, int flags)
{
/* Module within temporary copy. */
struct module *mod;
Elf_Shdr *pcpusec;
int err;
mod = setup_load_info(info);
mod = setup_load_info(info, flags);
if (IS_ERR(mod))
return mod;
err = check_modinfo(mod, info);
err = check_modinfo(mod, info, flags);
if (err)
return ERR_PTR(err);
@@ -2935,163 +3017,14 @@ static bool finished_loading(const char *name)
bool ret;
mutex_lock(&module_mutex);
mod = find_module(name);
ret = !mod || mod->state != MODULE_STATE_COMING;
mod = find_module_all(name, true);
ret = !mod || mod->state == MODULE_STATE_LIVE
|| mod->state == MODULE_STATE_GOING;
mutex_unlock(&module_mutex);
return ret;
}
/*
 * Allocate and load the module: note that size of section 0 is always
 * zero, and we rely on this for optional sections.
 *
 * @umod:  userspace pointer to the module image.
 * @len:   length of the image at @umod.
 * @uargs: userspace pointer to the module argument string; it is
 *         duplicated with strndup_user() below.
 *
 * Returns the loaded module on success. On failure every step already
 * performed is undone through the label ladder at the end of the function
 * and ERR_PTR(err) is returned.
 */
static struct module *load_module(void __user *umod,
unsigned long len,
const char __user *uargs)
{
struct load_info info = { NULL, };
struct module *mod, *old;
long err;
pr_debug("load_module: umod=%p, len=%lu, uargs=%p\n",
umod, len, uargs);
/* Copy in the blobs from userspace, check they are vaguely sane. */
err = copy_and_check(&info, umod, len, uargs);
if (err)
return ERR_PTR(err);
/* Figure out module layout, and allocate all the memory. */
mod = layout_and_allocate(&info);
if (IS_ERR(mod)) {
err = PTR_ERR(mod);
goto free_copy;
}
#ifdef CONFIG_MODULE_SIG
/* Record the signature verdict; a module without a good one taints. */
mod->sig_ok = info.sig_ok;
if (!mod->sig_ok)
add_taint_module(mod, TAINT_FORCED_MODULE);
#endif
/* Now module is in final location, initialize linked lists, etc. */
err = module_unload_init(mod);
if (err)
goto free_module;
/* Now we've got everything in the final locations, we can
* find optional sections. */
find_module_sections(mod, &info);
err = check_module_license_and_versions(mod);
if (err)
goto free_unload;
/* Set up MODINFO_ATTR fields */
setup_modinfo(mod, &info);
/* Fix up syms, so that st_value is a pointer to location. */
err = simplify_symbols(mod, &info);
if (err < 0)
goto free_modinfo;
err = apply_relocations(mod, &info);
if (err < 0)
goto free_modinfo;
err = post_relocation(mod, &info);
if (err < 0)
goto free_modinfo;
flush_module_icache(mod);
/* Now copy in args */
mod->args = strndup_user(uargs, ~0UL >> 1);
if (IS_ERR(mod->args)) {
err = PTR_ERR(mod->args);
goto free_arch_cleanup;
}
/* Mark state as coming so strong_try_module_get() ignores us. */
mod->state = MODULE_STATE_COMING;
/* Now sew it into the lists so we can get lockdep and oops
* info during argument parsing. No one should access us, since
* strong_try_module_get() will fail.
* lockdep/oops can run asynchronous, so use the RCU list insertion
* function to insert in a way safe to concurrent readers.
* The mutex protects against concurrent writers.
*/
again:
mutex_lock(&module_mutex);
if ((old = find_module(mod->name)) != NULL) {
if (old->state == MODULE_STATE_COMING) {
/* Wait in case it fails to load. */
mutex_unlock(&module_mutex);
err = wait_event_interruptible(module_wq,
finished_loading(mod->name));
/*
* The mutex was dropped above, so on a non-zero result jump past
* the "unlock" label straight to the unlocked part of the ladder.
*/
if (err)
goto free_arch_cleanup;
goto again;
}
/* A same-named module that is not COMING: refuse the duplicate. */
err = -EEXIST;
goto unlock;
}
/* This has to be done once we're sure module name is unique. */
dynamic_debug_setup(info.debug, info.num_debug);
/* Find duplicate symbols */
err = verify_export_symbols(mod);
if (err < 0)
goto ddebug;
module_bug_finalize(info.hdr, info.sechdrs, mod);
list_add_rcu(&mod->list, &modules);
mutex_unlock(&module_mutex);
/* Module is ready to execute: parsing args may do that. */
err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-32768, 32767, &ddebug_dyndbg_module_param_cb);
if (err < 0)
goto unlink;
/* Link in to sysfs. */
err = mod_sysfs_setup(mod, &info, mod->kp, mod->num_kp);
if (err < 0)
goto unlink;
/* Get rid of temporary copy. */
free_copy(&info);
/* Done! */
trace_module_load(mod);
return mod;
/*
* Error unwinding. Each label undoes one setup step and falls through
* to the next, so the labels appear in reverse order of the setup above.
* "unlink" re-takes module_mutex; "ddebug" and "unlock" are entered with
* it already held.
*/
unlink:
mutex_lock(&module_mutex);
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
module_bug_cleanup(mod);
wake_up_all(&module_wq);
ddebug:
dynamic_debug_remove(info.debug);
unlock:
mutex_unlock(&module_mutex);
synchronize_sched();
kfree(mod->args);
free_arch_cleanup:
module_arch_cleanup(mod);
free_modinfo:
free_modinfo(mod);
free_unload:
module_unload_free(mod);
free_module:
module_deallocate(mod, &info);
free_copy:
free_copy(&info);
return ERR_PTR(err);
}
/* Call module constructors. */
static void do_mod_ctors(struct module *mod)
{
@@ -3104,20 +3037,15 @@ static void do_mod_ctors(struct module *mod)
}
/* This is where the real work happens */
SYSCALL_DEFINE3(init_module, void __user *, umod,
unsigned long, len, const char __user *, uargs)
static int do_init_module(struct module *mod)
{
struct module *mod;
int ret = 0;
/* Must have permission */
if (!capable(CAP_SYS_MODULE) || modules_disabled)
return -EPERM;
/* Do all the hard work */
mod = load_module(umod, len, uargs);
if (IS_ERR(mod))
return PTR_ERR(mod);
/*
* We want to find out whether @mod uses async during init. Clear
* PF_USED_ASYNC. async_schedule*() will set it.
*/
current->flags &= ~PF_USED_ASYNC;
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_COMING, mod);
@@ -3164,8 +3092,25 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_LIVE, mod);
/* We need to finish all async code before the module init sequence is done */
async_synchronize_full();
/*
* We need to finish all async code before the module init sequence
* is done. This has potential to deadlock. For example, a newly
* detected block device can trigger request_module() of the
* default iosched from async probing task. Once userland helper
* reaches here, async_synchronize_full() will wait on the async
* task waiting on request_module() and deadlock.
*
* This deadlock is avoided by performing async_synchronize_full()
* iff module init queued any async jobs. This isn't a full
* solution as it will deadlock the same if module loading from
* async jobs nests more than once; however, due to the various
* constraints, this hack seems to be the best option for now.
* Please refer to the following thread for details.
*
* http://thread.gmane.org/gmane.linux.kernel/1420814
*/
if (current->flags & PF_USED_ASYNC)
async_synchronize_full();
mutex_lock(&module_mutex);
/* Drop initial reference. */
@@ -3188,6 +3133,213 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
return 0;
}
/*
 * Permission gate shared by the module-loading syscalls.
 *
 * Returns 0 when the caller has CAP_SYS_MODULE and module loading has not
 * been switched off via modules_disabled, -EPERM otherwise.
 */
static int may_init_module(void)
{
	/* Capability first, then the global modules_disabled switch. */
	if (capable(CAP_SYS_MODULE) && !modules_disabled)
		return 0;

	return -EPERM;
}
/*
 * Allocate and load the module: note that size of section 0 is always
 * zero, and we rely on this for optional sections.
 *
 * @info:  load_info for the temporary copy of the module image; it is
 *         released with free_copy() on the success path and on every
 *         failure path of this function.
 * @uargs: userspace pointer to the module argument string; it is
 *         duplicated with strndup_user() below.
 * @flags: handed through unchanged to layout_and_allocate().
 *
 * Returns the result of do_init_module() once loading has succeeded.
 * If an earlier step fails, everything already set up is undone through
 * the label ladder at the end of the function and that step's error code
 * is returned.
 */
static int load_module(struct load_info *info, const char __user *uargs,
int flags)
{
struct module *mod, *old;
long err;
/*
* Check the image before any layout work; at this point only the
* temporary copy has to be dropped on failure.
*/
err = module_sig_check(info);
if (err)
goto free_copy;
err = elf_header_check(info);
if (err)
goto free_copy;
/* Figure out module layout, and allocate all the memory. */
mod = layout_and_allocate(info, flags);
if (IS_ERR(mod)) {
err = PTR_ERR(mod);
goto free_copy;
}
/*
* We try to place it in the list now to make sure it's unique
* before we dedicate too many resources. In particular,
* temporary percpu memory exhaustion.
*/
mod->state = MODULE_STATE_UNFORMED;
again:
mutex_lock(&module_mutex);
/*
* NOTE(review): the "true" argument presumably makes the lookup also
* return modules still in MODULE_STATE_UNFORMED - confirm against
* find_module_all().
*/
if ((old = find_module_all(mod->name, true)) != NULL) {
if (old->state == MODULE_STATE_COMING
|| old->state == MODULE_STATE_UNFORMED) {
/* Wait in case it fails to load. */
mutex_unlock(&module_mutex);
err = wait_event_interruptible(module_wq,
finished_loading(mod->name));
/*
* We are not on the modules list yet, so a failed wait skips
* unlink_mod and goes straight to free_module.
*/
if (err)
goto free_module;
goto again;
}
/* A same-named module in any other state: refuse the duplicate. */
err = -EEXIST;
mutex_unlock(&module_mutex);
goto free_module;
}
/* Name is unique: publish the (still UNFORMED) module on the list. */
list_add_rcu(&mod->list, &modules);
mutex_unlock(&module_mutex);
#ifdef CONFIG_MODULE_SIG
/* Record the signature verdict; a module without a good one taints. */
mod->sig_ok = info->sig_ok;
if (!mod->sig_ok)
add_taint_module(mod, TAINT_FORCED_MODULE);
#endif
/* Now module is in final location, initialize linked lists, etc. */
err = module_unload_init(mod);
if (err)
goto unlink_mod;
/* Now we've got everything in the final locations, we can
* find optional sections. */
find_module_sections(mod, info);
err = check_module_license_and_versions(mod);
if (err)
goto free_unload;
/* Set up MODINFO_ATTR fields */
setup_modinfo(mod, info);
/* Fix up syms, so that st_value is a pointer to location. */
err = simplify_symbols(mod, info);
if (err < 0)
goto free_modinfo;
err = apply_relocations(mod, info);
if (err < 0)
goto free_modinfo;
err = post_relocation(mod, info);
if (err < 0)
goto free_modinfo;
flush_module_icache(mod);
/* Now copy in args */
mod->args = strndup_user(uargs, ~0UL >> 1);
if (IS_ERR(mod->args)) {
err = PTR_ERR(mod->args);
goto free_arch_cleanup;
}
dynamic_debug_setup(info->debug, info->num_debug);
mutex_lock(&module_mutex);
/* Find duplicate symbols (must be called under lock). */
err = verify_export_symbols(mod);
if (err < 0)
goto ddebug_cleanup;
/* This relies on module_mutex for list integrity. */
module_bug_finalize(info->hdr, info->sechdrs, mod);
/* Mark state as coming so strong_try_module_get() ignores us,
* but kallsyms etc. can see us. */
mod->state = MODULE_STATE_COMING;
mutex_unlock(&module_mutex);
/* Module is ready to execute: parsing args may do that. */
err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp,
-32768, 32767, &ddebug_dyndbg_module_param_cb);
if (err < 0)
goto bug_cleanup;
/* Link in to sysfs. */
err = mod_sysfs_setup(mod, info, mod->kp, mod->num_kp);
if (err < 0)
goto bug_cleanup;
/* Get rid of temporary copy. */
free_copy(info);
/* Done! */
trace_module_load(mod);
return do_init_module(mod);
/*
* Error unwinding. Each label undoes one setup step and falls through
* to the next, so the labels appear in reverse order of the setup above.
* bug_cleanup takes module_mutex itself; ddebug_cleanup is entered with
* the mutex already held and releases it.
*/
bug_cleanup:
/* module_bug_cleanup needs module_mutex protection */
mutex_lock(&module_mutex);
module_bug_cleanup(mod);
ddebug_cleanup:
mutex_unlock(&module_mutex);
dynamic_debug_remove(info->debug);
synchronize_sched();
kfree(mod->args);
free_arch_cleanup:
module_arch_cleanup(mod);
free_modinfo:
free_modinfo(mod);
free_unload:
module_unload_free(mod);
unlink_mod:
mutex_lock(&module_mutex);
/* Unlink carefully: kallsyms could be walking list. */
list_del_rcu(&mod->list);
/* Wake anyone sleeping on module_wq in the "again" loop above. */
wake_up_all(&module_wq);
mutex_unlock(&module_mutex);
free_module:
module_deallocate(mod, info);
free_copy:
free_copy(info);
return err;
}
/*
 * init_module syscall: load a module whose image lives in user memory.
 *
 * @umod:  userspace pointer to the module image.
 * @len:   length of the image.
 * @uargs: userspace pointer to the module argument string.
 *
 * Fails early with may_init_module()'s result if the caller is not
 * allowed to load modules. Otherwise the image is copied in with
 * copy_module_from_user() and handed to load_module() with no flags.
 */
SYSCALL_DEFINE3(init_module, void __user *, umod,
		unsigned long, len, const char __user *, uargs)
{
	struct load_info info = { };
	int err = may_init_module();

	if (err)
		return err;

	pr_debug("init_module: umod=%p, len=%lu, uargs=%p\n",
	       umod, len, uargs);

	err = copy_module_from_user(umod, len, &info);

	return err ? err : load_module(&info, uargs, 0);
}
/*
 * finit_module syscall: load a module read from an open file descriptor.
 *
 * @fd:    file descriptor the image is read from.
 * @uargs: userspace pointer to the module argument string.
 * @flags: may contain only MODULE_INIT_IGNORE_MODVERSIONS and
 *         MODULE_INIT_IGNORE_VERMAGIC; any other bit yields -EINVAL.
 *
 * The permission check comes first, so an unprivileged caller gets
 * may_init_module()'s error even when @flags is invalid.
 */
SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags)
{
	const int known_flags = MODULE_INIT_IGNORE_MODVERSIONS
				|MODULE_INIT_IGNORE_VERMAGIC;
	struct load_info info = { };
	int err = may_init_module();

	if (err)
		return err;

	pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags);

	if (flags & ~known_flags)
		return -EINVAL;

	err = copy_module_from_fd(fd, &info);

	return err ? err : load_module(&info, uargs, flags);
}
static inline int within(unsigned long addr, void *start, unsigned long size)
{
return ((void *)addr >= start && (void *)addr < start + size);
@@ -3261,6 +3413,8 @@ const char *module_address_lookup(unsigned long addr,
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if (within_module_init(addr, mod) ||
within_module_core(addr, mod)) {
if (modname)
@@ -3284,6 +3438,8 @@ int lookup_module_symbol_name(unsigned long addr, char *symname)
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if (within_module_init(addr, mod) ||
within_module_core(addr, mod)) {
const char *sym;
@@ -3308,6 +3464,8 @@ int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size,
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if (within_module_init(addr, mod) ||
within_module_core(addr, mod)) {
const char *sym;
@@ -3335,6 +3493,8 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if (symnum < mod->num_symtab) {
*value = mod->symtab[symnum].st_value;
*type = mod->symtab[symnum].st_info;
@@ -3377,9 +3537,12 @@ unsigned long module_kallsyms_lookup_name(const char *name)
ret = mod_find_symname(mod, colon+1);
*colon = ':';
} else {
list_for_each_entry_rcu(mod, &modules, list)
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if ((ret = mod_find_symname(mod, name)) != 0)
break;
}
}
preempt_enable();
return ret;
@@ -3394,6 +3557,8 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
int ret;
list_for_each_entry(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
for (i = 0; i < mod->num_symtab; i++) {
ret = fn(data, mod->strtab + mod->symtab[i].st_name,
mod, mod->symtab[i].st_value);
@@ -3409,6 +3574,7 @@ static char *module_flags(struct module *mod, char *buf)
{
int bx = 0;
BUG_ON(mod->state == MODULE_STATE_UNFORMED);
if (mod->taints ||
mod->state == MODULE_STATE_GOING ||
mod->state == MODULE_STATE_COMING) {
@@ -3450,6 +3616,10 @@ static int m_show(struct seq_file *m, void *p)
struct module *mod = list_entry(p, struct module, list);
char buf[8];
/* We always ignore unformed modules. */
if (mod->state == MODULE_STATE_UNFORMED)
return 0;
seq_printf(m, "%s %u",
mod->name, mod->init_size + mod->core_size);
print_unload_info(m, mod);
@@ -3510,6 +3680,8 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if (mod->num_exentries == 0)
continue;
@@ -3558,10 +3730,13 @@ struct module *__module_address(unsigned long addr)
if (addr < module_addr_min || addr > module_addr_max)
return NULL;
list_for_each_entry_rcu(mod, &modules, list)
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
if (within_module_core(addr, mod)
|| within_module_init(addr, mod))
return mod;
}
return NULL;
}
EXPORT_SYMBOL_GPL(__module_address);
@@ -3614,8 +3789,11 @@ void print_modules(void)
printk(KERN_DEFAULT "Modules linked in:");
/* Most callers should already have preempt disabled, but make sure */
preempt_disable();
list_for_each_entry_rcu(mod, &modules, list)
list_for_each_entry_rcu(mod, &modules, list) {
if (mod->state == MODULE_STATE_UNFORMED)
continue;
printk(" %s%s", mod->name, module_flags(mod, buf));
}
preempt_enable();
if (last_unloaded_module[0])
printk(" [last unloaded: %s]", last_unloaded_module);

Переглянути файл

@@ -270,7 +270,6 @@ void free_pid(struct pid *pid)
wake_up_process(ns->child_reaper);
break;
case 0:
ns->nr_hashed = -1;
schedule_work(&ns->proc_work);
break;
}
@@ -319,7 +318,7 @@ struct pid *alloc_pid(struct pid_namespace *ns)
upid = pid->numbers + ns->level;
spin_lock_irq(&pidmap_lock);
if (ns->nr_hashed < 0)
if (!(ns->nr_hashed & PIDNS_HASH_ADDING))
goto out_unlock;
for ( ; upid >= pid->numbers; --upid) {
hlist_add_head_rcu(&upid->pid_chain,
@@ -342,6 +341,13 @@ out_free:
goto out;
}
/*
 * Clear PIDNS_HASH_ADDING in @ns->nr_hashed while holding pidmap_lock.
 * All other bits of nr_hashed are left untouched.
 */
void disable_pid_allocation(struct pid_namespace *ns)
{
	spin_lock_irq(&pidmap_lock);
	ns->nr_hashed = ns->nr_hashed & ~PIDNS_HASH_ADDING;
	spin_unlock_irq(&pidmap_lock);
}
struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
{
struct hlist_node *elem;
@@ -573,6 +579,9 @@ void __init pidhash_init(void)
void __init pidmap_init(void)
{
/* Verify no one has done anything silly */
BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
/* bump default and minimum pid_max based on number of cpus */
pid_max = min(pid_max_max, max_t(int, pid_max,
PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
@@ -584,7 +593,7 @@ void __init pidmap_init(void)
/* Reserve PID 0. We never call free_pidmap(0) */
set_bit(0, init_pid_ns.pidmap[0].page);
atomic_dec(&init_pid_ns.pidmap[0].nr_free);
init_pid_ns.nr_hashed = 1;
init_pid_ns.nr_hashed = PIDNS_HASH_ADDING;
init_pid_ns.pid_cachep = KMEM_CACHE(pid,
SLAB_HWCACHE_ALIGN | SLAB_PANIC);

Переглянути файл

@@ -115,6 +115,7 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
ns->level = level;
ns->parent = get_pid_ns(parent_pid_ns);
ns->user_ns = get_user_ns(user_ns);
ns->nr_hashed = PIDNS_HASH_ADDING;
INIT_WORK(&ns->proc_work, proc_cleanup_work);
set_bit(0, ns->pidmap[0].page);
@@ -181,6 +182,9 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
int rc;
struct task_struct *task, *me = current;
/* Don't allow any more processes into the pid namespace */
disable_pid_allocation(pid_ns);
/* Ignore SIGCHLD causing any terminated children to autoreap */
spin_lock_irq(&me->sighand->siglock);
me->sighand->action[SIGCHLD - 1].sa.sa_handler = SIG_IGN;
@@ -325,7 +329,8 @@ static int pidns_install(struct nsproxy *nsproxy, void *ns)
struct pid_namespace *active = task_active_pid_ns(current);
struct pid_namespace *ancestor, *new = ns;
if (!ns_capable(new->user_ns, CAP_SYS_ADMIN))
if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) ||
!nsown_capable(CAP_SYS_ADMIN))
return -EPERM;
/*

Переглянути файл

@@ -9,6 +9,7 @@
#include <asm/uaccess.h>
#include <linux/kernel_stat.h>
#include <trace/events/timer.h>
#include <linux/random.h>
/*
* Called after updating RLIMIT_CPU to run cpu timer and update
@@ -470,6 +471,8 @@ static void cleanup_timers(struct list_head *head,
*/
void posix_cpu_timers_exit(struct task_struct *tsk)
{
add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
sizeof(unsigned long long));
cleanup_timers(tsk->cpu_timers,
tsk->utime, tsk->stime, tsk->se.sum_exec_runtime);

Переглянути файл

@@ -870,10 +870,11 @@ static size_t print_time(u64 ts, char *buf)
if (!printk_time)
return 0;
if (!buf)
return 15;
rem_nsec = do_div(ts, 1000000000);
if (!buf)
return snprintf(NULL, 0, "[%5lu.000000] ", (unsigned long)ts);
return sprintf(buf, "[%5lu.%06lu] ",
(unsigned long)ts, rem_nsec / 1000);
}

Переглянути файл

@@ -117,11 +117,45 @@ void __ptrace_unlink(struct task_struct *child)
* TASK_KILLABLE sleeps.
*/
if (child->jobctl & JOBCTL_STOP_PENDING || task_is_traced(child))
signal_wake_up(child, task_is_traced(child));
ptrace_signal_wake_up(child, true);
spin_unlock(&child->sighand->siglock);
}
/*
 * Ensure that nothing can wake the tracee up, even SIGKILL.
 *
 * Under siglock, a task that is traced and has no fatal signal pending
 * gets its state set to __TASK_TRACED. Returns true when that was done,
 * and false when the task is listening (JOBCTL_LISTENING), is not
 * traced, or has a fatal signal pending.
 */
static bool ptrace_freeze_traced(struct task_struct *task)
{
	bool frozen;

	/* Lockless, nobody but us can set this flag */
	if (task->jobctl & JOBCTL_LISTENING)
		return false;

	spin_lock_irq(&task->sighand->siglock);
	frozen = task_is_traced(task) && !__fatal_signal_pending(task);
	if (frozen)
		task->state = __TASK_TRACED;
	spin_unlock_irq(&task->sighand->siglock);

	return frozen;
}
/*
 * Undo ptrace_freeze_traced(): a task whose state is still __TASK_TRACED
 * goes back to TASK_TRACED, or is woken if a fatal signal became pending
 * in the meantime. A task in any other state is left alone.
 */
static void ptrace_unfreeze_traced(struct task_struct *task)
{
	if (task->state != __TASK_TRACED)
		return;

	/* Only the tracer of @task is expected to get here. */
	WARN_ON(!task->ptrace || task->parent != current);

	spin_lock_irq(&task->sighand->siglock);
	if (!__fatal_signal_pending(task))
		task->state = TASK_TRACED;
	else
		wake_up_state(task, __TASK_TRACED);
	spin_unlock_irq(&task->sighand->siglock);
}
/**
* ptrace_check_attach - check whether ptracee is ready for ptrace operation
* @child: ptracee to check for
@@ -139,7 +173,7 @@ void __ptrace_unlink(struct task_struct *child)
* RETURNS:
* 0 on success, -ESRCH if %child is not ready.
*/
int ptrace_check_attach(struct task_struct *child, bool ignore_state)
static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
{
int ret = -ESRCH;
@@ -151,24 +185,29 @@ int ptrace_check_attach(struct task_struct *child, bool ignore_state)
* be changed by us so it's not changing right after this.
*/
read_lock(&tasklist_lock);
if ((child->ptrace & PT_PTRACED) && child->parent == current) {
if (child->ptrace && child->parent == current) {
WARN_ON(child->state == __TASK_TRACED);
/*
* child->sighand can't be NULL, release_task()
* does ptrace_unlink() before __exit_signal().
*/
spin_lock_irq(&child->sighand->siglock);
WARN_ON_ONCE(task_is_stopped(child));
if (ignore_state || (task_is_traced(child) &&
!(child->jobctl & JOBCTL_LISTENING)))
if (ignore_state || ptrace_freeze_traced(child))
ret = 0;
spin_unlock_irq(&child->sighand->siglock);
}
read_unlock(&tasklist_lock);
if (!ret && !ignore_state)
ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH;
if (!ret && !ignore_state) {
if (!wait_task_inactive(child, __TASK_TRACED)) {
/*
* This can only happen if may_ptrace_stop() fails and
* ptrace_stop() changes ->state back to TASK_RUNNING,
* so we should not worry about leaking __TASK_TRACED.
*/
WARN_ON(child->state == __TASK_TRACED);
ret = -ESRCH;
}
}
/* All systems go.. */
return ret;
}
@@ -317,7 +356,7 @@ static int ptrace_attach(struct task_struct *task, long request,
*/
if (task_is_stopped(task) &&
task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
signal_wake_up(task, 1);
signal_wake_up_state(task, __TASK_STOPPED);
spin_unlock(&task->sighand->siglock);
@@ -737,7 +776,7 @@ int ptrace_request(struct task_struct *child, long request,
* tracee into STOP.
*/
if (likely(task_set_jobctl_pending(child, JOBCTL_TRAP_STOP)))
signal_wake_up(child, child->jobctl & JOBCTL_LISTENING);
ptrace_signal_wake_up(child, child->jobctl & JOBCTL_LISTENING);
unlock_task_sighand(child, &flags);
ret = 0;
@@ -763,7 +802,7 @@ int ptrace_request(struct task_struct *child, long request,
* start of this trap and now. Trigger re-trap.
*/
if (child->jobctl & JOBCTL_TRAP_NOTIFY)
signal_wake_up(child, true);
ptrace_signal_wake_up(child, true);
ret = 0;
}
unlock_task_sighand(child, &flags);
@@ -900,6 +939,8 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr,
goto out_put_task_struct;
ret = arch_ptrace(child, request, addr, data);
if (ret || request != PTRACE_DETACH)
ptrace_unfreeze_traced(child);
out_put_task_struct:
put_task_struct(child);
@@ -1039,8 +1080,11 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
ret = ptrace_check_attach(child, request == PTRACE_KILL ||
request == PTRACE_INTERRUPT);
if (!ret)
if (!ret) {
ret = compat_arch_ptrace(child, request, addr, data);
if (ret || request != PTRACE_DETACH)
ptrace_unfreeze_traced(child);
}
out_put_task_struct:
put_task_struct(child);

Переглянути файл

@@ -86,33 +86,39 @@ int res_counter_charge_nofail(struct res_counter *counter, unsigned long val,
return __res_counter_charge(counter, val, limit_fail_at, true);
}
void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
{
if (WARN_ON(counter->usage < val))
val = counter->usage;
counter->usage -= val;
return counter->usage;
}
void res_counter_uncharge_until(struct res_counter *counter,
struct res_counter *top,
unsigned long val)
u64 res_counter_uncharge_until(struct res_counter *counter,
struct res_counter *top,
unsigned long val)
{
unsigned long flags;
struct res_counter *c;
u64 ret = 0;
local_irq_save(flags);
for (c = counter; c != top; c = c->parent) {
u64 r;
spin_lock(&c->lock);
res_counter_uncharge_locked(c, val);
r = res_counter_uncharge_locked(c, val);
if (c == counter)
ret = r;
spin_unlock(&c->lock);
}
local_irq_restore(flags);
return ret;
}
void res_counter_uncharge(struct res_counter *counter, unsigned long val)
u64 res_counter_uncharge(struct res_counter *counter, unsigned long val)
{
res_counter_uncharge_until(counter, NULL, val);
return res_counter_uncharge_until(counter, NULL, val);
}
static inline unsigned long long *

Переглянути файл

@@ -116,6 +116,16 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
EXPORT_SYMBOL(down_read_nested);
/*
 * Write-lock @sem, annotating the acquisition for lockdep with @nest as
 * the nesting lock map.
 *
 * The three statements are order-sensitive: the sleep check and the
 * lockdep annotation come before the actual lock acquisition.
 */
void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
{
/* Debug annotation: this function may block. */
might_sleep();
/*
* NOTE(review): the two 0 arguments are presumably subclass and trylock;
* confirm against the rwsem_acquire_nest() definition.
*/
rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
/*
* NOTE(review): LOCK_CONTENDED presumably attempts __down_write_trylock
* first and falls back to __down_write on contention - confirm against
* the macro definition.
*/
LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
}
EXPORT_SYMBOL(_down_write_nest_lock);
void down_write_nested(struct rw_semaphore *sem, int subclass)
{
might_sleep();

Переглянути файл

@@ -1523,7 +1523,8 @@ out:
*/
int wake_up_process(struct task_struct *p)
{
return try_to_wake_up(p, TASK_ALL, 0);
WARN_ON(task_is_stopped_or_traced(p));
return try_to_wake_up(p, TASK_NORMAL, 0);
}
EXPORT_SYMBOL(wake_up_process);

Переглянути файл

@@ -793,8 +793,11 @@ unsigned int sysctl_numa_balancing_scan_delay = 1000;
static void task_numa_placement(struct task_struct *p)
{
int seq = ACCESS_ONCE(p->mm->numa_scan_seq);
int seq;
if (!p->mm) /* for example, ksmd faulting in a user's mm */
return;
seq = ACCESS_ONCE(p->mm->numa_scan_seq);
if (p->numa_scan_seq == seq)
return;
p->numa_scan_seq = seq;

Переглянути файл

@@ -31,6 +31,7 @@
#include <linux/nsproxy.h>
#include <linux/user_namespace.h>
#include <linux/uprobes.h>
#include <linux/compat.h>
#define CREATE_TRACE_POINTS
#include <trace/events/signal.h>
@@ -679,23 +680,17 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
* No need to set need_resched since signal event passing
* goes through ->blocked
*/
void signal_wake_up(struct task_struct *t, int resume)
void signal_wake_up_state(struct task_struct *t, unsigned int state)
{
unsigned int mask;
set_tsk_thread_flag(t, TIF_SIGPENDING);
/*
* For SIGKILL, we want to wake it up in the stopped/traced/killable
* TASK_WAKEKILL also means wake it up in the stopped/traced/killable
* case. We don't check t->state here because there is a race with it
* executing another processor and just now entering stopped state.
* By using wake_up_state, we ensure the process will wake up and
* handle its death signal.
*/
mask = TASK_INTERRUPTIBLE;
if (resume)
mask |= TASK_WAKEKILL;
if (!wake_up_state(t, mask))
if (!wake_up_state(t, state | TASK_INTERRUPTIBLE))
kick_process(t);
}
@@ -843,7 +838,7 @@ static void ptrace_trap_notify(struct task_struct *t)
assert_spin_locked(&t->sighand->siglock);
task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY);
signal_wake_up(t, t->jobctl & JOBCTL_LISTENING);
ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING);
}
/*
@@ -1799,6 +1794,10 @@ static inline int may_ptrace_stop(void)
* If SIGKILL was already sent before the caller unlocked
* ->siglock we must see ->core_state != NULL. Otherwise it
* is safe to enter schedule().
*
* This is almost outdated, a task with the pending SIGKILL can't
* block in TASK_TRACED. But PTRACE_EVENT_EXIT can be reported
* after SIGKILL was already dequeued.
*/
if (unlikely(current->mm->core_state) &&
unlikely(current->mm == current->parent->mm))
@@ -1924,6 +1923,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
if (gstop_done)
do_notify_parent_cldstop(current, false, why);
/* tasklist protects us from ptrace_freeze_traced() */
__set_current_state(TASK_RUNNING);
if (clear_code)
current->exit_code = 0;
@@ -2527,11 +2527,8 @@ static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset)
*/
void set_current_blocked(sigset_t *newset)
{
struct task_struct *tsk = current;
sigdelsetmask(newset, sigmask(SIGKILL) | sigmask(SIGSTOP));
spin_lock_irq(&tsk->sighand->siglock);
__set_task_blocked(tsk, newset);
spin_unlock_irq(&tsk->sighand->siglock);
__set_current_blocked(newset);
}
void __set_current_blocked(const sigset_t *newset)
@@ -3094,6 +3091,80 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
out:
return error;
}
#ifdef CONFIG_GENERIC_SIGALTSTACK
/*
 * Generic sigaltstack syscall: forwards to do_sigaltstack() using the
 * caller's current user stack pointer.
 */
SYSCALL_DEFINE2(sigaltstack, const stack_t __user *, uss,
		stack_t __user *, uoss)
{
	return do_sigaltstack(uss, uoss,
			      current_user_stack_pointer());
}
#endif
/*
 * Install the alternate signal stack described by @uss via
 * do_sigaltstack(), without reading back the old one.
 *
 * Returns -EFAULT if do_sigaltstack() reported it; every other result,
 * including other errors, is reported as 0.
 */
int restore_altstack(const stack_t __user *uss)
{
	const int rc = do_sigaltstack(uss, NULL, current_user_stack_pointer());

	/* squash all but EFAULT for now */
	if (rc != -EFAULT)
		return 0;

	return rc;
}
/*
 * Write the current task's alternate-stack settings to the user-space
 * stack_t at @uss: sas_ss_sp, the flags computed by sas_ss_flags(@sp),
 * and sas_ss_size.
 *
 * All three stores are always attempted; the return value is the bitwise
 * OR of their results, so it is 0 only if every store succeeded.
 */
int __save_altstack(stack_t __user *uss, unsigned long sp)
{
	struct task_struct *tsk = current;
	int err;

	err  = __put_user((void __user *)tsk->sas_ss_sp, &uss->ss_sp);
	err |= __put_user(sas_ss_flags(sp), &uss->ss_flags);
	err |= __put_user(tsk->sas_ss_size, &uss->ss_size);

	return err;
}
#ifdef CONFIG_COMPAT
#ifdef CONFIG_GENERIC_SIGALTSTACK
/*
 * Compat sigaltstack syscall.
 *
 * @uss_ptr:  optional compat_stack_t with the new settings.
 * @uoss_ptr: optional compat_stack_t that receives the old settings.
 *
 * The compat layout is converted to a native stack_t held in kernel
 * memory, do_sigaltstack() is called with the address limit switched to
 * KERNEL_DS so that it accepts those kernel pointers, and the old
 * settings are converted back for user space.
 *
 * Returns do_sigaltstack()'s result, or -EFAULT if either user buffer
 * cannot be accessed.
 */
COMPAT_SYSCALL_DEFINE2(sigaltstack,
			const compat_stack_t __user *, uss_ptr,
			compat_stack_t __user *, uoss_ptr)
{
	stack_t uss, uoss;
	stack_t *new_ss = NULL;
	mm_segment_t old_fs;
	int ret;

	if (uss_ptr) {
		compat_stack_t uss32;

		memset(&uss, 0, sizeof(stack_t));
		if (copy_from_user(&uss32, uss_ptr, sizeof(compat_stack_t)))
			return -EFAULT;
		uss.ss_sp = compat_ptr(uss32.ss_sp);
		uss.ss_flags = uss32.ss_flags;
		uss.ss_size = uss32.ss_size;
		new_ss = &uss;
	}

	/* Both stack_t arguments live in kernel memory for this call. */
	old_fs = get_fs();
	set_fs(KERNEL_DS);
	ret = do_sigaltstack((stack_t __force __user *) new_ss,
			     (stack_t __force __user *) &uoss,
			     compat_user_stack_pointer());
	set_fs(old_fs);

	/* Nothing to copy back on failure or when the caller did not ask. */
	if (ret < 0 || !uoss_ptr)
		return ret;

	if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(compat_stack_t)) ||
	    __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) ||
	    __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) ||
	    __put_user(uoss.ss_size, &uoss_ptr->ss_size))
		return -EFAULT;

	return ret;
}
/*
 * Compat counterpart of restore_altstack(): installs the alternate stack
 * described by @uss through compat_sys_sigaltstack().
 *
 * Returns -EFAULT if that call reported it; every other result is
 * reported as 0.
 */
int compat_restore_altstack(const compat_stack_t __user *uss)
{
	const int rc = compat_sys_sigaltstack(uss, NULL);

	/* squash all but -EFAULT for now */
	if (rc != -EFAULT)
		return 0;

	return rc;
}
/*
 * Compat counterpart of __save_altstack(): writes the current task's
 * alternate-stack settings to the compat_stack_t at @uss, with the stack
 * pointer converted by ptr_to_compat().
 *
 * All three stores are always attempted; the return value is the bitwise
 * OR of their results, so it is 0 only if every store succeeded.
 */
int __compat_save_altstack(compat_stack_t __user *uss, unsigned long sp)
{
	struct task_struct *tsk = current;
	int err;

	err  = __put_user(ptr_to_compat((void __user *)tsk->sas_ss_sp), &uss->ss_sp);
	err |= __put_user(sas_ss_flags(sp), &uss->ss_flags);
	err |= __put_user(tsk->sas_ss_size, &uss->ss_size);

	return err;
}
#endif
#endif
#ifdef __ARCH_WANT_SYS_SIGPENDING
@@ -3130,7 +3201,6 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset,
if (nset) {
if (copy_from_user(&new_set, nset, sizeof(*nset)))
return -EFAULT;
new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP));
new_blocked = current->blocked;
@@ -3148,7 +3218,7 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset,
return -EINVAL;
}
__set_current_blocked(&new_blocked);
set_current_blocked(&new_blocked);
}
if (oset) {
@@ -3212,6 +3282,7 @@ SYSCALL_DEFINE1(ssetmask, int, newmask)
int old = current->blocked.sig[0];
sigset_t newset;
siginitset(&newset, newmask);
set_current_blocked(&newset);
return old;

Переглянути файл

@@ -25,6 +25,7 @@ cond_syscall(sys_swapoff);
cond_syscall(sys_kexec_load);
cond_syscall(compat_sys_kexec_load);
cond_syscall(sys_init_module);
cond_syscall(sys_finit_module);
cond_syscall(sys_delete_module);
cond_syscall(sys_socketpair);
cond_syscall(sys_bind);

Переглянути файл

@@ -3998,7 +3998,7 @@ static int ftrace_module_notify(struct notifier_block *self,
struct notifier_block ftrace_module_nb = {
.notifier_call = ftrace_module_notify,
.priority = 0,
.priority = INT_MAX, /* Run before anything that can use kprobes */
};
extern unsigned long __start_mcount_loc[];

Переглянути файл

@@ -2899,6 +2899,8 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
if (copy_from_user(&buf, ubuf, cnt))
return -EFAULT;
buf[cnt] = 0;
trace_set_options(buf);
*ppos += cnt;
@@ -3034,6 +3036,31 @@ static void set_buffer_entries(struct trace_array *tr, unsigned long val)
tr->data[cpu]->entries = val;
}
/*
 * Resize @tr's buffer to the size of @size_tr's entries.
 *
 * @tr:      trace array whose ring buffer is resized.
 * @size_tr: trace array whose per-cpu ->entries values give the target
 *           sizes.
 * @cpu_id:  a single CPU, or RING_BUFFER_ALL_CPUS to walk every tracing
 *           CPU.
 *
 * After each successful resize the new size is mirrored into
 * @tr->data[cpu]->entries. In the all-CPUs case the walk stops at the
 * first failure, leaving the earlier CPUs resized.
 *
 * Returns the result of the last ring_buffer_resize() call, which is
 * negative on failure, or 0 if no CPU was visited.
 */
static int resize_buffer_duplicate_size(struct trace_array *tr,
					struct trace_array *size_tr, int cpu_id)
{
	int cpu, ret = 0;

	if (cpu_id == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			ret = ring_buffer_resize(tr->buffer,
					size_tr->data[cpu]->entries, cpu);
			if (ret < 0)
				break;
			tr->data[cpu]->entries = size_tr->data[cpu]->entries;
		}
	} else {
		ret = ring_buffer_resize(tr->buffer,
					size_tr->data[cpu_id]->entries, cpu_id);
		/*
		 * Failure is "ret < 0", exactly as in the all-CPUs branch
		 * above. Testing for "ret == 0" would skip the bookkeeping
		 * whenever the resize reports success with a positive value.
		 */
		if (ret >= 0)
			tr->data[cpu_id]->entries =
				size_tr->data[cpu_id]->entries;
	}

	return ret;
}
static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
{
int ret;
@@ -3058,23 +3085,8 @@ static int __tracing_resize_ring_buffer(unsigned long size, int cpu)
ret = ring_buffer_resize(max_tr.buffer, size, cpu);
if (ret < 0) {
int r = 0;
if (cpu == RING_BUFFER_ALL_CPUS) {
int i;
for_each_tracing_cpu(i) {
r = ring_buffer_resize(global_trace.buffer,
global_trace.data[i]->entries,
i);
if (r < 0)
break;
}
} else {
r = ring_buffer_resize(global_trace.buffer,
global_trace.data[cpu]->entries,
cpu);
}
int r = resize_buffer_duplicate_size(&global_trace,
&global_trace, cpu);
if (r < 0) {
/*
* AARGH! We are left with different
@@ -3212,17 +3224,11 @@ static int tracing_set_tracer(const char *buf)
topts = create_trace_option_files(t);
if (t->use_max_tr) {
int cpu;
/* we need to make per cpu buffer sizes equivalent */
for_each_tracing_cpu(cpu) {
ret = ring_buffer_resize(max_tr.buffer,
global_trace.data[cpu]->entries,
cpu);
if (ret < 0)
goto out;
max_tr.data[cpu]->entries =
global_trace.data[cpu]->entries;
}
ret = resize_buffer_duplicate_size(&max_tr, &global_trace,
RING_BUFFER_ALL_CPUS);
if (ret < 0)
goto out;
}
if (t->init) {
@@ -3448,7 +3454,7 @@ static int tracing_wait_pipe(struct file *filp)
return -EINTR;
/*
* We block until we read something and tracing is enabled.
* We block until we read something and tracing is disabled.
* We still block if tracing is disabled, but we have never
* read anything. This allows a user to cat this file, and
* then enable tracing. But after we have read something,
@@ -3456,7 +3462,7 @@ static int tracing_wait_pipe(struct file *filp)
*
* iter->pos will be 0 if we haven't read anything.
*/
if (tracing_is_enabled() && iter->pos)
if (!tracing_is_enabled() && iter->pos)
break;
}
@@ -4271,13 +4277,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
return -ENOMEM;
if (*ppos & (PAGE_SIZE - 1)) {
WARN_ONCE(1, "Ftrace: previous read must page-align\n");
ret = -EINVAL;
goto out;
}
if (len & (PAGE_SIZE - 1)) {
WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
if (len < PAGE_SIZE) {
ret = -EINVAL;
goto out;
@@ -4813,10 +4817,17 @@ rb_simple_write(struct file *filp, const char __user *ubuf,
return ret;
if (buffer) {
if (val)
mutex_lock(&trace_types_lock);
if (val) {
ring_buffer_record_on(buffer);
else
if (current_trace->start)
current_trace->start(tr);
} else {
ring_buffer_record_off(buffer);
if (current_trace->stop)
current_trace->stop(tr);
}
mutex_unlock(&trace_types_lock);
}
(*ppos)++;

Переглянути файл

@@ -33,7 +33,6 @@ static unsigned long max_stack_size;
static arch_spinlock_t max_stack_lock =
(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
static int stack_trace_disabled __read_mostly;
static DEFINE_PER_CPU(int, trace_active);
static DEFINE_MUTEX(stack_sysctl_mutex);
@@ -116,9 +115,6 @@ stack_trace_call(unsigned long ip, unsigned long parent_ip,
{
int cpu;
if (unlikely(!ftrace_enabled || stack_trace_disabled))
return;
preempt_disable_notrace();
cpu = raw_smp_processor_id();

Переглянути файл

@@ -799,7 +799,7 @@ static int userns_install(struct nsproxy *nsproxy, void *ns)
if (user_ns == current_user_ns())
return -EINVAL;
/* Threaded many not enter a different user namespace */
/* Threaded processes may not enter a different user namespace */
if (atomic_read(&current->mm->mm_users) > 1)
return -EINVAL;

Переглянути файл

@@ -113,7 +113,8 @@ static int utsns_install(struct nsproxy *nsproxy, void *new)
{
struct uts_namespace *ns = new;
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN))
if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
!nsown_capable(CAP_SYS_ADMIN))
return -EPERM;
get_uts_ns(ns);

Переглянути файл

@@ -344,6 +344,10 @@ static void watchdog_enable(unsigned int cpu)
{
struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
/* kick off the timer for the hardlockup detector */
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = watchdog_timer_fn;
if (!watchdog_enabled) {
kthread_park(current);
return;
@@ -352,10 +356,6 @@ static void watchdog_enable(unsigned int cpu)
/* Enable the perf event */
watchdog_nmi_enable(cpu);
/* kick off the timer for the hardlockup detector */
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = watchdog_timer_fn;
/* done here because hrtimer_start can only pin to smp_processor_id() */
hrtimer_start(hrtimer, ns_to_ktime(sample_period),
HRTIMER_MODE_REL_PINNED);
@@ -369,9 +369,6 @@ static void watchdog_disable(unsigned int cpu)
{
struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
if (!watchdog_enabled)
return;
watchdog_set_prio(SCHED_NORMAL, 0);
hrtimer_cancel(hrtimer);
/* disable the perf event */