Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal

Pull big execve/kernel_thread/fork unification series from Al Viro:
 "All architectures are converted to new model.  Quite a bit of that
  stuff is actually shared with architecture trees; in such cases it's
  literally shared branch pulled by both, not a cherry-pick.

  A lot of ugliness and black magic is gone (-3KLoC total in this one):

   - kernel_thread()/kernel_execve()/sys_execve() redesign.

     We don't do syscalls from kernel anymore for either kernel_thread()
     or kernel_execve():

     kernel_thread() is essentially clone(2) with callback run before we
     return to userland, the callbacks either never return or do
     successful do_execve() before returning.

     kernel_execve() is a wrapper for do_execve() - it doesn't need to
     do transition to user mode anymore.

     As a result kernel_thread() and kernel_execve() are
     arch-independent now - they live in kernel/fork.c and fs/exec.c
     resp.  sys_execve() is also in fs/exec.c and it's completely
     architecture-independent.

   - daemonize() is gone, along with its parts in fs/*.c

   - struct pt_regs * is no longer passed to do_fork/copy_process/
     copy_thread/do_execve/search_binary_handler/->load_binary/do_coredump.

   - sys_fork()/sys_vfork()/sys_clone() unified; some architectures
     still need wrappers (ones with callee-saved registers not saved in
     pt_regs on syscall entry), but the main part of those suckers is in
     kernel/fork.c now."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal: (113 commits)
  do_coredump(): get rid of pt_regs argument
  print_fatal_signal(): get rid of pt_regs argument
  ptrace_signal(): get rid of unused arguments
  get rid of ptrace_signal_deliver() arguments
  new helper: signal_pt_regs()
  unify default ptrace_signal_deliver
  flagday: kill pt_regs argument of do_fork()
  death to idle_regs()
  don't pass regs to copy_process()
  flagday: don't pass regs to copy_thread()
  bfin: switch to generic vfork, get rid of pointless wrappers
  xtensa: switch to generic clone()
  openrisc: switch to use of generic fork and clone
  unicore32: switch to generic clone(2)
  score: switch to generic fork/vfork/clone
  c6x: sanitize copy_thread(), get rid of clone(2) wrapper, switch to generic clone()
  take sys_fork/sys_vfork/sys_clone prototypes to linux/syscalls.h
  mn10300: switch to generic fork/vfork/clone
  h8300: switch to generic fork/vfork/clone
  tile: switch to generic clone()
  ...

Conflicts:
	arch/microblaze/include/asm/Kbuild
This commit is contained in:
Linus Torvalds
2012-12-12 12:22:13 -08:00
273 changed files with 1385 additions and 4231 deletions

View File

@@ -200,7 +200,6 @@ struct audit_context {
struct list_head names_list; /* anchor for struct audit_names->list */
char * filterkey; /* key for rule that triggered record */
struct path pwd;
struct audit_context *previous; /* For nested syscalls */
struct audit_aux_data *aux;
struct audit_aux_data *aux_pids;
struct sockaddr_storage *sockaddr;
@@ -1091,29 +1090,13 @@ int audit_alloc(struct task_struct *tsk)
static inline void audit_free_context(struct audit_context *context)
{
struct audit_context *previous;
int count = 0;
do {
previous = context->previous;
if (previous || (count && count < 10)) {
++count;
printk(KERN_ERR "audit(:%d): major=%d name_count=%d:"
" freeing multiple contexts (%d)\n",
context->serial, context->major,
context->name_count, count);
}
audit_free_names(context);
unroll_tree_refs(context, NULL, 0);
free_tree_refs(context);
audit_free_aux(context);
kfree(context->filterkey);
kfree(context->sockaddr);
kfree(context);
context = previous;
} while (context);
if (count >= 10)
printk(KERN_ERR "audit: freed %d contexts\n", count);
audit_free_names(context);
unroll_tree_refs(context, NULL, 0);
free_tree_refs(context);
audit_free_aux(context);
kfree(context->filterkey);
kfree(context->sockaddr);
kfree(context);
}
void audit_log_task_context(struct audit_buffer *ab)
@@ -1783,42 +1766,6 @@ void __audit_syscall_entry(int arch, int major,
if (!context)
return;
/*
* This happens only on certain architectures that make system
* calls in kernel_thread via the entry.S interface, instead of
* with direct calls. (If you are porting to a new
* architecture, hitting this condition can indicate that you
* got the _exit/_leave calls backward in entry.S.)
*
* i386 no
* x86_64 no
* ppc64 yes (see arch/powerpc/platforms/iseries/misc.S)
*
* This also happens with vm86 emulation in a non-nested manner
* (entries without exits), so this case must be caught.
*/
if (context->in_syscall) {
struct audit_context *newctx;
#if AUDIT_DEBUG
printk(KERN_ERR
"audit(:%d) pid=%d in syscall=%d;"
" entering syscall=%d\n",
context->serial, tsk->pid, context->major, major);
#endif
newctx = audit_alloc_context(context->state);
if (newctx) {
newctx->previous = context;
context = newctx;
tsk->audit_context = newctx;
} else {
/* If we can't alloc a new context, the best we
* can do is to leak memory (any pending putname
* will be lost). The only other alternative is
* to abandon auditing. */
audit_zero_context(context, context->state);
}
}
BUG_ON(context->in_syscall || context->name_count);
if (!audit_enabled)
@@ -1881,28 +1828,21 @@ void __audit_syscall_exit(int success, long return_code)
if (!list_empty(&context->killed_trees))
audit_kill_trees(&context->killed_trees);
if (context->previous) {
struct audit_context *new_context = context->previous;
context->previous = NULL;
audit_free_context(context);
tsk->audit_context = new_context;
} else {
audit_free_names(context);
unroll_tree_refs(context, NULL, 0);
audit_free_aux(context);
context->aux = NULL;
context->aux_pids = NULL;
context->target_pid = 0;
context->target_sid = 0;
context->sockaddr_len = 0;
context->type = 0;
context->fds[0] = -1;
if (context->state != AUDIT_RECORD_CONTEXT) {
kfree(context->filterkey);
context->filterkey = NULL;
}
tsk->audit_context = context;
audit_free_names(context);
unroll_tree_refs(context, NULL, 0);
audit_free_aux(context);
context->aux = NULL;
context->aux_pids = NULL;
context->target_pid = 0;
context->target_sid = 0;
context->sockaddr_len = 0;
context->type = 0;
context->fds[0] = -1;
if (context->state != AUDIT_RECORD_CONTEXT) {
kfree(context->filterkey);
context->filterkey = NULL;
}
tsk->audit_context = context;
}
static inline void handle_one(const struct inode *inode)

View File

@@ -322,43 +322,6 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
}
}
/**
* reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd
*
* If a kernel thread is launched as a result of a system call, or if
* it ever exits, it should generally reparent itself to kthreadd so it
* isn't in the way of other processes and is correctly cleaned up on exit.
*
* The various task state such as scheduling policy and priority may have
* been inherited from a user process, so we reset them to sane values here.
*
* NOTE that reparent_to_kthreadd() gives the caller full capabilities.
*/
static void reparent_to_kthreadd(void)
{
write_lock_irq(&tasklist_lock);
ptrace_unlink(current);
/* Reparent to init */
current->real_parent = current->parent = kthreadd_task;
list_move_tail(&current->sibling, &current->real_parent->children);
/* Set the exit signal to SIGCHLD so we signal init on exit */
current->exit_signal = SIGCHLD;
if (task_nice(current) < 0)
set_user_nice(current, 0);
/* cpus_allowed? */
/* rt_priority? */
/* signals? */
memcpy(current->signal->rlim, init_task.signal->rlim,
sizeof(current->signal->rlim));
atomic_inc(&init_cred.usage);
commit_creds(&init_cred);
write_unlock_irq(&tasklist_lock);
}
void __set_special_pids(struct pid *pid)
{
struct task_struct *curr = current->group_leader;
@@ -370,13 +333,6 @@ void __set_special_pids(struct pid *pid)
change_pid(curr, PIDTYPE_PGID, pid);
}
static void set_special_pids(struct pid *pid)
{
write_lock_irq(&tasklist_lock);
__set_special_pids(pid);
write_unlock_irq(&tasklist_lock);
}
/*
* Let kernel threads use this to say that they allow a certain signal.
* Must not be used if kthread was cloned with CLONE_SIGHAND.
@@ -416,54 +372,6 @@ int disallow_signal(int sig)
EXPORT_SYMBOL(disallow_signal);
/*
* Put all the gunge required to become a kernel thread without
* attached user resources in one place where it belongs.
*/
void daemonize(const char *name, ...)
{
va_list args;
sigset_t blocked;
va_start(args, name);
vsnprintf(current->comm, sizeof(current->comm), name, args);
va_end(args);
/*
* If we were started as result of loading a module, close all of the
* user space pages. We don't need them, and if we didn't close them
* they would be locked into memory.
*/
exit_mm(current);
/*
* We don't want to get frozen, in case system-wide hibernation
* or suspend transition begins right now.
*/
current->flags |= (PF_NOFREEZE | PF_KTHREAD);
if (current->nsproxy != &init_nsproxy) {
get_nsproxy(&init_nsproxy);
switch_task_namespaces(current, &init_nsproxy);
}
set_special_pids(&init_struct_pid);
proc_clear_tty(current);
/* Block and flush all signals */
sigfillset(&blocked);
sigprocmask(SIG_BLOCK, &blocked, NULL);
flush_signals(current);
/* Become as one with the init task */
daemonize_fs_struct();
daemonize_descriptors();
reparent_to_kthreadd();
}
EXPORT_SYMBOL(daemonize);
#ifdef CONFIG_MM_OWNER
/*
* A task is exiting. If it owned this mm, find a new owner for the mm.

View File

@@ -1129,7 +1129,6 @@ static void posix_cpu_timers_init(struct task_struct *tsk)
*/
static struct task_struct *copy_process(unsigned long clone_flags,
unsigned long stack_start,
struct pt_regs *regs,
unsigned long stack_size,
int __user *child_tidptr,
struct pid *pid,
@@ -1321,7 +1320,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
retval = copy_io(clone_flags, p);
if (retval)
goto bad_fork_cleanup_namespaces;
retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
retval = copy_thread(clone_flags, stack_start, stack_size, p);
if (retval)
goto bad_fork_cleanup_io;
@@ -1510,12 +1509,6 @@ fork_out:
return ERR_PTR(retval);
}
noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
return regs;
}
static inline void init_idle_pids(struct pid_link *links)
{
enum pid_type type;
@@ -1529,10 +1522,7 @@ static inline void init_idle_pids(struct pid_link *links)
struct task_struct * __cpuinit fork_idle(int cpu)
{
struct task_struct *task;
struct pt_regs regs;
task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
&init_struct_pid, 0);
task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0);
if (!IS_ERR(task)) {
init_idle_pids(task->pids);
init_idle(task, cpu);
@@ -1549,7 +1539,6 @@ struct task_struct * __cpuinit fork_idle(int cpu)
*/
long do_fork(unsigned long clone_flags,
unsigned long stack_start,
struct pt_regs *regs,
unsigned long stack_size,
int __user *parent_tidptr,
int __user *child_tidptr)
@@ -1579,7 +1568,7 @@ long do_fork(unsigned long clone_flags,
* requested, no event is reported; otherwise, report if the event
* for the type of forking is enabled.
*/
if (!(clone_flags & CLONE_UNTRACED) && likely(user_mode(regs))) {
if (!(clone_flags & CLONE_UNTRACED)) {
if (clone_flags & CLONE_VFORK)
trace = PTRACE_EVENT_VFORK;
else if ((clone_flags & CSIGNAL) != SIGCHLD)
@@ -1591,7 +1580,7 @@ long do_fork(unsigned long clone_flags,
trace = 0;
}
p = copy_process(clone_flags, stack_start, regs, stack_size,
p = copy_process(clone_flags, stack_start, stack_size,
child_tidptr, NULL, trace);
/*
* Do this prior waking up the new thread - the thread pointer
@@ -1635,11 +1624,54 @@ long do_fork(unsigned long clone_flags,
*/
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, NULL,
return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
(unsigned long)arg, NULL, NULL);
}
#endif
#ifdef __ARCH_WANT_SYS_FORK
SYSCALL_DEFINE0(fork)
{
#ifdef CONFIG_MMU
return do_fork(SIGCHLD, 0, 0, NULL, NULL);
#else
/* can not support in nommu mode */
return(-EINVAL);
#endif
}
#endif
#ifdef __ARCH_WANT_SYS_VFORK
SYSCALL_DEFINE0(vfork)
{
return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
0, NULL, NULL);
}
#endif
#ifdef __ARCH_WANT_SYS_CLONE
#ifdef CONFIG_CLONE_BACKWARDS
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
int __user *, parent_tidptr,
int, tls_val,
int __user *, child_tidptr)
#elif defined(CONFIG_CLONE_BACKWARDS2)
SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
int __user *, parent_tidptr,
int __user *, child_tidptr,
int, tls_val)
#else
SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
int __user *, parent_tidptr,
int __user *, child_tidptr,
int, tls_val)
#endif
{
return do_fork(clone_flags, newsp, 0,
parent_tidptr, child_tidptr);
}
#endif
#ifndef ARCH_MIN_MMSTRUCT_ALIGN
#define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif

View File

@@ -1159,8 +1159,9 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
return __send_signal(sig, info, t, group, from_ancestor_ns);
}
static void print_fatal_signal(struct pt_regs *regs, int signr)
static void print_fatal_signal(int signr)
{
struct pt_regs *regs = signal_pt_regs();
printk("%s/%d: potentially unexpected fatal signal %d.\n",
current->comm, task_pid_nr(current), signr);
@@ -2131,10 +2132,9 @@ static void do_jobctl_trap(void)
}
}
static int ptrace_signal(int signr, siginfo_t *info,
struct pt_regs *regs, void *cookie)
static int ptrace_signal(int signr, siginfo_t *info)
{
ptrace_signal_deliver(regs, cookie);
ptrace_signal_deliver();
/*
* We do not check sig_kernel_stop(signr) but set this marker
* unconditionally because we do not know whether debugger will
@@ -2257,8 +2257,7 @@ relock:
break; /* will return 0 */
if (unlikely(current->ptrace) && signr != SIGKILL) {
signr = ptrace_signal(signr, info,
regs, cookie);
signr = ptrace_signal(signr, info);
if (!signr)
continue;
}
@@ -2343,7 +2342,7 @@ relock:
if (sig_kernel_coredump(signr)) {
if (print_fatal_signals)
print_fatal_signal(regs, info->si_signo);
print_fatal_signal(info->si_signo);
/*
* If it was able to dump core, this kills all
* other threads in the group and synchronizes with
@@ -2352,7 +2351,7 @@ relock:
* first and our do_group_exit call below will use
* that value and ignore the one we pass it.
*/
do_coredump(info, regs);
do_coredump(info);
}
/*