Merge branch 'x86/asm' into x86/core, to prepare for new patch
Collect all changes to arch/x86/entry/entry_64.S, before applying patch that changes most of the file. Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
10
arch/x86/entry/Makefile
Normal file
10
arch/x86/entry/Makefile
Normal file
@@ -0,0 +1,10 @@
|
||||
#
|
||||
# Makefile for the x86 low level entry code
|
||||
#
|
||||
obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
|
||||
|
||||
obj-y += vdso/
|
||||
obj-y += vsyscall/
|
||||
|
||||
obj-$(CONFIG_IA32_EMULATION) += entry_64_compat.o syscall_32.o
|
||||
|
243
arch/x86/entry/calling.h
Normal file
243
arch/x86/entry/calling.h
Normal file
@@ -0,0 +1,243 @@
|
||||
/*
|
||||
|
||||
x86 function call convention, 64-bit:
|
||||
-------------------------------------
|
||||
arguments | callee-saved | extra caller-saved | return
|
||||
[callee-clobbered] | | [callee-clobbered] |
|
||||
---------------------------------------------------------------------------
|
||||
rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11 | rax, rdx [**]
|
||||
|
||||
( rsp is obviously invariant across normal function calls. (gcc can 'merge'
|
||||
functions when it sees tail-call optimization possibilities) rflags is
|
||||
clobbered. Leftover arguments are passed over the stack frame.)
|
||||
|
||||
[*] In the frame-pointers case rbp is fixed to the stack frame.
|
||||
|
||||
[**] for struct return values wider than 64 bits the return convention is a
|
||||
bit more complex: up to 128 bits width we return small structures
|
||||
straight in rax, rdx. For structures larger than that (3 words or
|
||||
larger) the caller puts a pointer to an on-stack return struct
|
||||
[allocated in the caller's stack frame] into the first argument - i.e.
|
||||
into rdi. All other arguments shift up by one in this case.
|
||||
Fortunately this case is rare in the kernel.
|
||||
|
||||
For 32-bit we have the following conventions - kernel is built with
|
||||
-mregparm=3 and -freg-struct-return:
|
||||
|
||||
x86 function calling convention, 32-bit:
|
||||
----------------------------------------
|
||||
arguments | callee-saved | extra caller-saved | return
|
||||
[callee-clobbered] | | [callee-clobbered] |
|
||||
-------------------------------------------------------------------------
|
||||
eax edx ecx | ebx edi esi ebp [*] | <none> | eax, edx [**]
|
||||
|
||||
( here too esp is obviously invariant across normal function calls. eflags
|
||||
is clobbered. Leftover arguments are passed over the stack frame. )
|
||||
|
||||
[*] In the frame-pointers case ebp is fixed to the stack frame.
|
||||
|
||||
[**] We build with -freg-struct-return, which on 32-bit means similar
|
||||
semantics as on 64-bit: edx can be used for a second return value
|
||||
(i.e. covering integer and structure sizes up to 64 bits) - after that
|
||||
it gets more complex and more expensive: 3-word or larger struct returns
|
||||
get done in the caller's frame and the pointer to the return struct goes
|
||||
into regparm0, i.e. eax - the other arguments shift up and the
|
||||
function's register parameters degenerate to regparm=2 in essence.
|
||||
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
/*
|
||||
* 64-bit system call stack frame layout defines and helpers,
|
||||
* for assembly code:
|
||||
*/
|
||||
|
||||
/* The layout forms the "struct pt_regs" on the stack: */
|
||||
/*
|
||||
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
|
||||
* unless syscall needs a complete, fully filled "struct pt_regs".
|
||||
*/
|
||||
#define R15 0*8
|
||||
#define R14 1*8
|
||||
#define R13 2*8
|
||||
#define R12 3*8
|
||||
#define RBP 4*8
|
||||
#define RBX 5*8
|
||||
/* These regs are callee-clobbered. Always saved on kernel entry. */
|
||||
#define R11 6*8
|
||||
#define R10 7*8
|
||||
#define R9 8*8
|
||||
#define R8 9*8
|
||||
#define RAX 10*8
|
||||
#define RCX 11*8
|
||||
#define RDX 12*8
|
||||
#define RSI 13*8
|
||||
#define RDI 14*8
|
||||
/*
|
||||
* On syscall entry, this is syscall#. On CPU exception, this is error code.
|
||||
* On hw interrupt, it's IRQ number:
|
||||
*/
|
||||
#define ORIG_RAX 15*8
|
||||
/* Return frame for iretq */
|
||||
#define RIP 16*8
|
||||
#define CS 17*8
|
||||
#define EFLAGS 18*8
|
||||
#define RSP 19*8
|
||||
#define SS 20*8
|
||||
|
||||
#define SIZEOF_PTREGS 21*8
|
||||
|
||||
/*
 * Reserve room on the stack for the 15 general-purpose register slots of
 * pt_regs (R15 .. RDI, i.e. offsets 0*8 .. 14*8), plus \addskip extra bytes.
 * Only %rsp is moved (by adding a negative constant); nothing is stored.
 */
.macro ALLOC_PT_GPREGS_ON_STACK addskip=0
|
||||
addq $-(15*8+\addskip), %rsp
|
||||
.endm
|
||||
|
||||
/*
 * Store the callee-clobbered registers into their pt_regs slots, addressed
 * as <slot>*8 + \offset relative to %rsp.
 *
 *   offset - extra displacement added to every slot address
 *   rax    - nonzero: store %rax           (slot 10)
 *   rcx    - nonzero: store %rcx           (slot 11)
 *   r8910  - nonzero: store %r10, %r9, %r8 (slots 7, 8, 9)
 *   r11    - nonzero: store %r11           (slot 6)
 *
 * %rdx, %rsi and %rdi (slots 12-14) are stored unconditionally.
 * Only movq stores are emitted, so %rsp itself is not changed.
 */
.macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
|
||||
.if \r11
|
||||
movq %r11, 6*8+\offset(%rsp)
|
||||
.endif
|
||||
.if \r8910
|
||||
movq %r10, 7*8+\offset(%rsp)
|
||||
movq %r9, 8*8+\offset(%rsp)
|
||||
movq %r8, 9*8+\offset(%rsp)
|
||||
.endif
|
||||
.if \rax
|
||||
movq %rax, 10*8+\offset(%rsp)
|
||||
.endif
|
||||
.if \rcx
|
||||
movq %rcx, 11*8+\offset(%rsp)
|
||||
.endif
/* The remaining three registers are always saved. */
movq %rdx, 12*8+\offset(%rsp)
|
||||
movq %rsi, 13*8+\offset(%rsp)
|
||||
movq %rdi, 14*8+\offset(%rsp)
|
||||
.endm
|
||||
/*
 * Convenience wrappers around SAVE_C_REGS_HELPER. The positional arguments
 * are: offset, rax, rcx, r8910, r11 (1 = store, 0 = skip).
 * %rdx, %rsi and %rdi are stored by every variant.
 */

/* Store all callee-clobbered registers. */
.macro SAVE_C_REGS offset=0
|
||||
SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
|
||||
.endm
|
||||
/* Skip %rax and %rcx; store r8-r11, rdx, rsi, rdi. */
.macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
|
||||
SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
|
||||
.endm
|
||||
/* Skip r8-r11; store rax, rcx, rdx, rsi, rdi (offset fixed at 0). */
.macro SAVE_C_REGS_EXCEPT_R891011
|
||||
SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
|
||||
.endm
|
||||
/* Skip %rcx and r8-r11; store rax, rdx, rsi, rdi (offset fixed at 0). */
.macro SAVE_C_REGS_EXCEPT_RCX_R891011
|
||||
SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
|
||||
.endm
|
||||
/* Skip %rax, %rcx and %r11; store r8-r10, rdx, rsi, rdi (offset fixed at 0). */
.macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
|
||||
SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
|
||||
.endm
|
||||
|
||||
/*
 * Store the callee-preserved registers (r15, r14, r13, r12, rbp, rbx) into
 * pt_regs slots 0-5, addressed as <slot>*8 + \offset relative to %rsp.
 * Callers pass a nonzero \offset when something (e.g. a return address)
 * sits between %rsp and the start of pt_regs.
 */
.macro SAVE_EXTRA_REGS offset=0
|
||||
movq %r15, 0*8+\offset(%rsp)
|
||||
movq %r14, 1*8+\offset(%rsp)
|
||||
movq %r13, 2*8+\offset(%rsp)
|
||||
movq %r12, 3*8+\offset(%rsp)
|
||||
movq %rbp, 4*8+\offset(%rsp)
|
||||
movq %rbx, 5*8+\offset(%rsp)
|
||||
.endm
|
||||
/*
 * Store only %rbp into its pt_regs slot (slot 4, same location that
 * SAVE_EXTRA_REGS uses), leaving the other callee-preserved slots untouched.
 */
.macro SAVE_EXTRA_REGS_RBP offset=0
|
||||
movq %rbp, 4*8+\offset(%rsp)
|
||||
.endm
|
||||
|
||||
/*
 * Reload the callee-preserved registers (r15, r14, r13, r12, rbp, rbx) from
 * pt_regs slots 0-5 at <slot>*8 + \offset(%rsp). Mirror image of
 * SAVE_EXTRA_REGS; %rsp is not modified.
 */
.macro RESTORE_EXTRA_REGS offset=0
|
||||
movq 0*8+\offset(%rsp), %r15
|
||||
movq 1*8+\offset(%rsp), %r14
|
||||
movq 2*8+\offset(%rsp), %r13
|
||||
movq 3*8+\offset(%rsp), %r12
|
||||
movq 4*8+\offset(%rsp), %rbp
|
||||
movq 5*8+\offset(%rsp), %rbx
|
||||
.endm
|
||||
|
||||
/*
 * Clear the callee-preserved registers r15, r14, r13, r12, rbp and rbx.
 * The 32-bit xorl form is used: writing a 32-bit register also zeroes the
 * upper half of the 64-bit register. Clobbers rflags.
 */
.macro ZERO_EXTRA_REGS
|
||||
xorl %r15d, %r15d
|
||||
xorl %r14d, %r14d
|
||||
xorl %r13d, %r13d
|
||||
xorl %r12d, %r12d
|
||||
xorl %ebp, %ebp
|
||||
xorl %ebx, %ebx
|
||||
.endm
|
||||
|
||||
/*
 * Reload callee-clobbered registers from their pt_regs slots at
 * <slot>*8(%rsp). Unlike SAVE_C_REGS_HELPER there is no offset parameter,
 * so %rsp must point at the start of pt_regs.
 *
 *   rstor_rax   - nonzero: reload %rax           (slot 10)
 *   rstor_rcx   - nonzero: reload %rcx           (slot 11)
 *   rstor_r11   - nonzero: reload %r11           (slot 6)
 *   rstor_r8910 - nonzero: reload %r10, %r9, %r8 (slots 7, 8, 9)
 *   rstor_rdx   - nonzero: reload %rdx           (slot 12)
 *
 * %rsi and %rdi (slots 13, 14) are reloaded unconditionally.
 * Note the argument order differs from SAVE_C_REGS_HELPER.
 */
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
|
||||
.if \rstor_r11
|
||||
movq 6*8(%rsp), %r11
|
||||
.endif
|
||||
.if \rstor_r8910
|
||||
movq 7*8(%rsp), %r10
|
||||
movq 8*8(%rsp), %r9
|
||||
movq 9*8(%rsp), %r8
|
||||
.endif
|
||||
.if \rstor_rax
|
||||
movq 10*8(%rsp), %rax
|
||||
.endif
|
||||
.if \rstor_rcx
|
||||
movq 11*8(%rsp), %rcx
|
||||
.endif
|
||||
.if \rstor_rdx
|
||||
movq 12*8(%rsp), %rdx
|
||||
.endif
/* The remaining two registers are always restored. */
movq 13*8(%rsp), %rsi
|
||||
movq 14*8(%rsp), %rdi
|
||||
.endm
|
||||
/*
 * Convenience wrappers around RESTORE_C_REGS_HELPER. The positional
 * arguments are: rstor_rax, rstor_rcx, rstor_r11, rstor_r8910, rstor_rdx
 * (1 = reload, 0 = leave the register as is). %rsi and %rdi are reloaded
 * by every variant.
 */

/* Reload all callee-clobbered registers. */
.macro RESTORE_C_REGS
|
||||
RESTORE_C_REGS_HELPER 1,1,1,1,1
|
||||
.endm
|
||||
/* Reload everything except %rax. */
.macro RESTORE_C_REGS_EXCEPT_RAX
|
||||
RESTORE_C_REGS_HELPER 0,1,1,1,1
|
||||
.endm
|
||||
/* Reload everything except %rcx. */
.macro RESTORE_C_REGS_EXCEPT_RCX
|
||||
RESTORE_C_REGS_HELPER 1,0,1,1,1
|
||||
.endm
|
||||
/* Reload everything except %r11. */
.macro RESTORE_C_REGS_EXCEPT_R11
|
||||
RESTORE_C_REGS_HELPER 1,1,0,1,1
|
||||
.endm
|
||||
/* Reload everything except %rcx and %r11. */
.macro RESTORE_C_REGS_EXCEPT_RCX_R11
|
||||
RESTORE_C_REGS_HELPER 1,0,0,1,1
|
||||
.endm
|
||||
/* Reload only %rsi and %rdi. */
.macro RESTORE_RSI_RDI
|
||||
RESTORE_C_REGS_HELPER 0,0,0,0,0
|
||||
.endm
|
||||
/* Reload only %rsi, %rdi and %rdx. */
.macro RESTORE_RSI_RDI_RDX
|
||||
RESTORE_C_REGS_HELPER 0,0,0,0,1
|
||||
.endm
|
||||
|
||||
/*
 * Release the 15 general-purpose register slots of pt_regs plus \addskip
 * extra bytes: subtracting the negative constant moves %rsp up by
 * 15*8 + \addskip. Exact inverse of the addq in ALLOC_PT_GPREGS_ON_STACK
 * when used with the same \addskip. No registers are reloaded.
 */
.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
|
||||
subq $-(15*8+\addskip), %rsp
|
||||
.endm
|
||||
|
||||
/*
 * Emit the single opcode byte 0xf1 as raw data.
 * NOTE(review): presumably hand-encoded because the assembler offers no
 * mnemonic for this instruction - confirm.
 */
.macro icebp
|
||||
.byte 0xf1
|
||||
.endm
|
||||
|
||||
#else /* CONFIG_X86_64 */
|
||||
|
||||
/*
|
||||
* For 32bit only simplified versions of SAVE_ALL/RESTORE_ALL. These
|
||||
* are different from the entry_32.S versions in not changing the segment
|
||||
* registers. So only suitable for in kernel use, not when transitioning
|
||||
* from or to user space. The resulting stack frame is not a standard
|
||||
* pt_regs frame. The main use case is calling C code from assembler
|
||||
* when all the registers need to be preserved.
|
||||
*/
|
||||
|
||||
/*
 * 32-bit only: push eax, ebp, edi, esi, edx, ecx, ebx (in that order) onto
 * the stack. Segment registers are not touched. Must be paired with
 * RESTORE_ALL, which pops the same registers in reverse order.
 */
.macro SAVE_ALL
|
||||
pushl %eax
|
||||
pushl %ebp
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %edx
|
||||
pushl %ecx
|
||||
pushl %ebx
|
||||
.endm
|
||||
|
||||
/*
 * 32-bit only: pop ebx, ecx, edx, esi, edi, ebp, eax - the exact reverse of
 * the push order used by SAVE_ALL - restoring the registers it saved.
 */
.macro RESTORE_ALL
|
||||
popl %ebx
|
||||
popl %ecx
|
||||
popl %edx
|
||||
popl %esi
|
||||
popl %edi
|
||||
popl %ebp
|
||||
popl %eax
|
||||
.endm
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
1248
arch/x86/entry/entry_32.S
Normal file
1248
arch/x86/entry/entry_32.S
Normal file
File diff suppressed because it is too large
Load Diff
1447
arch/x86/entry/entry_64.S
Normal file
1447
arch/x86/entry/entry_64.S
Normal file
File diff suppressed because it is too large
Load Diff
547
arch/x86/entry/entry_64_compat.S
Normal file
547
arch/x86/entry/entry_64_compat.S
Normal file
@@ -0,0 +1,547 @@
|
||||
/*
|
||||
* Compatibility mode system call entry point for x86-64.
|
||||
*
|
||||
* Copyright 2000-2002 Andi Kleen, SuSE Labs.
|
||||
*/
|
||||
#include "calling.h"
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/current.h>
|
||||
#include <asm/errno.h>
|
||||
#include <asm/ia32_unistd.h>
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/segment.h>
|
||||
#include <asm/irqflags.h>
|
||||
#include <asm/asm.h>
|
||||
#include <asm/smap.h>
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/err.h>
|
||||
|
||||
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
|
||||
#include <linux/elf-em.h>
|
||||
#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
|
||||
#define __AUDIT_ARCH_LE 0x40000000
|
||||
|
||||
#ifndef CONFIG_AUDITSYSCALL
|
||||
# define sysexit_audit ia32_ret_from_sys_call
|
||||
# define sysretl_audit ia32_ret_from_sys_call
|
||||
#endif
|
||||
|
||||
.section .entry.text, "ax"
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
/*
 * Switch to the user GS base with swapgs, then return to 32-bit user mode
 * with sysretl. Only built when CONFIG_PARAVIRT is set.
 * NOTE(review): presumably the native (bare-metal) backend behind
 * USERGS_SYSRET32 - confirm against the paravirt definitions.
 */
ENTRY(native_usergs_sysret32)
|
||||
swapgs
|
||||
sysretl
|
||||
ENDPROC(native_usergs_sysret32)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* 32-bit SYSENTER instruction entry.
|
||||
*
|
||||
* SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
|
||||
* IF and VM in rflags are cleared (IOW: interrupts are off).
|
||||
* SYSENTER does not save anything on the stack,
|
||||
* and does not save old rip (!!!) and rflags.
|
||||
*
|
||||
* Arguments:
|
||||
* eax system call number
|
||||
* ebx arg1
|
||||
* ecx arg2
|
||||
* edx arg3
|
||||
* esi arg4
|
||||
* edi arg5
|
||||
* ebp user stack
|
||||
* 0(%ebp) arg6
|
||||
*
|
||||
* This is purely a fast path. For anything complicated we use the int 0x80
|
||||
* path below. We set up a complete hardware stack frame to share code
|
||||
* with the int 0x80 path.
|
||||
*/
|
||||
ENTRY(entry_SYSENTER_compat)
|
||||
/*
|
||||
* Interrupts are off on entry.
|
||||
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
|
||||
* it is too small to ever cause noticeable irq latency.
|
||||
*/
|
||||
SWAPGS_UNSAFE_STACK
|
||||
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
|
||||
/* Zero-extending 32-bit regs, do not remove */
|
||||
movl %ebp, %ebp
|
||||
movl %eax, %eax
|
||||
|
||||
movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
|
||||
|
||||
/* Construct struct pt_regs on stack */
|
||||
pushq $__USER32_DS /* pt_regs->ss */
|
||||
pushq %rbp /* pt_regs->sp */
|
||||
pushfq /* pt_regs->flags */
|
||||
pushq $__USER32_CS /* pt_regs->cs */
|
||||
pushq %r10 /* pt_regs->ip = thread_info->sysenter_return */
|
||||
pushq %rax /* pt_regs->orig_ax */
|
||||
pushq %rdi /* pt_regs->di */
|
||||
pushq %rsi /* pt_regs->si */
|
||||
pushq %rdx /* pt_regs->dx */
|
||||
pushq %rcx /* pt_regs->cx */
|
||||
pushq $-ENOSYS /* pt_regs->ax */
|
||||
cld
|
||||
sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */
|
||||
|
||||
/*
|
||||
* no need to do an access_ok check here because rbp has been
|
||||
* 32-bit zero extended
|
||||
*/
|
||||
ASM_STAC
|
||||
1: movl (%rbp), %ebp
|
||||
_ASM_EXTABLE(1b, ia32_badarg)
|
||||
ASM_CLAC
|
||||
|
||||
/*
|
||||
* Sysenter doesn't filter flags, so we need to clear NT
|
||||
* ourselves. To save a few cycles, we can check whether
|
||||
* NT was set instead of doing an unconditional popfq.
|
||||
*/
|
||||
testl $X86_EFLAGS_NT, EFLAGS(%rsp)
|
||||
jnz sysenter_fix_flags
|
||||
sysenter_flags_fixed:
|
||||
|
||||
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
|
||||
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz sysenter_tracesys
|
||||
|
||||
sysenter_do_call:
|
||||
/* 32-bit syscall -> 64-bit C ABI argument conversion */
|
||||
movl %edi, %r8d /* arg5 */
|
||||
movl %ebp, %r9d /* arg6 */
|
||||
xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
|
||||
movl %ebx, %edi /* arg1 */
|
||||
movl %edx, %edx /* arg3 (zero extension) */
|
||||
sysenter_dispatch:
|
||||
cmpq $(IA32_NR_syscalls-1), %rax
|
||||
ja 1f
|
||||
call *ia32_sys_call_table(, %rax, 8)
|
||||
movq %rax, RAX(%rsp)
|
||||
1:
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz sysexit_audit
|
||||
sysexit_from_sys_call:
|
||||
/*
|
||||
* NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
|
||||
* NMI between STI and SYSEXIT has poorly specified behavior,
|
||||
* and an NMI followed by an IRQ with usergs is fatal. So
|
||||
* we just pretend we're using SYSEXIT but we really use
|
||||
* SYSRETL instead.
|
||||
*
|
||||
* This code path is still called 'sysexit' because it pairs
|
||||
* with 'sysenter' and it uses the SYSENTER calling convention.
|
||||
*/
|
||||
andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
|
||||
movl RIP(%rsp), %ecx /* User %eip */
|
||||
RESTORE_RSI_RDI
|
||||
xorl %edx, %edx /* Do not leak kernel information */
|
||||
xorq %r8, %r8
|
||||
xorq %r9, %r9
|
||||
xorq %r10, %r10
|
||||
movl EFLAGS(%rsp), %r11d /* User eflags */
|
||||
TRACE_IRQS_ON
|
||||
|
||||
/*
|
||||
* SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT,
|
||||
* since it avoids a dicey window with interrupts enabled.
|
||||
*/
|
||||
movl RSP(%rsp), %esp
|
||||
|
||||
/*
|
||||
* USERGS_SYSRET32 does:
|
||||
* gsbase = user's gs base
|
||||
* eip = ecx
|
||||
* rflags = r11
|
||||
* cs = __USER32_CS
|
||||
* ss = __USER_DS
|
||||
*
|
||||
* The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
|
||||
*
|
||||
* pop %ebp
|
||||
* pop %edx
|
||||
* pop %ecx
|
||||
*
|
||||
* Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
|
||||
* avoid info leaks. ECX ends up with VDSO32_SYSENTER_RETURN's
|
||||
* address (already known to user code), R11 holds the user's eflags, and R12-R15 are
|
||||
* callee-saved and therefore don't contain any interesting
|
||||
* kernel data.
|
||||
*/
|
||||
USERGS_SYSRET32
|
||||
|
||||
#ifdef CONFIG_AUDITSYSCALL
|
||||
.macro auditsys_entry_common
|
||||
movl %esi, %r8d /* 5th arg: 4th syscall arg */
|
||||
movl %ecx, %r9d /* swap with edx */
|
||||
movl %edx, %ecx /* 4th arg: 3rd syscall arg */
|
||||
movl %r9d, %edx /* 3rd arg: 2nd syscall arg */
|
||||
movl %ebx, %esi /* 2nd arg: 1st syscall arg */
|
||||
movl %eax, %edi /* 1st arg: syscall number */
|
||||
call __audit_syscall_entry
|
||||
movl ORIG_RAX(%rsp), %eax /* reload syscall number */
|
||||
movl %ebx, %edi /* reload 1st syscall arg */
|
||||
movl RCX(%rsp), %esi /* reload 2nd syscall arg */
|
||||
movl RDX(%rsp), %edx /* reload 3rd syscall arg */
|
||||
movl RSI(%rsp), %ecx /* reload 4th syscall arg */
|
||||
movl RDI(%rsp), %r8d /* reload 5th syscall arg */
|
||||
.endm
|
||||
|
||||
.macro auditsys_exit exit
|
||||
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz ia32_ret_from_sys_call
|
||||
TRACE_IRQS_ON
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
movl %eax, %esi /* second arg, syscall return value */
|
||||
cmpl $-MAX_ERRNO, %eax /* is it an error ? */
|
||||
jbe 1f
|
||||
movslq %eax, %rsi /* if error sign extend to 64 bits */
|
||||
1: setbe %al /* 1 if error, 0 if not */
|
||||
movzbl %al, %edi /* zero-extend that into %edi */
|
||||
call __audit_syscall_exit
|
||||
movq RAX(%rsp), %rax /* reload syscall return value */
|
||||
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %edi
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jz \exit
|
||||
xorl %eax, %eax /* Do not leak kernel information */
|
||||
movq %rax, R11(%rsp)
|
||||
movq %rax, R10(%rsp)
|
||||
movq %rax, R9(%rsp)
|
||||
movq %rax, R8(%rsp)
|
||||
jmp int_with_check
|
||||
.endm
|
||||
|
||||
sysenter_auditsys:
|
||||
auditsys_entry_common
|
||||
movl %ebp, %r9d /* reload 6th syscall arg */
|
||||
jmp sysenter_dispatch
|
||||
|
||||
sysexit_audit:
|
||||
auditsys_exit sysexit_from_sys_call
|
||||
#endif
|
||||
|
||||
sysenter_fix_flags:
|
||||
pushq $(X86_EFLAGS_IF|X86_EFLAGS_FIXED)
|
||||
popfq
|
||||
jmp sysenter_flags_fixed
|
||||
|
||||
sysenter_tracesys:
|
||||
#ifdef CONFIG_AUDITSYSCALL
|
||||
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jz sysenter_auditsys
|
||||
#endif
|
||||
SAVE_EXTRA_REGS
|
||||
xorl %eax, %eax /* Do not leak kernel information */
|
||||
movq %rax, R11(%rsp)
|
||||
movq %rax, R10(%rsp)
|
||||
movq %rax, R9(%rsp)
|
||||
movq %rax, R8(%rsp)
|
||||
movq %rsp, %rdi /* &pt_regs -> arg1 */
|
||||
call syscall_trace_enter
|
||||
|
||||
/* Reload arg registers from stack. (see ia32_tracesys) */
|
||||
movl RCX(%rsp), %ecx
|
||||
movl RDX(%rsp), %edx
|
||||
movl RSI(%rsp), %esi
|
||||
movl RDI(%rsp), %edi
|
||||
movl %eax, %eax /* zero extension */
|
||||
|
||||
RESTORE_EXTRA_REGS
|
||||
jmp sysenter_do_call
|
||||
ENDPROC(entry_SYSENTER_compat)
|
||||
|
||||
/*
|
||||
* 32-bit SYSCALL instruction entry.
|
||||
*
|
||||
* 32-bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
|
||||
* then loads new ss, cs, and rip from previously programmed MSRs.
|
||||
* rflags gets masked by a value from another MSR (so CLD and CLAC
|
||||
* are not needed). SYSCALL does not save anything on the stack
|
||||
* and does not change rsp.
|
||||
*
|
||||
* Note: rflags saving+masking-with-MSR happens only in Long mode
|
||||
* (in legacy 32-bit mode, IF, RF and VM bits are cleared and that's it).
|
||||
* Don't get confused: rflags saving+masking depends on Long Mode Active bit
|
||||
* (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
|
||||
* or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
|
||||
*
|
||||
* Arguments:
|
||||
* eax system call number
|
||||
* ecx return address
|
||||
* ebx arg1
|
||||
* ebp arg2 (note: not saved in the stack frame, should not be touched)
|
||||
* edx arg3
|
||||
* esi arg4
|
||||
* edi arg5
|
||||
* esp user stack
|
||||
* 0(%esp) arg6
|
||||
*
|
||||
* This is purely a fast path. For anything complicated we use the int 0x80
|
||||
* path below. We set up a complete hardware stack frame to share code
|
||||
* with the int 0x80 path.
|
||||
*/
|
||||
ENTRY(entry_SYSCALL_compat)
|
||||
/*
|
||||
* Interrupts are off on entry.
|
||||
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
|
||||
* it is too small to ever cause noticeable irq latency.
|
||||
*/
|
||||
SWAPGS_UNSAFE_STACK
|
||||
movl %esp, %r8d
|
||||
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
|
||||
/* Zero-extending 32-bit regs, do not remove */
|
||||
movl %eax, %eax
|
||||
|
||||
/* Construct struct pt_regs on stack */
|
||||
pushq $__USER32_DS /* pt_regs->ss */
|
||||
pushq %r8 /* pt_regs->sp */
|
||||
pushq %r11 /* pt_regs->flags */
|
||||
pushq $__USER32_CS /* pt_regs->cs */
|
||||
pushq %rcx /* pt_regs->ip */
|
||||
pushq %rax /* pt_regs->orig_ax */
|
||||
pushq %rdi /* pt_regs->di */
|
||||
pushq %rsi /* pt_regs->si */
|
||||
pushq %rdx /* pt_regs->dx */
|
||||
pushq %rbp /* pt_regs->cx */
|
||||
movl %ebp, %ecx
|
||||
pushq $-ENOSYS /* pt_regs->ax */
|
||||
sub $(10*8), %rsp /* pt_regs->r8-11, bp, bx, r12-15 not saved */
|
||||
|
||||
/*
|
||||
* No need to do an access_ok check here because r8 has been
|
||||
* 32-bit zero extended:
|
||||
*/
|
||||
ASM_STAC
|
||||
1: movl (%r8), %ebp
|
||||
_ASM_EXTABLE(1b, ia32_badarg)
|
||||
ASM_CLAC
|
||||
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
|
||||
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz cstar_tracesys
|
||||
|
||||
cstar_do_call:
|
||||
/* 32-bit syscall -> 64-bit C ABI argument conversion */
|
||||
movl %edi, %r8d /* arg5 */
|
||||
movl %ebp, %r9d /* arg6 */
|
||||
xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
|
||||
movl %ebx, %edi /* arg1 */
|
||||
movl %edx, %edx /* arg3 (zero extension) */
|
||||
|
||||
cstar_dispatch:
|
||||
cmpq $(IA32_NR_syscalls-1), %rax
|
||||
ja 1f
|
||||
|
||||
call *ia32_sys_call_table(, %rax, 8)
|
||||
movq %rax, RAX(%rsp)
|
||||
1:
|
||||
DISABLE_INTERRUPTS(CLBR_NONE)
|
||||
TRACE_IRQS_OFF
|
||||
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz sysretl_audit
|
||||
|
||||
sysretl_from_sys_call:
|
||||
andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
|
||||
movl RCX(%rsp), %ebp
|
||||
RESTORE_RSI_RDI_RDX
|
||||
movl RIP(%rsp), %ecx
|
||||
movl EFLAGS(%rsp), %r11d
|
||||
xorq %r10, %r10
|
||||
xorq %r9, %r9
|
||||
xorq %r8, %r8
|
||||
TRACE_IRQS_ON
|
||||
movl RSP(%rsp), %esp
|
||||
/*
|
||||
* 64-bit->32-bit SYSRET restores eip from ecx,
|
||||
* eflags from r11 (but RF and VM bits are forced to 0),
|
||||
* cs and ss are loaded from MSRs.
|
||||
* (Note: 32-bit->32-bit SYSRET is different: since r11
|
||||
* does not exist, it merely sets eflags.IF=1).
|
||||
*
|
||||
* NB: On AMD CPUs with the X86_BUG_SYSRET_SS_ATTRS bug, the ss
|
||||
* descriptor is not reinitialized. This means that we must
|
||||
* avoid SYSRET with SS == NULL, which could happen if we schedule,
|
||||
* exit the kernel, and re-enter using an interrupt vector. (All
|
||||
* interrupt entries on x86_64 set SS to NULL.) We prevent that
|
||||
* from happening by reloading SS in __switch_to.
|
||||
*/
|
||||
USERGS_SYSRET32
|
||||
|
||||
#ifdef CONFIG_AUDITSYSCALL
|
||||
cstar_auditsys:
|
||||
auditsys_entry_common
|
||||
movl %ebp, %r9d /* reload 6th syscall arg */
|
||||
jmp cstar_dispatch
|
||||
|
||||
sysretl_audit:
|
||||
auditsys_exit sysretl_from_sys_call
|
||||
#endif
|
||||
|
||||
cstar_tracesys:
|
||||
#ifdef CONFIG_AUDITSYSCALL
|
||||
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jz cstar_auditsys
|
||||
#endif
|
||||
SAVE_EXTRA_REGS
|
||||
xorl %eax, %eax /* Do not leak kernel information */
|
||||
movq %rax, R11(%rsp)
|
||||
movq %rax, R10(%rsp)
|
||||
movq %rax, R9(%rsp)
|
||||
movq %rax, R8(%rsp)
|
||||
movq %rsp, %rdi /* &pt_regs -> arg1 */
|
||||
call syscall_trace_enter
|
||||
|
||||
/* Reload arg registers from stack. (see ia32_tracesys) */
|
||||
movl RCX(%rsp), %ecx
|
||||
movl RDX(%rsp), %edx
|
||||
movl RSI(%rsp), %esi
|
||||
movl RDI(%rsp), %edi
|
||||
movl %eax, %eax /* zero extension */
|
||||
|
||||
RESTORE_EXTRA_REGS
|
||||
jmp cstar_do_call
|
||||
END(entry_SYSCALL_compat)
|
||||
|
||||
ia32_badarg:
|
||||
ASM_CLAC
|
||||
movq $-EFAULT, %rax
|
||||
jmp ia32_sysret
|
||||
|
||||
ia32_ret_from_sys_call:
|
||||
xorl %eax, %eax /* Do not leak kernel information */
|
||||
movq %rax, R11(%rsp)
|
||||
movq %rax, R10(%rsp)
|
||||
movq %rax, R9(%rsp)
|
||||
movq %rax, R8(%rsp)
|
||||
jmp int_ret_from_sys_call
|
||||
|
||||
/*
|
||||
* Emulated IA32 system calls via int 0x80.
|
||||
*
|
||||
* Arguments:
|
||||
* eax system call number
|
||||
* ebx arg1
|
||||
* ecx arg2
|
||||
* edx arg3
|
||||
* esi arg4
|
||||
* edi arg5
|
||||
* ebp arg6 (note: not saved in the stack frame, should not be touched)
|
||||
*
|
||||
* Notes:
|
||||
* Uses the same stack frame as the x86-64 version.
|
||||
* All registers except eax must be saved (but ptrace may violate that).
|
||||
* Arguments are zero extended. For system calls that want sign extension and
|
||||
* take long arguments a wrapper is needed. Most calls can just be called
|
||||
* directly.
|
||||
* Assumes it is only called from user space and entered with interrupts off.
|
||||
*/
|
||||
|
||||
ENTRY(entry_INT80_compat)
|
||||
/*
|
||||
* Interrupts are off on entry.
|
||||
* We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
|
||||
* it is too small to ever cause noticeable irq latency.
|
||||
*/
|
||||
PARAVIRT_ADJUST_EXCEPTION_FRAME
|
||||
SWAPGS
|
||||
ENABLE_INTERRUPTS(CLBR_NONE)
|
||||
|
||||
/* Zero-extending 32-bit regs, do not remove */
|
||||
movl %eax, %eax
|
||||
|
||||
/* Construct struct pt_regs on stack (iret frame is already on stack) */
|
||||
pushq %rax /* pt_regs->orig_ax */
|
||||
pushq %rdi /* pt_regs->di */
|
||||
pushq %rsi /* pt_regs->si */
|
||||
pushq %rdx /* pt_regs->dx */
|
||||
pushq %rcx /* pt_regs->cx */
|
||||
pushq $-ENOSYS /* pt_regs->ax */
|
||||
pushq $0 /* pt_regs->r8 */
|
||||
pushq $0 /* pt_regs->r9 */
|
||||
pushq $0 /* pt_regs->r10 */
|
||||
pushq $0 /* pt_regs->r11 */
|
||||
cld
|
||||
sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */
|
||||
|
||||
orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
|
||||
testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
|
||||
jnz ia32_tracesys
|
||||
|
||||
/*
 * Dispatch the int 0x80 system call whose number is in %rax.
 * The 32-bit argument registers are shuffled into the 64-bit C argument
 * registers, the number is range-checked, and the handler's return value
 * is written to the RAX slot of pt_regs before leaving through
 * int_ret_from_sys_call.
 */
ia32_do_call:
|
||||
/* 32-bit syscall -> 64-bit C ABI argument conversion */
|
||||
movl %edi, %r8d /* arg5 */
|
||||
movl %ebp, %r9d /* arg6 */
|
||||
xchg %ecx, %esi /* rsi:arg2, rcx:arg4 */
|
||||
movl %ebx, %edi /* arg1 */
|
||||
movl %edx, %edx /* arg3 (zero extension) */
|
||||
/* Unsigned range check: an out-of-range number skips the call and the store below. */
cmpq $(IA32_NR_syscalls-1), %rax
|
||||
ja 1f
|
||||
|
||||
call *ia32_sys_call_table(, %rax, 8) /* table indexed by %rax*8; indexed addressing, not RIP-relative */
|
||||
|
||||
/* Also entered directly (e.g. from ia32_badarg) with the result already in %rax. */
ia32_sysret:
|
||||
movq %rax, RAX(%rsp)
|
||||
1:
|
||||
jmp int_ret_from_sys_call
|
||||
|
||||
ia32_tracesys:
|
||||
SAVE_EXTRA_REGS
|
||||
movq %rsp, %rdi /* &pt_regs -> arg1 */
|
||||
call syscall_trace_enter
|
||||
/*
|
||||
* Reload arg registers from stack in case ptrace changed them.
|
||||
* Don't reload %eax because syscall_trace_enter() returned
|
||||
* the %rax value we should see. But do truncate it to 32 bits.
|
||||
* If it's -1 to make us punt the syscall, then (u32)-1 is still
|
||||
* an appropriately invalid value.
|
||||
*/
|
||||
movl RCX(%rsp), %ecx
|
||||
movl RDX(%rsp), %edx
|
||||
movl RSI(%rsp), %esi
|
||||
movl RDI(%rsp), %edi
|
||||
movl %eax, %eax /* zero extension */
|
||||
RESTORE_EXTRA_REGS
|
||||
jmp ia32_do_call
|
||||
END(entry_INT80_compat)
|
||||
|
||||
/*
 * Define an aligned global stub \label that loads the address of \func
 * into %rax (RIP-relative leaq) and jumps to ia32_ptregs_common, which
 * performs the actual call. The argument registers are left untouched.
 */
.macro PTREGSCALL label, func
|
||||
ALIGN
|
||||
GLOBAL(\label)
|
||||
leaq \func(%rip), %rax
|
||||
jmp ia32_ptregs_common
|
||||
.endm
|
||||
|
||||
/* Stubs for the handlers that are routed through ia32_ptregs_common. */
PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
|
||||
PTREGSCALL stub32_sigreturn, sys32_sigreturn
|
||||
PTREGSCALL stub32_fork, sys_fork
|
||||
PTREGSCALL stub32_vfork, sys_vfork
|
||||
|
||||
/*
 * Stub for the 32-bit clone: same shape as the PTREGSCALL stubs (target
 * address in %rax, then jump to ia32_ptregs_common), but the contents of
 * %r8 and %rcx are exchanged first to adapt the argument order.
 */
ALIGN
|
||||
GLOBAL(stub32_clone)
|
||||
leaq sys_clone(%rip), %rax
|
||||
/*
|
||||
* The 32-bit clone ABI is: clone(..., int tls_val, int *child_tidptr).
|
||||
* The 64-bit clone ABI is: clone(..., int *child_tidptr, int tls_val).
|
||||
*
|
||||
* The native 64-bit kernel's sys_clone() implements the latter,
|
||||
* so we need to swap arguments here before calling it:
|
||||
*/
|
||||
xchg %r8, %rcx
|
||||
jmp ia32_ptregs_common
|
||||
|
||||
/*
 * Common tail of the stub32_* entry points. On entry %rax holds the address
 * of the C handler. The callee-preserved registers are stored into pt_regs
 * around the indirect call and reloaded afterwards.
 * The offset of 8 skips one stack slot between %rsp and pt_regs -
 * presumably the return address pushed by the dispatching call; confirm.
 */
ALIGN
|
||||
ia32_ptregs_common:
|
||||
SAVE_EXTRA_REGS 8
|
||||
call *%rax
|
||||
RESTORE_EXTRA_REGS 8
|
||||
ret
|
||||
END(ia32_ptregs_common)
|
33
arch/x86/entry/syscall_32.c
Normal file
33
arch/x86/entry/syscall_32.c
Normal file
@@ -0,0 +1,33 @@
|
||||
/* System call table for i386. */
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/sys.h>
|
||||
#include <linux/cache.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
#define SYM(sym, compat) compat
|
||||
#else
|
||||
#define SYM(sym, compat) sym
|
||||
#define ia32_sys_call_table sys_call_table
|
||||
#define __NR_entry_INT80_compat_max __NR_syscall_max
|
||||
#endif
|
||||
|
||||
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ;
|
||||
#include <asm/syscalls_32.h>
|
||||
#undef __SYSCALL_I386
|
||||
|
||||
#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
|
||||
|
||||
typedef asmlinkage void (*sys_call_ptr_t)(void);
|
||||
|
||||
extern asmlinkage void sys_ni_syscall(void);
|
||||
|
||||
/*
 * i386 system call dispatch table, indexed by system call number.
 * Every slot is first set to sys_ni_syscall by the range designator; the
 * generated <asm/syscalls_32.h> is then expanded with __SYSCALL_I386 defined
 * as "[nr] = SYM(sym, compat)," so that each listed call overrides its slot.
 */
__visible const sys_call_ptr_t ia32_sys_call_table[__NR_entry_INT80_compat_max+1] = {
|
||||
/*
|
||||
* Smells like a compiler bug -- it doesn't work
|
||||
* when the & below is removed.
|
||||
*/
|
||||
[0 ... __NR_entry_INT80_compat_max] = &sys_ni_syscall,
|
||||
#include <asm/syscalls_32.h>
|
||||
};
|
32
arch/x86/entry/syscall_64.c
Normal file
32
arch/x86/entry/syscall_64.c
Normal file
@@ -0,0 +1,32 @@
|
||||
/* System call table for x86-64. */
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/sys.h>
|
||||
#include <linux/cache.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/syscall.h>
|
||||
|
||||
#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
|
||||
|
||||
#ifdef CONFIG_X86_X32_ABI
|
||||
# define __SYSCALL_X32(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
|
||||
#else
|
||||
# define __SYSCALL_X32(nr, sym, compat) /* nothing */
|
||||
#endif
|
||||
|
||||
#define __SYSCALL_64(nr, sym, compat) extern asmlinkage void sym(void) ;
|
||||
#include <asm/syscalls_64.h>
|
||||
#undef __SYSCALL_64
|
||||
|
||||
#define __SYSCALL_64(nr, sym, compat) [nr] = sym,
|
||||
|
||||
extern void sys_ni_syscall(void);
|
||||
|
||||
/*
 * x86-64 system call dispatch table, indexed by system call number.
 * Every slot is first set to sys_ni_syscall by the range designator; the
 * generated <asm/syscalls_64.h> is then expanded with __SYSCALL_64 defined
 * as "[nr] = sym," so that each listed call overrides its slot.
 */
asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
|
||||
/*
|
||||
* Smells like a compiler bug -- it doesn't work
|
||||
* when the & below is removed.
|
||||
*/
|
||||
[0 ... __NR_syscall_max] = &sys_ni_syscall,
|
||||
#include <asm/syscalls_64.h>
|
||||
};
|
69
arch/x86/entry/syscalls/Makefile
Normal file
69
arch/x86/entry/syscalls/Makefile
Normal file
@@ -0,0 +1,69 @@
|
||||
out := $(obj)/../../include/generated/asm
|
||||
uapi := $(obj)/../../include/generated/uapi/asm
|
||||
|
||||
# Create output directory if not already present
|
||||
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
|
||||
$(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')
|
||||
|
||||
syscall32 := $(srctree)/$(src)/syscall_32.tbl
|
||||
syscall64 := $(srctree)/$(src)/syscall_64.tbl
|
||||
|
||||
syshdr := $(srctree)/$(src)/syscallhdr.sh
|
||||
systbl := $(srctree)/$(src)/syscalltbl.sh
|
||||
|
||||
quiet_cmd_syshdr = SYSHDR $@
|
||||
cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
|
||||
'$(syshdr_abi_$(basetarget))' \
|
||||
'$(syshdr_pfx_$(basetarget))' \
|
||||
'$(syshdr_offset_$(basetarget))'
|
||||
quiet_cmd_systbl = SYSTBL $@
|
||||
cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@
|
||||
|
||||
quiet_cmd_hypercalls = HYPERCALLS $@
|
||||
cmd_hypercalls = $(CONFIG_SHELL) '$<' $@ $(filter-out $<,$^)
|
||||
|
||||
syshdr_abi_unistd_32 := i386
|
||||
$(uapi)/unistd_32.h: $(syscall32) $(syshdr)
|
||||
$(call if_changed,syshdr)
|
||||
|
||||
syshdr_abi_unistd_32_ia32 := i386
|
||||
syshdr_pfx_unistd_32_ia32 := ia32_
|
||||
$(out)/unistd_32_ia32.h: $(syscall32) $(syshdr)
|
||||
$(call if_changed,syshdr)
|
||||
|
||||
syshdr_abi_unistd_x32 := common,x32
|
||||
syshdr_offset_unistd_x32 := __X32_SYSCALL_BIT
|
||||
$(uapi)/unistd_x32.h: $(syscall64) $(syshdr)
|
||||
$(call if_changed,syshdr)
|
||||
|
||||
syshdr_abi_unistd_64 := common,64
|
||||
$(uapi)/unistd_64.h: $(syscall64) $(syshdr)
|
||||
$(call if_changed,syshdr)
|
||||
|
||||
syshdr_abi_unistd_64_x32 := x32
|
||||
syshdr_pfx_unistd_64_x32 := x32_
|
||||
$(out)/unistd_64_x32.h: $(syscall64) $(syshdr)
|
||||
$(call if_changed,syshdr)
|
||||
|
||||
$(out)/syscalls_32.h: $(syscall32) $(systbl)
|
||||
$(call if_changed,systbl)
|
||||
$(out)/syscalls_64.h: $(syscall64) $(systbl)
|
||||
$(call if_changed,systbl)
|
||||
|
||||
$(out)/xen-hypercalls.h: $(srctree)/scripts/xen-hypercalls.sh
|
||||
$(call if_changed,hypercalls)
|
||||
|
||||
$(out)/xen-hypercalls.h: $(srctree)/include/xen/interface/xen*.h
|
||||
|
||||
uapisyshdr-y += unistd_32.h unistd_64.h unistd_x32.h
|
||||
syshdr-y += syscalls_32.h
|
||||
syshdr-$(CONFIG_X86_64) += unistd_32_ia32.h unistd_64_x32.h
|
||||
syshdr-$(CONFIG_X86_64) += syscalls_64.h
|
||||
syshdr-$(CONFIG_XEN) += xen-hypercalls.h
|
||||
|
||||
targets += $(uapisyshdr-y) $(syshdr-y)
|
||||
|
||||
PHONY += all
|
||||
all: $(addprefix $(uapi)/,$(uapisyshdr-y))
|
||||
all: $(addprefix $(out)/,$(syshdr-y))
|
||||
@:
|
367
arch/x86/entry/syscalls/syscall_32.tbl
Normal file
367
arch/x86/entry/syscalls/syscall_32.tbl
Normal file
@@ -0,0 +1,367 @@
|
||||
#
|
||||
# 32-bit system call numbers and entry vectors
|
||||
#
|
||||
# The format is:
|
||||
# <number> <abi> <name> <entry point> <compat entry point>
|
||||
#
|
||||
# The abi is always "i386" for this file.
|
||||
#
|
||||
0 i386 restart_syscall sys_restart_syscall
|
||||
1 i386 exit sys_exit
|
||||
2 i386 fork sys_fork stub32_fork
|
||||
3 i386 read sys_read
|
||||
4 i386 write sys_write
|
||||
5 i386 open sys_open compat_sys_open
|
||||
6 i386 close sys_close
|
||||
7 i386 waitpid sys_waitpid sys32_waitpid
|
||||
8 i386 creat sys_creat
|
||||
9 i386 link sys_link
|
||||
10 i386 unlink sys_unlink
|
||||
11 i386 execve sys_execve stub32_execve
|
||||
12 i386 chdir sys_chdir
|
||||
13 i386 time sys_time compat_sys_time
|
||||
14 i386 mknod sys_mknod
|
||||
15 i386 chmod sys_chmod
|
||||
16 i386 lchown sys_lchown16
|
||||
17 i386 break
|
||||
18 i386 oldstat sys_stat
|
||||
19 i386 lseek sys_lseek compat_sys_lseek
|
||||
20 i386 getpid sys_getpid
|
||||
21 i386 mount sys_mount compat_sys_mount
|
||||
22 i386 umount sys_oldumount
|
||||
23 i386 setuid sys_setuid16
|
||||
24 i386 getuid sys_getuid16
|
||||
25 i386 stime sys_stime compat_sys_stime
|
||||
26 i386 ptrace sys_ptrace compat_sys_ptrace
|
||||
27 i386 alarm sys_alarm
|
||||
28 i386 oldfstat sys_fstat
|
||||
29 i386 pause sys_pause
|
||||
30 i386 utime sys_utime compat_sys_utime
|
||||
31 i386 stty
|
||||
32 i386 gtty
|
||||
33 i386 access sys_access
|
||||
34 i386 nice sys_nice
|
||||
35 i386 ftime
|
||||
36 i386 sync sys_sync
|
||||
37 i386 kill sys_kill
|
||||
38 i386 rename sys_rename
|
||||
39 i386 mkdir sys_mkdir
|
||||
40 i386 rmdir sys_rmdir
|
||||
41 i386 dup sys_dup
|
||||
42 i386 pipe sys_pipe
|
||||
43 i386 times sys_times compat_sys_times
|
||||
44 i386 prof
|
||||
45 i386 brk sys_brk
|
||||
46 i386 setgid sys_setgid16
|
||||
47 i386 getgid sys_getgid16
|
||||
48 i386 signal sys_signal
|
||||
49 i386 geteuid sys_geteuid16
|
||||
50 i386 getegid sys_getegid16
|
||||
51 i386 acct sys_acct
|
||||
52 i386 umount2 sys_umount
|
||||
53 i386 lock
|
||||
54 i386 ioctl sys_ioctl compat_sys_ioctl
|
||||
55 i386 fcntl sys_fcntl compat_sys_fcntl64
|
||||
56 i386 mpx
|
||||
57 i386 setpgid sys_setpgid
|
||||
58 i386 ulimit
|
||||
59 i386 oldolduname sys_olduname
|
||||
60 i386 umask sys_umask
|
||||
61 i386 chroot sys_chroot
|
||||
62 i386 ustat sys_ustat compat_sys_ustat
|
||||
63 i386 dup2 sys_dup2
|
||||
64 i386 getppid sys_getppid
|
||||
65 i386 getpgrp sys_getpgrp
|
||||
66 i386 setsid sys_setsid
|
||||
67 i386 sigaction sys_sigaction compat_sys_sigaction
|
||||
68 i386 sgetmask sys_sgetmask
|
||||
69 i386 ssetmask sys_ssetmask
|
||||
70 i386 setreuid sys_setreuid16
|
||||
71 i386 setregid sys_setregid16
|
||||
72 i386 sigsuspend sys_sigsuspend sys_sigsuspend
|
||||
73 i386 sigpending sys_sigpending compat_sys_sigpending
|
||||
74 i386 sethostname sys_sethostname
|
||||
75 i386 setrlimit sys_setrlimit compat_sys_setrlimit
|
||||
76 i386 getrlimit sys_old_getrlimit compat_sys_old_getrlimit
|
||||
77 i386 getrusage sys_getrusage compat_sys_getrusage
|
||||
78 i386 gettimeofday sys_gettimeofday compat_sys_gettimeofday
|
||||
79 i386 settimeofday sys_settimeofday compat_sys_settimeofday
|
||||
80 i386 getgroups sys_getgroups16
|
||||
81 i386 setgroups sys_setgroups16
|
||||
82 i386 select sys_old_select compat_sys_old_select
|
||||
83 i386 symlink sys_symlink
|
||||
84 i386 oldlstat sys_lstat
|
||||
85 i386 readlink sys_readlink
|
||||
86 i386 uselib sys_uselib
|
||||
87 i386 swapon sys_swapon
|
||||
88 i386 reboot sys_reboot
|
||||
89 i386 readdir sys_old_readdir compat_sys_old_readdir
|
||||
90 i386 mmap sys_old_mmap sys32_mmap
|
||||
91 i386 munmap sys_munmap
|
||||
92 i386 truncate sys_truncate compat_sys_truncate
|
||||
93 i386 ftruncate sys_ftruncate compat_sys_ftruncate
|
||||
94 i386 fchmod sys_fchmod
|
||||
95 i386 fchown sys_fchown16
|
||||
96 i386 getpriority sys_getpriority
|
||||
97 i386 setpriority sys_setpriority
|
||||
98 i386 profil
|
||||
99 i386 statfs sys_statfs compat_sys_statfs
|
||||
100 i386 fstatfs sys_fstatfs compat_sys_fstatfs
|
||||
101 i386 ioperm sys_ioperm
|
||||
102 i386 socketcall sys_socketcall compat_sys_socketcall
|
||||
103 i386 syslog sys_syslog
|
||||
104 i386 setitimer sys_setitimer compat_sys_setitimer
|
||||
105 i386 getitimer sys_getitimer compat_sys_getitimer
|
||||
106 i386 stat sys_newstat compat_sys_newstat
|
||||
107 i386 lstat sys_newlstat compat_sys_newlstat
|
||||
108 i386 fstat sys_newfstat compat_sys_newfstat
|
||||
109 i386 olduname sys_uname
|
||||
110 i386 iopl sys_iopl
|
||||
111 i386 vhangup sys_vhangup
|
||||
112 i386 idle
|
||||
113 i386 vm86old sys_vm86old sys_ni_syscall
|
||||
114 i386 wait4 sys_wait4 compat_sys_wait4
|
||||
115 i386 swapoff sys_swapoff
|
||||
116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
|
||||
117 i386 ipc sys_ipc compat_sys_ipc
|
||||
118 i386 fsync sys_fsync
|
||||
119 i386 sigreturn sys_sigreturn stub32_sigreturn
|
||||
120 i386 clone sys_clone stub32_clone
|
||||
121 i386 setdomainname sys_setdomainname
|
||||
122 i386 uname sys_newuname
|
||||
123 i386 modify_ldt sys_modify_ldt
|
||||
124 i386 adjtimex sys_adjtimex compat_sys_adjtimex
|
||||
125 i386 mprotect sys_mprotect
|
||||
126 i386 sigprocmask sys_sigprocmask compat_sys_sigprocmask
|
||||
127 i386 create_module
|
||||
128 i386 init_module sys_init_module
|
||||
129 i386 delete_module sys_delete_module
|
||||
130 i386 get_kernel_syms
|
||||
131 i386 quotactl sys_quotactl sys32_quotactl
|
||||
132 i386 getpgid sys_getpgid
|
||||
133 i386 fchdir sys_fchdir
|
||||
134 i386 bdflush sys_bdflush
|
||||
135 i386 sysfs sys_sysfs
|
||||
136 i386 personality sys_personality
|
||||
137 i386 afs_syscall
|
||||
138 i386 setfsuid sys_setfsuid16
|
||||
139 i386 setfsgid sys_setfsgid16
|
||||
140 i386 _llseek sys_llseek
|
||||
141 i386 getdents sys_getdents compat_sys_getdents
|
||||
142 i386 _newselect sys_select compat_sys_select
|
||||
143 i386 flock sys_flock
|
||||
144 i386 msync sys_msync
|
||||
145 i386 readv sys_readv compat_sys_readv
|
||||
146 i386 writev sys_writev compat_sys_writev
|
||||
147 i386 getsid sys_getsid
|
||||
148 i386 fdatasync sys_fdatasync
|
||||
149 i386 _sysctl sys_sysctl compat_sys_sysctl
|
||||
150 i386 mlock sys_mlock
|
||||
151 i386 munlock sys_munlock
|
||||
152 i386 mlockall sys_mlockall
|
||||
153 i386 munlockall sys_munlockall
|
||||
154 i386 sched_setparam sys_sched_setparam
|
||||
155 i386 sched_getparam sys_sched_getparam
|
||||
156 i386 sched_setscheduler sys_sched_setscheduler
|
||||
157 i386 sched_getscheduler sys_sched_getscheduler
|
||||
158 i386 sched_yield sys_sched_yield
|
||||
159 i386 sched_get_priority_max sys_sched_get_priority_max
|
||||
160 i386 sched_get_priority_min sys_sched_get_priority_min
|
||||
161 i386 sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
|
||||
162 i386 nanosleep sys_nanosleep compat_sys_nanosleep
|
||||
163 i386 mremap sys_mremap
|
||||
164 i386 setresuid sys_setresuid16
|
||||
165 i386 getresuid sys_getresuid16
|
||||
166 i386 vm86 sys_vm86 sys_ni_syscall
|
||||
167 i386 query_module
|
||||
168 i386 poll sys_poll
|
||||
169 i386 nfsservctl
|
||||
170 i386 setresgid sys_setresgid16
|
||||
171 i386 getresgid sys_getresgid16
|
||||
172 i386 prctl sys_prctl
|
||||
173 i386 rt_sigreturn sys_rt_sigreturn stub32_rt_sigreturn
|
||||
174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
|
||||
175 i386 rt_sigprocmask sys_rt_sigprocmask
|
||||
176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
|
||||
177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
|
||||
178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
|
||||
179 i386 rt_sigsuspend sys_rt_sigsuspend
|
||||
180 i386 pread64 sys_pread64 sys32_pread
|
||||
181 i386 pwrite64 sys_pwrite64 sys32_pwrite
|
||||
182 i386 chown sys_chown16
|
||||
183 i386 getcwd sys_getcwd
|
||||
184 i386 capget sys_capget
|
||||
185 i386 capset sys_capset
|
||||
186 i386 sigaltstack sys_sigaltstack compat_sys_sigaltstack
|
||||
187 i386 sendfile sys_sendfile compat_sys_sendfile
|
||||
188 i386 getpmsg
|
||||
189 i386 putpmsg
|
||||
190 i386 vfork sys_vfork stub32_vfork
|
||||
191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
|
||||
192 i386 mmap2 sys_mmap_pgoff
|
||||
193 i386 truncate64 sys_truncate64 sys32_truncate64
|
||||
194 i386 ftruncate64 sys_ftruncate64 sys32_ftruncate64
|
||||
195 i386 stat64 sys_stat64 sys32_stat64
|
||||
196 i386 lstat64 sys_lstat64 sys32_lstat64
|
||||
197 i386 fstat64 sys_fstat64 sys32_fstat64
|
||||
198 i386 lchown32 sys_lchown
|
||||
199 i386 getuid32 sys_getuid
|
||||
200 i386 getgid32 sys_getgid
|
||||
201 i386 geteuid32 sys_geteuid
|
||||
202 i386 getegid32 sys_getegid
|
||||
203 i386 setreuid32 sys_setreuid
|
||||
204 i386 setregid32 sys_setregid
|
||||
205 i386 getgroups32 sys_getgroups
|
||||
206 i386 setgroups32 sys_setgroups
|
||||
207 i386 fchown32 sys_fchown
|
||||
208 i386 setresuid32 sys_setresuid
|
||||
209 i386 getresuid32 sys_getresuid
|
||||
210 i386 setresgid32 sys_setresgid
|
||||
211 i386 getresgid32 sys_getresgid
|
||||
212 i386 chown32 sys_chown
|
||||
213 i386 setuid32 sys_setuid
|
||||
214 i386 setgid32 sys_setgid
|
||||
215 i386 setfsuid32 sys_setfsuid
|
||||
216 i386 setfsgid32 sys_setfsgid
|
||||
217 i386 pivot_root sys_pivot_root
|
||||
218 i386 mincore sys_mincore
|
||||
219 i386 madvise sys_madvise
|
||||
220 i386 getdents64 sys_getdents64 compat_sys_getdents64
|
||||
221 i386 fcntl64 sys_fcntl64 compat_sys_fcntl64
|
||||
# 222 is unused
|
||||
# 223 is unused
|
||||
224 i386 gettid sys_gettid
|
||||
225 i386 readahead sys_readahead sys32_readahead
|
||||
226 i386 setxattr sys_setxattr
|
||||
227 i386 lsetxattr sys_lsetxattr
|
||||
228 i386 fsetxattr sys_fsetxattr
|
||||
229 i386 getxattr sys_getxattr
|
||||
230 i386 lgetxattr sys_lgetxattr
|
||||
231 i386 fgetxattr sys_fgetxattr
|
||||
232 i386 listxattr sys_listxattr
|
||||
233 i386 llistxattr sys_llistxattr
|
||||
234 i386 flistxattr sys_flistxattr
|
||||
235 i386 removexattr sys_removexattr
|
||||
236 i386 lremovexattr sys_lremovexattr
|
||||
237 i386 fremovexattr sys_fremovexattr
|
||||
238 i386 tkill sys_tkill
|
||||
239 i386 sendfile64 sys_sendfile64
|
||||
240 i386 futex sys_futex compat_sys_futex
|
||||
241 i386 sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
|
||||
242 i386 sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
|
||||
243 i386 set_thread_area sys_set_thread_area
|
||||
244 i386 get_thread_area sys_get_thread_area
|
||||
245 i386 io_setup sys_io_setup compat_sys_io_setup
|
||||
246 i386 io_destroy sys_io_destroy
|
||||
247 i386 io_getevents sys_io_getevents compat_sys_io_getevents
|
||||
248 i386 io_submit sys_io_submit compat_sys_io_submit
|
||||
249 i386 io_cancel sys_io_cancel
|
||||
250 i386 fadvise64 sys_fadvise64 sys32_fadvise64
|
||||
# 251 is available for reuse (was briefly sys_set_zone_reclaim)
|
||||
252 i386 exit_group sys_exit_group
|
||||
253 i386 lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
|
||||
254 i386 epoll_create sys_epoll_create
|
||||
255 i386 epoll_ctl sys_epoll_ctl
|
||||
256 i386 epoll_wait sys_epoll_wait
|
||||
257 i386 remap_file_pages sys_remap_file_pages
|
||||
258 i386 set_tid_address sys_set_tid_address
|
||||
259 i386 timer_create sys_timer_create compat_sys_timer_create
|
||||
260 i386 timer_settime sys_timer_settime compat_sys_timer_settime
|
||||
261 i386 timer_gettime sys_timer_gettime compat_sys_timer_gettime
|
||||
262 i386 timer_getoverrun sys_timer_getoverrun
|
||||
263 i386 timer_delete sys_timer_delete
|
||||
264 i386 clock_settime sys_clock_settime compat_sys_clock_settime
|
||||
265 i386 clock_gettime sys_clock_gettime compat_sys_clock_gettime
|
||||
266 i386 clock_getres sys_clock_getres compat_sys_clock_getres
|
||||
267 i386 clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep
|
||||
268 i386 statfs64 sys_statfs64 compat_sys_statfs64
|
||||
269 i386 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
|
||||
270 i386 tgkill sys_tgkill
|
||||
271 i386 utimes sys_utimes compat_sys_utimes
|
||||
272 i386 fadvise64_64 sys_fadvise64_64 sys32_fadvise64_64
|
||||
273 i386 vserver
|
||||
274 i386 mbind sys_mbind
|
||||
275 i386 get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
|
||||
276 i386 set_mempolicy sys_set_mempolicy
|
||||
277 i386 mq_open sys_mq_open compat_sys_mq_open
|
||||
278 i386 mq_unlink sys_mq_unlink
|
||||
279 i386 mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
|
||||
280 i386 mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
|
||||
281 i386 mq_notify sys_mq_notify compat_sys_mq_notify
|
||||
282 i386 mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
|
||||
283 i386 kexec_load sys_kexec_load compat_sys_kexec_load
|
||||
284 i386 waitid sys_waitid compat_sys_waitid
|
||||
# 285 sys_setaltroot
|
||||
286 i386 add_key sys_add_key
|
||||
287 i386 request_key sys_request_key
|
||||
288 i386 keyctl sys_keyctl
|
||||
289 i386 ioprio_set sys_ioprio_set
|
||||
290 i386 ioprio_get sys_ioprio_get
|
||||
291 i386 inotify_init sys_inotify_init
|
||||
292 i386 inotify_add_watch sys_inotify_add_watch
|
||||
293 i386 inotify_rm_watch sys_inotify_rm_watch
|
||||
294 i386 migrate_pages sys_migrate_pages
|
||||
295 i386 openat sys_openat compat_sys_openat
|
||||
296 i386 mkdirat sys_mkdirat
|
||||
297 i386 mknodat sys_mknodat
|
||||
298 i386 fchownat sys_fchownat
|
||||
299 i386 futimesat sys_futimesat compat_sys_futimesat
|
||||
300 i386 fstatat64 sys_fstatat64 sys32_fstatat
|
||||
301 i386 unlinkat sys_unlinkat
|
||||
302 i386 renameat sys_renameat
|
||||
303 i386 linkat sys_linkat
|
||||
304 i386 symlinkat sys_symlinkat
|
||||
305 i386 readlinkat sys_readlinkat
|
||||
306 i386 fchmodat sys_fchmodat
|
||||
307 i386 faccessat sys_faccessat
|
||||
308 i386 pselect6 sys_pselect6 compat_sys_pselect6
|
||||
309 i386 ppoll sys_ppoll compat_sys_ppoll
|
||||
310 i386 unshare sys_unshare
|
||||
311 i386 set_robust_list sys_set_robust_list compat_sys_set_robust_list
|
||||
312 i386 get_robust_list sys_get_robust_list compat_sys_get_robust_list
|
||||
313 i386 splice sys_splice
|
||||
314 i386 sync_file_range sys_sync_file_range sys32_sync_file_range
|
||||
315 i386 tee sys_tee
|
||||
316 i386 vmsplice sys_vmsplice compat_sys_vmsplice
|
||||
317 i386 move_pages sys_move_pages compat_sys_move_pages
|
||||
318 i386 getcpu sys_getcpu
|
||||
319 i386 epoll_pwait sys_epoll_pwait
|
||||
320 i386 utimensat sys_utimensat compat_sys_utimensat
|
||||
321 i386 signalfd sys_signalfd compat_sys_signalfd
|
||||
322 i386 timerfd_create sys_timerfd_create
|
||||
323 i386 eventfd sys_eventfd
|
||||
324 i386 fallocate sys_fallocate sys32_fallocate
|
||||
325 i386 timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
|
||||
326 i386 timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
|
||||
327 i386 signalfd4 sys_signalfd4 compat_sys_signalfd4
|
||||
328 i386 eventfd2 sys_eventfd2
|
||||
329 i386 epoll_create1 sys_epoll_create1
|
||||
330 i386 dup3 sys_dup3
|
||||
331 i386 pipe2 sys_pipe2
|
||||
332 i386 inotify_init1 sys_inotify_init1
|
||||
333 i386 preadv sys_preadv compat_sys_preadv
|
||||
334 i386 pwritev sys_pwritev compat_sys_pwritev
|
||||
335 i386 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
|
||||
336 i386 perf_event_open sys_perf_event_open
|
||||
337 i386 recvmmsg sys_recvmmsg compat_sys_recvmmsg
|
||||
338 i386 fanotify_init sys_fanotify_init
|
||||
339 i386 fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
|
||||
340 i386 prlimit64 sys_prlimit64
|
||||
341 i386 name_to_handle_at sys_name_to_handle_at
|
||||
342 i386 open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
|
||||
343 i386 clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
|
||||
344 i386 syncfs sys_syncfs
|
||||
345 i386 sendmmsg sys_sendmmsg compat_sys_sendmmsg
|
||||
346 i386 setns sys_setns
|
||||
347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
|
||||
348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
|
||||
349 i386 kcmp sys_kcmp
|
||||
350 i386 finit_module sys_finit_module
|
||||
351 i386 sched_setattr sys_sched_setattr
|
||||
352 i386 sched_getattr sys_sched_getattr
|
||||
353 i386 renameat2 sys_renameat2
|
||||
354 i386 seccomp sys_seccomp
|
||||
355 i386 getrandom sys_getrandom
|
||||
356 i386 memfd_create sys_memfd_create
|
||||
357 i386 bpf sys_bpf
|
||||
358 i386 execveat sys_execveat stub32_execveat
|
370
arch/x86/entry/syscalls/syscall_64.tbl
Normal file
370
arch/x86/entry/syscalls/syscall_64.tbl
Normal file
@@ -0,0 +1,370 @@
|
||||
#
|
||||
# 64-bit system call numbers and entry vectors
|
||||
#
|
||||
# The format is:
|
||||
# <number> <abi> <name> <entry point>
|
||||
#
|
||||
# The abi is "common", "64" or "x32" for this file.
|
||||
#
|
||||
0 common read sys_read
|
||||
1 common write sys_write
|
||||
2 common open sys_open
|
||||
3 common close sys_close
|
||||
4 common stat sys_newstat
|
||||
5 common fstat sys_newfstat
|
||||
6 common lstat sys_newlstat
|
||||
7 common poll sys_poll
|
||||
8 common lseek sys_lseek
|
||||
9 common mmap sys_mmap
|
||||
10 common mprotect sys_mprotect
|
||||
11 common munmap sys_munmap
|
||||
12 common brk sys_brk
|
||||
13 64 rt_sigaction sys_rt_sigaction
|
||||
14 common rt_sigprocmask sys_rt_sigprocmask
|
||||
15 64 rt_sigreturn stub_rt_sigreturn
|
||||
16 64 ioctl sys_ioctl
|
||||
17 common pread64 sys_pread64
|
||||
18 common pwrite64 sys_pwrite64
|
||||
19 64 readv sys_readv
|
||||
20 64 writev sys_writev
|
||||
21 common access sys_access
|
||||
22 common pipe sys_pipe
|
||||
23 common select sys_select
|
||||
24 common sched_yield sys_sched_yield
|
||||
25 common mremap sys_mremap
|
||||
26 common msync sys_msync
|
||||
27 common mincore sys_mincore
|
||||
28 common madvise sys_madvise
|
||||
29 common shmget sys_shmget
|
||||
30 common shmat sys_shmat
|
||||
31 common shmctl sys_shmctl
|
||||
32 common dup sys_dup
|
||||
33 common dup2 sys_dup2
|
||||
34 common pause sys_pause
|
||||
35 common nanosleep sys_nanosleep
|
||||
36 common getitimer sys_getitimer
|
||||
37 common alarm sys_alarm
|
||||
38 common setitimer sys_setitimer
|
||||
39 common getpid sys_getpid
|
||||
40 common sendfile sys_sendfile64
|
||||
41 common socket sys_socket
|
||||
42 common connect sys_connect
|
||||
43 common accept sys_accept
|
||||
44 common sendto sys_sendto
|
||||
45 64 recvfrom sys_recvfrom
|
||||
46 64 sendmsg sys_sendmsg
|
||||
47 64 recvmsg sys_recvmsg
|
||||
48 common shutdown sys_shutdown
|
||||
49 common bind sys_bind
|
||||
50 common listen sys_listen
|
||||
51 common getsockname sys_getsockname
|
||||
52 common getpeername sys_getpeername
|
||||
53 common socketpair sys_socketpair
|
||||
54 64 setsockopt sys_setsockopt
|
||||
55 64 getsockopt sys_getsockopt
|
||||
56 common clone stub_clone
|
||||
57 common fork stub_fork
|
||||
58 common vfork stub_vfork
|
||||
59 64 execve stub_execve
|
||||
60 common exit sys_exit
|
||||
61 common wait4 sys_wait4
|
||||
62 common kill sys_kill
|
||||
63 common uname sys_newuname
|
||||
64 common semget sys_semget
|
||||
65 common semop sys_semop
|
||||
66 common semctl sys_semctl
|
||||
67 common shmdt sys_shmdt
|
||||
68 common msgget sys_msgget
|
||||
69 common msgsnd sys_msgsnd
|
||||
70 common msgrcv sys_msgrcv
|
||||
71 common msgctl sys_msgctl
|
||||
72 common fcntl sys_fcntl
|
||||
73 common flock sys_flock
|
||||
74 common fsync sys_fsync
|
||||
75 common fdatasync sys_fdatasync
|
||||
76 common truncate sys_truncate
|
||||
77 common ftruncate sys_ftruncate
|
||||
78 common getdents sys_getdents
|
||||
79 common getcwd sys_getcwd
|
||||
80 common chdir sys_chdir
|
||||
81 common fchdir sys_fchdir
|
||||
82 common rename sys_rename
|
||||
83 common mkdir sys_mkdir
|
||||
84 common rmdir sys_rmdir
|
||||
85 common creat sys_creat
|
||||
86 common link sys_link
|
||||
87 common unlink sys_unlink
|
||||
88 common symlink sys_symlink
|
||||
89 common readlink sys_readlink
|
||||
90 common chmod sys_chmod
|
||||
91 common fchmod sys_fchmod
|
||||
92 common chown sys_chown
|
||||
93 common fchown sys_fchown
|
||||
94 common lchown sys_lchown
|
||||
95 common umask sys_umask
|
||||
96 common gettimeofday sys_gettimeofday
|
||||
97 common getrlimit sys_getrlimit
|
||||
98 common getrusage sys_getrusage
|
||||
99 common sysinfo sys_sysinfo
|
||||
100 common times sys_times
|
||||
101 64 ptrace sys_ptrace
|
||||
102 common getuid sys_getuid
|
||||
103 common syslog sys_syslog
|
||||
104 common getgid sys_getgid
|
||||
105 common setuid sys_setuid
|
||||
106 common setgid sys_setgid
|
||||
107 common geteuid sys_geteuid
|
||||
108 common getegid sys_getegid
|
||||
109 common setpgid sys_setpgid
|
||||
110 common getppid sys_getppid
|
||||
111 common getpgrp sys_getpgrp
|
||||
112 common setsid sys_setsid
|
||||
113 common setreuid sys_setreuid
|
||||
114 common setregid sys_setregid
|
||||
115 common getgroups sys_getgroups
|
||||
116 common setgroups sys_setgroups
|
||||
117 common setresuid sys_setresuid
|
||||
118 common getresuid sys_getresuid
|
||||
119 common setresgid sys_setresgid
|
||||
120 common getresgid sys_getresgid
|
||||
121 common getpgid sys_getpgid
|
||||
122 common setfsuid sys_setfsuid
|
||||
123 common setfsgid sys_setfsgid
|
||||
124 common getsid sys_getsid
|
||||
125 common capget sys_capget
|
||||
126 common capset sys_capset
|
||||
127 64 rt_sigpending sys_rt_sigpending
|
||||
128 64 rt_sigtimedwait sys_rt_sigtimedwait
|
||||
129 64 rt_sigqueueinfo sys_rt_sigqueueinfo
|
||||
130 common rt_sigsuspend sys_rt_sigsuspend
|
||||
131 64 sigaltstack sys_sigaltstack
|
||||
132 common utime sys_utime
|
||||
133 common mknod sys_mknod
|
||||
134 64 uselib
|
||||
135 common personality sys_personality
|
||||
136 common ustat sys_ustat
|
||||
137 common statfs sys_statfs
|
||||
138 common fstatfs sys_fstatfs
|
||||
139 common sysfs sys_sysfs
|
||||
140 common getpriority sys_getpriority
|
||||
141 common setpriority sys_setpriority
|
||||
142 common sched_setparam sys_sched_setparam
|
||||
143 common sched_getparam sys_sched_getparam
|
||||
144 common sched_setscheduler sys_sched_setscheduler
|
||||
145 common sched_getscheduler sys_sched_getscheduler
|
||||
146 common sched_get_priority_max sys_sched_get_priority_max
|
||||
147 common sched_get_priority_min sys_sched_get_priority_min
|
||||
148 common sched_rr_get_interval sys_sched_rr_get_interval
|
||||
149 common mlock sys_mlock
|
||||
150 common munlock sys_munlock
|
||||
151 common mlockall sys_mlockall
|
||||
152 common munlockall sys_munlockall
|
||||
153 common vhangup sys_vhangup
|
||||
154 common modify_ldt sys_modify_ldt
|
||||
155 common pivot_root sys_pivot_root
|
||||
156 64 _sysctl sys_sysctl
|
||||
157 common prctl sys_prctl
|
||||
158 common arch_prctl sys_arch_prctl
|
||||
159 common adjtimex sys_adjtimex
|
||||
160 common setrlimit sys_setrlimit
|
||||
161 common chroot sys_chroot
|
||||
162 common sync sys_sync
|
||||
163 common acct sys_acct
|
||||
164 common settimeofday sys_settimeofday
|
||||
165 common mount sys_mount
|
||||
166 common umount2 sys_umount
|
||||
167 common swapon sys_swapon
|
||||
168 common swapoff sys_swapoff
|
||||
169 common reboot sys_reboot
|
||||
170 common sethostname sys_sethostname
|
||||
171 common setdomainname sys_setdomainname
|
||||
172 common iopl sys_iopl
|
||||
173 common ioperm sys_ioperm
|
||||
174 64 create_module
|
||||
175 common init_module sys_init_module
|
||||
176 common delete_module sys_delete_module
|
||||
177 64 get_kernel_syms
|
||||
178 64 query_module
|
||||
179 common quotactl sys_quotactl
|
||||
180 64 nfsservctl
|
||||
181 common getpmsg
|
||||
182 common putpmsg
|
||||
183 common afs_syscall
|
||||
184 common tuxcall
|
||||
185 common security
|
||||
186 common gettid sys_gettid
|
||||
187 common readahead sys_readahead
|
||||
188 common setxattr sys_setxattr
|
||||
189 common lsetxattr sys_lsetxattr
|
||||
190 common fsetxattr sys_fsetxattr
|
||||
191 common getxattr sys_getxattr
|
||||
192 common lgetxattr sys_lgetxattr
|
||||
193 common fgetxattr sys_fgetxattr
|
||||
194 common listxattr sys_listxattr
|
||||
195 common llistxattr sys_llistxattr
|
||||
196 common flistxattr sys_flistxattr
|
||||
197 common removexattr sys_removexattr
|
||||
198 common lremovexattr sys_lremovexattr
|
||||
199 common fremovexattr sys_fremovexattr
|
||||
200 common tkill sys_tkill
|
||||
201 common time sys_time
|
||||
202 common futex sys_futex
|
||||
203 common sched_setaffinity sys_sched_setaffinity
|
||||
204 common sched_getaffinity sys_sched_getaffinity
|
||||
205 64 set_thread_area
|
||||
206 64 io_setup sys_io_setup
|
||||
207 common io_destroy sys_io_destroy
|
||||
208 common io_getevents sys_io_getevents
|
||||
209 64 io_submit sys_io_submit
|
||||
210 common io_cancel sys_io_cancel
|
||||
211 64 get_thread_area
|
||||
212 common lookup_dcookie sys_lookup_dcookie
|
||||
213 common epoll_create sys_epoll_create
|
||||
214 64 epoll_ctl_old
|
||||
215 64 epoll_wait_old
|
||||
216 common remap_file_pages sys_remap_file_pages
|
||||
217 common getdents64 sys_getdents64
|
||||
218 common set_tid_address sys_set_tid_address
|
||||
219 common restart_syscall sys_restart_syscall
|
||||
220 common semtimedop sys_semtimedop
|
||||
221 common fadvise64 sys_fadvise64
|
||||
222 64 timer_create sys_timer_create
|
||||
223 common timer_settime sys_timer_settime
|
||||
224 common timer_gettime sys_timer_gettime
|
||||
225 common timer_getoverrun sys_timer_getoverrun
|
||||
226 common timer_delete sys_timer_delete
|
||||
227 common clock_settime sys_clock_settime
|
||||
228 common clock_gettime sys_clock_gettime
|
||||
229 common clock_getres sys_clock_getres
|
||||
230 common clock_nanosleep sys_clock_nanosleep
|
||||
231 common exit_group sys_exit_group
|
||||
232 common epoll_wait sys_epoll_wait
|
||||
233 common epoll_ctl sys_epoll_ctl
|
||||
234 common tgkill sys_tgkill
|
||||
235 common utimes sys_utimes
|
||||
236 64 vserver
|
||||
237 common mbind sys_mbind
|
||||
238 common set_mempolicy sys_set_mempolicy
|
||||
239 common get_mempolicy sys_get_mempolicy
|
||||
240 common mq_open sys_mq_open
|
||||
241 common mq_unlink sys_mq_unlink
|
||||
242 common mq_timedsend sys_mq_timedsend
|
||||
243 common mq_timedreceive sys_mq_timedreceive
|
||||
244 64 mq_notify sys_mq_notify
|
||||
245 common mq_getsetattr sys_mq_getsetattr
|
||||
246 64 kexec_load sys_kexec_load
|
||||
247 64 waitid sys_waitid
|
||||
248 common add_key sys_add_key
|
||||
249 common request_key sys_request_key
|
||||
250 common keyctl sys_keyctl
|
||||
251 common ioprio_set sys_ioprio_set
|
||||
252 common ioprio_get sys_ioprio_get
|
||||
253 common inotify_init sys_inotify_init
|
||||
254 common inotify_add_watch sys_inotify_add_watch
|
||||
255 common inotify_rm_watch sys_inotify_rm_watch
|
||||
256 common migrate_pages sys_migrate_pages
|
||||
257 common openat sys_openat
|
||||
258 common mkdirat sys_mkdirat
|
||||
259 common mknodat sys_mknodat
|
||||
260 common fchownat sys_fchownat
|
||||
261 common futimesat sys_futimesat
|
||||
262 common newfstatat sys_newfstatat
|
||||
263 common unlinkat sys_unlinkat
|
||||
264 common renameat sys_renameat
|
||||
265 common linkat sys_linkat
|
||||
266 common symlinkat sys_symlinkat
|
||||
267 common readlinkat sys_readlinkat
|
||||
268 common fchmodat sys_fchmodat
|
||||
269 common faccessat sys_faccessat
|
||||
270 common pselect6 sys_pselect6
|
||||
271 common ppoll sys_ppoll
|
||||
272 common unshare sys_unshare
|
||||
273 64 set_robust_list sys_set_robust_list
|
||||
274 64 get_robust_list sys_get_robust_list
|
||||
275 common splice sys_splice
|
||||
276 common tee sys_tee
|
||||
277 common sync_file_range sys_sync_file_range
|
||||
278 64 vmsplice sys_vmsplice
|
||||
279 64 move_pages sys_move_pages
|
||||
280 common utimensat sys_utimensat
|
||||
281 common epoll_pwait sys_epoll_pwait
|
||||
282 common signalfd sys_signalfd
|
||||
283 common timerfd_create sys_timerfd_create
|
||||
284 common eventfd sys_eventfd
|
||||
285 common fallocate sys_fallocate
|
||||
286 common timerfd_settime sys_timerfd_settime
|
||||
287 common timerfd_gettime sys_timerfd_gettime
|
||||
288 common accept4 sys_accept4
|
||||
289 common signalfd4 sys_signalfd4
|
||||
290 common eventfd2 sys_eventfd2
|
||||
291 common epoll_create1 sys_epoll_create1
|
||||
292 common dup3 sys_dup3
|
||||
293 common pipe2 sys_pipe2
|
||||
294 common inotify_init1 sys_inotify_init1
|
||||
295 64 preadv sys_preadv
|
||||
296 64 pwritev sys_pwritev
|
||||
297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
|
||||
298 common perf_event_open sys_perf_event_open
|
||||
299 64 recvmmsg sys_recvmmsg
|
||||
300 common fanotify_init sys_fanotify_init
|
||||
301 common fanotify_mark sys_fanotify_mark
|
||||
302 common prlimit64 sys_prlimit64
|
||||
303 common name_to_handle_at sys_name_to_handle_at
|
||||
304 common open_by_handle_at sys_open_by_handle_at
|
||||
305 common clock_adjtime sys_clock_adjtime
|
||||
306 common syncfs sys_syncfs
|
||||
307 64 sendmmsg sys_sendmmsg
|
||||
308 common setns sys_setns
|
||||
309 common getcpu sys_getcpu
|
||||
310 64 process_vm_readv sys_process_vm_readv
|
||||
311 64 process_vm_writev sys_process_vm_writev
|
||||
312 common kcmp sys_kcmp
|
||||
313 common finit_module sys_finit_module
|
||||
314 common sched_setattr sys_sched_setattr
|
||||
315 common sched_getattr sys_sched_getattr
|
||||
316 common renameat2 sys_renameat2
|
||||
317 common seccomp sys_seccomp
|
||||
318 common getrandom sys_getrandom
|
||||
319 common memfd_create sys_memfd_create
|
||||
320 common kexec_file_load sys_kexec_file_load
|
||||
321 common bpf sys_bpf
|
||||
322 64 execveat stub_execveat
|
||||
|
||||
#
|
||||
# x32-specific system call numbers start at 512 to avoid cache impact
|
||||
# for native 64-bit operation.
|
||||
#
|
||||
512 x32 rt_sigaction compat_sys_rt_sigaction
|
||||
513 x32 rt_sigreturn stub_x32_rt_sigreturn
|
||||
514 x32 ioctl compat_sys_ioctl
|
||||
515 x32 readv compat_sys_readv
|
||||
516 x32 writev compat_sys_writev
|
||||
517 x32 recvfrom compat_sys_recvfrom
|
||||
518 x32 sendmsg compat_sys_sendmsg
|
||||
519 x32 recvmsg compat_sys_recvmsg
|
||||
520 x32 execve stub_x32_execve
|
||||
521 x32 ptrace compat_sys_ptrace
|
||||
522 x32 rt_sigpending compat_sys_rt_sigpending
|
||||
523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
|
||||
524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo
|
||||
525 x32 sigaltstack compat_sys_sigaltstack
|
||||
526 x32 timer_create compat_sys_timer_create
|
||||
527 x32 mq_notify compat_sys_mq_notify
|
||||
528 x32 kexec_load compat_sys_kexec_load
|
||||
529 x32 waitid compat_sys_waitid
|
||||
530 x32 set_robust_list compat_sys_set_robust_list
|
||||
531 x32 get_robust_list compat_sys_get_robust_list
|
||||
532 x32 vmsplice compat_sys_vmsplice
|
||||
533 x32 move_pages compat_sys_move_pages
|
||||
534 x32 preadv compat_sys_preadv64
|
||||
535 x32 pwritev compat_sys_pwritev64
|
||||
536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
|
||||
537 x32 recvmmsg compat_sys_recvmmsg
|
||||
538 x32 sendmmsg compat_sys_sendmmsg
|
||||
539 x32 process_vm_readv compat_sys_process_vm_readv
|
||||
540 x32 process_vm_writev compat_sys_process_vm_writev
|
||||
541 x32 setsockopt compat_sys_setsockopt
|
||||
542 x32 getsockopt compat_sys_getsockopt
|
||||
543 x32 io_setup compat_sys_io_setup
|
||||
544 x32 io_submit compat_sys_io_submit
|
||||
545 x32 execveat stub_x32_execveat
|
27
arch/x86/entry/syscalls/syscallhdr.sh
Normal file
27
arch/x86/entry/syscalls/syscallhdr.sh
Normal file
@@ -0,0 +1,27 @@
|
||||
#!/bin/sh
|
||||
|
||||
in="$1"
|
||||
out="$2"
|
||||
my_abis=`echo "($3)" | tr ',' '|'`
|
||||
prefix="$4"
|
||||
offset="$5"
|
||||
|
||||
fileguard=_ASM_X86_`basename "$out" | sed \
|
||||
-e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
|
||||
-e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
|
||||
grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
|
||||
echo "#ifndef ${fileguard}"
|
||||
echo "#define ${fileguard} 1"
|
||||
echo ""
|
||||
|
||||
while read nr abi name entry ; do
|
||||
if [ -z "$offset" ]; then
|
||||
echo "#define __NR_${prefix}${name} $nr"
|
||||
else
|
||||
echo "#define __NR_${prefix}${name} ($offset + $nr)"
|
||||
fi
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "#endif /* ${fileguard} */"
|
||||
) > "$out"
|
15
arch/x86/entry/syscalls/syscalltbl.sh
Normal file
15
arch/x86/entry/syscalls/syscalltbl.sh
Normal file
@@ -0,0 +1,15 @@
|
||||
#!/bin/sh
|
||||
|
||||
in="$1"
|
||||
out="$2"
|
||||
|
||||
grep '^[0-9]' "$in" | sort -n | (
|
||||
while read nr abi name entry compat; do
|
||||
abi=`echo "$abi" | tr '[a-z]' '[A-Z]'`
|
||||
if [ -n "$compat" ]; then
|
||||
echo "__SYSCALL_${abi}($nr, $entry, $compat)"
|
||||
elif [ -n "$entry" ]; then
|
||||
echo "__SYSCALL_${abi}($nr, $entry, $entry)"
|
||||
fi
|
||||
done
|
||||
) > "$out"
|
42
arch/x86/entry/thunk_32.S
Normal file
42
arch/x86/entry/thunk_32.S
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash)
|
||||
* Copyright 2008 by Steven Rostedt, Red Hat, Inc
|
||||
* (inspired by Andi Kleen's thunk_64.S)
|
||||
* Subject to the GNU public license, v.2. No warranty of any kind.
|
||||
*/
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/asm.h>
|
||||
|
||||
/* put return address in eax (arg1) */
|
||||
.macro THUNK name, func, put_ret_addr_in_eax=0
|
||||
.globl \name
|
||||
\name:
|
||||
pushl %eax
|
||||
pushl %ecx
|
||||
pushl %edx
|
||||
|
||||
.if \put_ret_addr_in_eax
|
||||
/* Place EIP in the arg1 */
|
||||
movl 3*4(%esp), %eax
|
||||
.endif
|
||||
|
||||
call \func
|
||||
popl %edx
|
||||
popl %ecx
|
||||
popl %eax
|
||||
ret
|
||||
_ASM_NOKPROBE(\name)
|
||||
.endm
|
||||
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1
|
||||
THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PREEMPT
|
||||
THUNK ___preempt_schedule, preempt_schedule
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
THUNK ___preempt_schedule_context, preempt_schedule_context
|
||||
#endif
|
||||
#endif
|
||||
|
69
arch/x86/entry/thunk_64.S
Normal file
69
arch/x86/entry/thunk_64.S
Normal file
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Save registers before calling assembly functions. This avoids
|
||||
* disturbance of register allocation in some inline assembly constructs.
|
||||
* Copyright 2001,2002 by Andi Kleen, SuSE Labs.
|
||||
* Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc.
|
||||
* Subject to the GNU public license, v.2. No warranty of any kind.
|
||||
*/
|
||||
#include <linux/linkage.h>
|
||||
#include "calling.h"
|
||||
#include <asm/asm.h>
|
||||
|
||||
/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
|
||||
.macro THUNK name, func, put_ret_addr_in_rdi=0
|
||||
.globl \name
|
||||
\name:
|
||||
|
||||
/* The nine pushes below leave the caller's return address (%rip) at 9*8(%rsp). */
|
||||
pushq %rdi
|
||||
pushq %rsi
|
||||
pushq %rdx
|
||||
pushq %rcx
|
||||
pushq %rax
|
||||
pushq %r8
|
||||
pushq %r9
|
||||
pushq %r10
|
||||
pushq %r11
|
||||
|
||||
.if \put_ret_addr_in_rdi
|
||||
/* 9*8(%rsp) is return addr on stack */
|
||||
movq 9*8(%rsp), %rdi
|
||||
.endif
|
||||
|
||||
call \func
|
||||
jmp restore
|
||||
_ASM_NOKPROBE(\name)
|
||||
.endm
|
||||
|
||||
#ifdef CONFIG_TRACE_IRQFLAGS
|
||||
THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1
|
||||
THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
THUNK lockdep_sys_exit_thunk,lockdep_sys_exit
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_PREEMPT
|
||||
THUNK ___preempt_schedule, preempt_schedule
|
||||
#ifdef CONFIG_CONTEXT_TRACKING
|
||||
THUNK ___preempt_schedule_context, preempt_schedule_context
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_TRACE_IRQFLAGS) \
|
||||
|| defined(CONFIG_DEBUG_LOCK_ALLOC) \
|
||||
|| defined(CONFIG_PREEMPT)
|
||||
restore:
|
||||
popq %r11
|
||||
popq %r10
|
||||
popq %r9
|
||||
popq %r8
|
||||
popq %rax
|
||||
popq %rcx
|
||||
popq %rdx
|
||||
popq %rsi
|
||||
popq %rdi
|
||||
ret
|
||||
_ASM_NOKPROBE(restore)
|
||||
#endif
|
7
arch/x86/entry/vdso/.gitignore
vendored
Normal file
7
arch/x86/entry/vdso/.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
vdso.lds
|
||||
vdsox32.lds
|
||||
vdso32-syscall-syms.lds
|
||||
vdso32-sysenter-syms.lds
|
||||
vdso32-int80-syms.lds
|
||||
vdso-image-*.c
|
||||
vdso2c
|
209
arch/x86/entry/vdso/Makefile
Normal file
209
arch/x86/entry/vdso/Makefile
Normal file
@@ -0,0 +1,209 @@
|
||||
#
|
||||
# Building vDSO images for x86.
|
||||
#
|
||||
|
||||
KBUILD_CFLAGS += $(DISABLE_LTO)
|
||||
KASAN_SANITIZE := n
|
||||
|
||||
VDSO64-$(CONFIG_X86_64) := y
|
||||
VDSOX32-$(CONFIG_X86_X32_ABI) := y
|
||||
VDSO32-$(CONFIG_X86_32) := y
|
||||
VDSO32-$(CONFIG_COMPAT) := y
|
||||
|
||||
# files to link into the vdso
|
||||
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
|
||||
|
||||
# files to link into kernel
|
||||
obj-y += vma.o
|
||||
|
||||
# vDSO images to build
|
||||
vdso_img-$(VDSO64-y) += 64
|
||||
vdso_img-$(VDSOX32-y) += x32
|
||||
vdso_img-$(VDSO32-y) += 32-int80
|
||||
vdso_img-$(CONFIG_COMPAT) += 32-syscall
|
||||
vdso_img-$(VDSO32-y) += 32-sysenter
|
||||
|
||||
obj-$(VDSO32-y) += vdso32-setup.o
|
||||
|
||||
vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
|
||||
|
||||
$(obj)/vdso.o: $(obj)/vdso.so
|
||||
|
||||
targets += vdso.lds $(vobjs-y)
|
||||
|
||||
# Build the vDSO image C files and link them in.
|
||||
vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
|
||||
vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c)
|
||||
vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
|
||||
obj-y += $(vdso_img_objs)
|
||||
targets += $(vdso_img_cfiles)
|
||||
targets += $(vdso_img_sodbg)
|
||||
.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) \
|
||||
$(vdso_img-y:%=$(obj)/vdso%.so)
|
||||
|
||||
export CPPFLAGS_vdso.lds += -P -C
|
||||
|
||||
VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
|
||||
-Wl,--no-undefined \
|
||||
-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \
|
||||
$(DISABLE_LTO)
|
||||
|
||||
$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
|
||||
$(call if_changed,vdso)
|
||||
|
||||
HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi
|
||||
hostprogs-y += vdso2c
|
||||
|
||||
quiet_cmd_vdso2c = VDSO2C $@
|
||||
define cmd_vdso2c
|
||||
$(obj)/vdso2c $< $(<:%.dbg=%) $@
|
||||
endef
|
||||
|
||||
$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
|
||||
$(call if_changed,vdso2c)
|
||||
|
||||
#
|
||||
# Don't omit frame pointers for ease of userspace debugging, but do
|
||||
# optimize sibling calls.
|
||||
#
|
||||
CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
|
||||
$(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
|
||||
-fno-omit-frame-pointer -foptimize-sibling-calls \
|
||||
-DDISABLE_BRANCH_PROFILING
|
||||
|
||||
$(vobjs): KBUILD_CFLAGS += $(CFL)
|
||||
|
||||
#
|
||||
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
|
||||
#
|
||||
CFLAGS_REMOVE_vdso-note.o = -pg
|
||||
CFLAGS_REMOVE_vclock_gettime.o = -pg
|
||||
CFLAGS_REMOVE_vgetcpu.o = -pg
|
||||
CFLAGS_REMOVE_vvar.o = -pg
|
||||
|
||||
#
|
||||
# X32 processes use x32 vDSO to access 64bit kernel data.
|
||||
#
|
||||
# Build x32 vDSO image:
|
||||
# 1. Compile x32 vDSO as 64bit.
|
||||
# 2. Convert object files to x32.
|
||||
# 3. Build x32 VDSO image with x32 objects, which contains 64bit codes
|
||||
# so that it can reach 64bit address space with 64bit pointers.
|
||||
#
|
||||
|
||||
CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)
|
||||
VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \
|
||||
-Wl,-soname=linux-vdso.so.1 \
|
||||
-Wl,-z,max-page-size=4096 \
|
||||
-Wl,-z,common-page-size=4096
|
||||
|
||||
# 64-bit objects to re-brand as x32
|
||||
vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y))
|
||||
|
||||
# x32-rebranded versions
|
||||
vobjx32s-y := $(vobjs64-for-x32:.o=-x32.o)
|
||||
|
||||
# same thing, but in the output directory
|
||||
vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F)
|
||||
|
||||
# Convert 64bit object file to x32 for x32 vDSO.
|
||||
quiet_cmd_x32 = X32 $@
|
||||
cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@
|
||||
|
||||
$(obj)/%-x32.o: $(obj)/%.o FORCE
|
||||
$(call if_changed,x32)
|
||||
|
||||
targets += vdsox32.lds $(vobjx32s-y)
|
||||
|
||||
$(obj)/%.so: OBJCOPYFLAGS := -S
|
||||
$(obj)/%.so: $(obj)/%.so.dbg
|
||||
$(call if_changed,objcopy)
|
||||
|
||||
$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
|
||||
$(call if_changed,vdso)
|
||||
|
||||
#
|
||||
# Build multiple 32-bit vDSO images to choose from at boot time.
|
||||
#
|
||||
vdso32.so-$(VDSO32-y) += int80
|
||||
vdso32.so-$(CONFIG_COMPAT) += syscall
|
||||
vdso32.so-$(VDSO32-y) += sysenter
|
||||
|
||||
vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
|
||||
|
||||
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
|
||||
VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1
|
||||
|
||||
# This makes sure the $(obj) subdirectory exists even though vdso32/
|
||||
# is not a kbuild sub-make subdirectory.
|
||||
override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
|
||||
|
||||
targets += vdso32/vdso32.lds
|
||||
targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
|
||||
targets += vdso32/vclock_gettime.o
|
||||
|
||||
$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
|
||||
|
||||
KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
|
||||
$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
|
||||
$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
|
||||
|
||||
KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
|
||||
KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
|
||||
KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
|
||||
KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
|
||||
KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
|
||||
KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
|
||||
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
|
||||
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
|
||||
KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
|
||||
$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)
|
||||
|
||||
$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
|
||||
$(obj)/vdso32/vdso32.lds \
|
||||
$(obj)/vdso32/vclock_gettime.o \
|
||||
$(obj)/vdso32/note.o \
|
||||
$(obj)/vdso32/%.o
|
||||
$(call if_changed,vdso)
|
||||
|
||||
#
|
||||
# The DSO images are built using a special linker script.
|
||||
#
|
||||
quiet_cmd_vdso = VDSO $@
|
||||
cmd_vdso = $(CC) -nostdlib -o $@ \
|
||||
$(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
|
||||
-Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
|
||||
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
|
||||
|
||||
VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
|
||||
$(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS)
|
||||
GCOV_PROFILE := n
|
||||
|
||||
#
|
||||
# Install the unstripped copies of vdso*.so. If our toolchain supports
|
||||
# build-id, install .build-id links as well.
|
||||
#
|
||||
quiet_cmd_vdso_install = INSTALL $(@:install_%=%)
|
||||
define cmd_vdso_install
|
||||
cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \
|
||||
if readelf -n $< |grep -q 'Build ID'; then \
|
||||
buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \
|
||||
first=`echo $$buildid | cut -b-2`; \
|
||||
last=`echo $$buildid | cut -b3-`; \
|
||||
mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \
|
||||
ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \
|
||||
fi
|
||||
endef
|
||||
|
||||
vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%)
|
||||
|
||||
$(MODLIB)/vdso: FORCE
|
||||
@mkdir -p $(MODLIB)/vdso
|
||||
|
||||
$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
|
||||
$(call cmd,vdso_install)
|
||||
|
||||
PHONY += vdso_install $(vdso_img_insttargets)
|
||||
vdso_install: $(vdso_img_insttargets) FORCE
|
||||
|
||||
clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* vdso-image-*.c vdsox32.so*
|
10
arch/x86/entry/vdso/checkundef.sh
Executable file
10
arch/x86/entry/vdso/checkundef.sh
Executable file
@@ -0,0 +1,10 @@
|
||||
#!/bin/sh
#
# Usage: checkundef.sh NM FILE
#
# Exit 0 when FILE has no undefined symbols.  Exit 1 (with a message on
# stderr) when undefined symbols are present, or when NM itself fails to
# read FILE -- previously an nm failure produced empty output and was
# silently treated as "no undefined symbols".
nm="$1"
file="$2"

# $nm is left unquoted, as before, so an NM value that carries arguments
# still word-splits into command + flags.
symbols=`$nm "$file"`
if [ $? -ne 0 ]; then
	echo "$file: $nm failed" >&2
	exit 1
fi

# Undefined symbols are listed with type letter 'U' and a blank address.
printf '%s\n' "$symbols" | grep '^ *U' > /dev/null 2>&1
if [ $? -eq 1 ]; then
	# grep found nothing: no undefined symbols.
	exit 0
else
	echo "$file: undefined symbols found" >&2
	exit 1
fi
|
351
arch/x86/entry/vdso/vclock_gettime.c
Normal file
351
arch/x86/entry/vdso/vclock_gettime.c
Normal file
@@ -0,0 +1,351 @@
|
||||
/*
|
||||
* Copyright 2006 Andi Kleen, SUSE Labs.
|
||||
* Subject to the GNU Public License, v.2
|
||||
*
|
||||
* Fast user context implementation of clock_gettime, gettimeofday, and time.
|
||||
*
|
||||
* 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
|
||||
* sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
|
||||
*
|
||||
* The code should have no internal unresolved relocations.
|
||||
* Check with readelf after changing.
|
||||
*/
|
||||
|
||||
#include <uapi/linux/time.h>
|
||||
#include <asm/vgtod.h>
|
||||
#include <asm/hpet.h>
|
||||
#include <asm/vvar.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <asm/msr.h>
|
||||
#include <linux/math64.h>
|
||||
#include <linux/time.h>
|
||||
|
||||
#define gtod (&VVAR(vsyscall_gtod_data))
|
||||
|
||||
extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
|
||||
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
|
||||
extern time_t __vdso_time(time_t *t);
|
||||
|
||||
#ifdef CONFIG_HPET_TIMER
|
||||
extern u8 hpet_page
|
||||
__attribute__((visibility("hidden")));
|
||||
|
||||
static notrace cycle_t vread_hpet(void)
|
||||
{
|
||||
return *(const volatile u32 *)(&hpet_page + HPET_COUNTER);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef BUILD_VDSO32
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <asm/vsyscall.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/pvclock.h>
|
||||
|
||||
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
|
||||
{
|
||||
long ret;
|
||||
asm("syscall" : "=a" (ret) :
|
||||
"0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
|
||||
return ret;
|
||||
}
|
||||
|
||||
notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
|
||||
{
|
||||
long ret;
|
||||
|
||||
asm("syscall" : "=a" (ret) :
|
||||
"0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_CLOCK
|
||||
|
||||
static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
|
||||
{
|
||||
const struct pvclock_vsyscall_time_info *pvti_base;
|
||||
int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
|
||||
int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
|
||||
|
||||
BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
|
||||
|
||||
pvti_base = (struct pvclock_vsyscall_time_info *)
|
||||
__fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
|
||||
|
||||
return &pvti_base[offset];
|
||||
}
|
||||
|
||||
static notrace cycle_t vread_pvclock(int *mode)
|
||||
{
|
||||
const struct pvclock_vsyscall_time_info *pvti;
|
||||
cycle_t ret;
|
||||
u64 last;
|
||||
u32 version;
|
||||
u8 flags;
|
||||
unsigned cpu, cpu1;
|
||||
|
||||
|
||||
/*
|
||||
* Note: hypervisor must guarantee that:
|
||||
* 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
|
||||
* 2. that per-CPU pvclock time info is updated if the
|
||||
* underlying CPU changes.
|
||||
* 3. that version is increased whenever underlying CPU
|
||||
* changes.
|
||||
*
|
||||
*/
|
||||
do {
|
||||
cpu = __getcpu() & VGETCPU_CPU_MASK;
|
||||
/* TODO: We can put vcpu id into higher bits of pvti.version.
|
||||
* This will save a couple of cycles by getting rid of
|
||||
* __getcpu() calls (Gleb).
|
||||
*/
|
||||
|
||||
pvti = get_pvti(cpu);
|
||||
|
||||
version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
|
||||
|
||||
/*
|
||||
* Test we're still on the cpu as well as the version.
|
||||
* We could have been migrated just after the first
|
||||
* vgetcpu but before fetching the version, so we
|
||||
* wouldn't notice a version change.
|
||||
*/
|
||||
cpu1 = __getcpu() & VGETCPU_CPU_MASK;
|
||||
} while (unlikely(cpu != cpu1 ||
|
||||
(pvti->pvti.version & 1) ||
|
||||
pvti->pvti.version != version));
|
||||
|
||||
if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
|
||||
*mode = VCLOCK_NONE;
|
||||
|
||||
/* refer to tsc.c read_tsc() comment for rationale */
|
||||
last = gtod->cycle_last;
|
||||
|
||||
if (likely(ret >= last))
|
||||
return ret;
|
||||
|
||||
return last;
|
||||
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
|
||||
{
|
||||
long ret;
|
||||
|
||||
asm(
|
||||
"mov %%ebx, %%edx \n"
|
||||
"mov %2, %%ebx \n"
|
||||
"call __kernel_vsyscall \n"
|
||||
"mov %%edx, %%ebx \n"
|
||||
: "=a" (ret)
|
||||
: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
|
||||
: "memory", "edx");
|
||||
return ret;
|
||||
}
|
||||
|
||||
notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
|
||||
{
|
||||
long ret;
|
||||
|
||||
asm(
|
||||
"mov %%ebx, %%edx \n"
|
||||
"mov %2, %%ebx \n"
|
||||
"call __kernel_vsyscall \n"
|
||||
"mov %%edx, %%ebx \n"
|
||||
: "=a" (ret)
|
||||
: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
|
||||
: "memory", "edx");
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PARAVIRT_CLOCK
|
||||
|
||||
static notrace cycle_t vread_pvclock(int *mode)
|
||||
{
|
||||
*mode = VCLOCK_NONE;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
notrace static cycle_t vread_tsc(void)
|
||||
{
|
||||
cycle_t ret;
|
||||
u64 last;
|
||||
|
||||
/*
|
||||
* Empirically, a fence (of type that depends on the CPU)
|
||||
* before rdtsc is enough to ensure that rdtsc is ordered
|
||||
* with respect to loads. The various CPU manuals are unclear
|
||||
* as to whether rdtsc can be reordered with later loads,
|
||||
* but no one has ever seen it happen.
|
||||
*/
|
||||
rdtsc_barrier();
|
||||
ret = (cycle_t)__native_read_tsc();
|
||||
|
||||
last = gtod->cycle_last;
|
||||
|
||||
if (likely(ret >= last))
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* GCC likes to generate cmov here, but this branch is extremely
|
||||
* predictable (it's just a function of time and the likely is
|
||||
* very likely) and there's a data dependence, so force GCC
|
||||
* to generate a branch instead. I don't barrier() because
|
||||
* we don't actually need a barrier, and if this function
|
||||
* ever gets inlined it will generate worse code.
|
||||
*/
|
||||
asm volatile ("");
|
||||
return last;
|
||||
}
|
||||
|
||||
notrace static inline u64 vgetsns(int *mode)
|
||||
{
|
||||
u64 v;
|
||||
cycles_t cycles;
|
||||
|
||||
if (gtod->vclock_mode == VCLOCK_TSC)
|
||||
cycles = vread_tsc();
|
||||
#ifdef CONFIG_HPET_TIMER
|
||||
else if (gtod->vclock_mode == VCLOCK_HPET)
|
||||
cycles = vread_hpet();
|
||||
#endif
|
||||
#ifdef CONFIG_PARAVIRT_CLOCK
|
||||
else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
|
||||
cycles = vread_pvclock(mode);
|
||||
#endif
|
||||
else
|
||||
return 0;
|
||||
v = (cycles - gtod->cycle_last) & gtod->mask;
|
||||
return v * gtod->mult;
|
||||
}
|
||||
|
||||
/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
|
||||
notrace static int __always_inline do_realtime(struct timespec *ts)
|
||||
{
|
||||
unsigned long seq;
|
||||
u64 ns;
|
||||
int mode;
|
||||
|
||||
do {
|
||||
seq = gtod_read_begin(gtod);
|
||||
mode = gtod->vclock_mode;
|
||||
ts->tv_sec = gtod->wall_time_sec;
|
||||
ns = gtod->wall_time_snsec;
|
||||
ns += vgetsns(&mode);
|
||||
ns >>= gtod->shift;
|
||||
} while (unlikely(gtod_read_retry(gtod, seq)));
|
||||
|
||||
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
|
||||
ts->tv_nsec = ns;
|
||||
|
||||
return mode;
|
||||
}
|
||||
|
||||
notrace static int __always_inline do_monotonic(struct timespec *ts)
|
||||
{
|
||||
unsigned long seq;
|
||||
u64 ns;
|
||||
int mode;
|
||||
|
||||
do {
|
||||
seq = gtod_read_begin(gtod);
|
||||
mode = gtod->vclock_mode;
|
||||
ts->tv_sec = gtod->monotonic_time_sec;
|
||||
ns = gtod->monotonic_time_snsec;
|
||||
ns += vgetsns(&mode);
|
||||
ns >>= gtod->shift;
|
||||
} while (unlikely(gtod_read_retry(gtod, seq)));
|
||||
|
||||
ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
|
||||
ts->tv_nsec = ns;
|
||||
|
||||
return mode;
|
||||
}
|
||||
|
||||
notrace static void do_realtime_coarse(struct timespec *ts)
|
||||
{
|
||||
unsigned long seq;
|
||||
do {
|
||||
seq = gtod_read_begin(gtod);
|
||||
ts->tv_sec = gtod->wall_time_coarse_sec;
|
||||
ts->tv_nsec = gtod->wall_time_coarse_nsec;
|
||||
} while (unlikely(gtod_read_retry(gtod, seq)));
|
||||
}
|
||||
|
||||
notrace static void do_monotonic_coarse(struct timespec *ts)
|
||||
{
|
||||
unsigned long seq;
|
||||
do {
|
||||
seq = gtod_read_begin(gtod);
|
||||
ts->tv_sec = gtod->monotonic_time_coarse_sec;
|
||||
ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
|
||||
} while (unlikely(gtod_read_retry(gtod, seq)));
|
||||
}
|
||||
|
||||
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
|
||||
{
|
||||
switch (clock) {
|
||||
case CLOCK_REALTIME:
|
||||
if (do_realtime(ts) == VCLOCK_NONE)
|
||||
goto fallback;
|
||||
break;
|
||||
case CLOCK_MONOTONIC:
|
||||
if (do_monotonic(ts) == VCLOCK_NONE)
|
||||
goto fallback;
|
||||
break;
|
||||
case CLOCK_REALTIME_COARSE:
|
||||
do_realtime_coarse(ts);
|
||||
break;
|
||||
case CLOCK_MONOTONIC_COARSE:
|
||||
do_monotonic_coarse(ts);
|
||||
break;
|
||||
default:
|
||||
goto fallback;
|
||||
}
|
||||
|
||||
return 0;
|
||||
fallback:
|
||||
return vdso_fallback_gettime(clock, ts);
|
||||
}
|
||||
int clock_gettime(clockid_t, struct timespec *)
|
||||
__attribute__((weak, alias("__vdso_clock_gettime")));
|
||||
|
||||
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
|
||||
{
|
||||
if (likely(tv != NULL)) {
|
||||
if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
|
||||
return vdso_fallback_gtod(tv, tz);
|
||||
tv->tv_usec /= 1000;
|
||||
}
|
||||
if (unlikely(tz != NULL)) {
|
||||
tz->tz_minuteswest = gtod->tz_minuteswest;
|
||||
tz->tz_dsttime = gtod->tz_dsttime;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
int gettimeofday(struct timeval *, struct timezone *)
|
||||
__attribute__((weak, alias("__vdso_gettimeofday")));
|
||||
|
||||
/*
|
||||
* This will break when the xtime seconds get inaccurate, but that is
|
||||
* unlikely
|
||||
*/
|
||||
notrace time_t __vdso_time(time_t *t)
|
||||
{
|
||||
/* This is atomic on x86 so we don't need any locks. */
|
||||
time_t result = ACCESS_ONCE(gtod->wall_time_sec);
|
||||
|
||||
if (t)
|
||||
*t = result;
|
||||
return result;
|
||||
}
|
||||
/*
 * Weak userspace-visible alias for __vdso_time().  The declared return
 * type must be time_t, matching the aliased function; declaring it as
 * int mismatches the alias target's type.
 */
time_t time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));
|
118
arch/x86/entry/vdso/vdso-layout.lds.S
Normal file
118
arch/x86/entry/vdso/vdso-layout.lds.S
Normal file
@@ -0,0 +1,118 @@
|
||||
#include <asm/vdso.h>
|
||||
|
||||
/*
|
||||
* Linker script for vDSO. This is an ELF shared object prelinked to
|
||||
* its virtual address, and with only one read-only segment.
|
||||
* This script controls its layout.
|
||||
*/
|
||||
|
||||
#if defined(BUILD_VDSO64)
|
||||
# define SHDR_SIZE 64
|
||||
#elif defined(BUILD_VDSO32) || defined(BUILD_VDSOX32)
|
||||
# define SHDR_SIZE 40
|
||||
#else
|
||||
# error unknown VDSO target
|
||||
#endif
|
||||
|
||||
#define NUM_FAKE_SHDRS 13
|
||||
|
||||
SECTIONS
|
||||
{
|
||||
/*
|
||||
* User/kernel shared data is before the vDSO. This may be a little
|
||||
* uglier than putting it after the vDSO, but it avoids issues with
|
||||
* non-allocatable things that dangle past the end of the PT_LOAD
|
||||
* segment.
|
||||
*/
|
||||
|
||||
vvar_start = . - 2 * PAGE_SIZE;
|
||||
vvar_page = vvar_start;
|
||||
|
||||
/* Place all vvars at the offsets in asm/vvar.h. */
|
||||
#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
|
||||
#define __VVAR_KERNEL_LDS
|
||||
#include <asm/vvar.h>
|
||||
#undef __VVAR_KERNEL_LDS
|
||||
#undef EMIT_VVAR
|
||||
|
||||
hpet_page = vvar_start + PAGE_SIZE;
|
||||
|
||||
. = SIZEOF_HEADERS;
|
||||
|
||||
.hash : { *(.hash) } :text
|
||||
.gnu.hash : { *(.gnu.hash) }
|
||||
.dynsym : { *(.dynsym) }
|
||||
.dynstr : { *(.dynstr) }
|
||||
.gnu.version : { *(.gnu.version) }
|
||||
.gnu.version_d : { *(.gnu.version_d) }
|
||||
.gnu.version_r : { *(.gnu.version_r) }
|
||||
|
||||
.dynamic : { *(.dynamic) } :text :dynamic
|
||||
|
||||
.rodata : {
|
||||
*(.rodata*)
|
||||
*(.data*)
|
||||
*(.sdata*)
|
||||
*(.got.plt) *(.got)
|
||||
*(.gnu.linkonce.d.*)
|
||||
*(.bss*)
|
||||
*(.dynbss*)
|
||||
*(.gnu.linkonce.b.*)
|
||||
|
||||
/*
|
||||
* Ideally this would live in a C file, but that won't
|
||||
* work cleanly for x32 until we start building the x32
|
||||
* C code using an x32 toolchain.
|
||||
*/
|
||||
VDSO_FAKE_SECTION_TABLE_START = .;
|
||||
. = . + NUM_FAKE_SHDRS * SHDR_SIZE;
|
||||
VDSO_FAKE_SECTION_TABLE_END = .;
|
||||
} :text
|
||||
|
||||
.fake_shstrtab : { *(.fake_shstrtab) } :text
|
||||
|
||||
|
||||
.note : { *(.note.*) } :text :note
|
||||
|
||||
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
|
||||
.eh_frame : { KEEP (*(.eh_frame)) } :text
|
||||
|
||||
|
||||
/*
|
||||
* Text is well-separated from actual data: there's plenty of
|
||||
* stuff that isn't used at runtime in between.
|
||||
*/
|
||||
|
||||
.text : { *(.text*) } :text =0x90909090,
|
||||
|
||||
/*
|
||||
* At the end so that eu-elflint stays happy when vdso2c strips
|
||||
* these. A better implementation would avoid allocating space
|
||||
* for these.
|
||||
*/
|
||||
.altinstructions : { *(.altinstructions) } :text
|
||||
.altinstr_replacement : { *(.altinstr_replacement) } :text
|
||||
|
||||
/DISCARD/ : {
|
||||
*(.discard)
|
||||
*(.discard.*)
|
||||
*(__bug_table)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Very old versions of ld do not recognize this name token; use the constant.
|
||||
*/
|
||||
#define PT_GNU_EH_FRAME 0x6474e550
|
||||
|
||||
/*
|
||||
* We must supply the ELF program headers explicitly to get just one
|
||||
* PT_LOAD segment, and set the flags explicitly to make segments read-only.
|
||||
*/
|
||||
PHDRS
|
||||
{
|
||||
text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
|
||||
dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
|
||||
note PT_NOTE FLAGS(4); /* PF_R */
|
||||
eh_frame_hdr PT_GNU_EH_FRAME;
|
||||
}
|
12
arch/x86/entry/vdso/vdso-note.S
Normal file
12
arch/x86/entry/vdso/vdso-note.S
Normal file
@@ -0,0 +1,12 @@
|
||||
/*
|
||||
* This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
|
||||
* Here we can supply some information useful to userland.
|
||||
*/
|
||||
|
||||
#include <linux/uts.h>
|
||||
#include <linux/version.h>
|
||||
#include <linux/elfnote.h>
|
||||
|
||||
ELFNOTE_START(Linux, 0, "a")
|
||||
.long LINUX_VERSION_CODE
|
||||
ELFNOTE_END
|
29
arch/x86/entry/vdso/vdso.lds.S
Normal file
29
arch/x86/entry/vdso/vdso.lds.S
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Linker script for 64-bit vDSO.
|
||||
* We #include the file to define the layout details.
|
||||
*
|
||||
* This file defines the version script giving the user-exported symbols in
|
||||
* the DSO.
|
||||
*/
|
||||
|
||||
#define BUILD_VDSO64
|
||||
|
||||
#include "vdso-layout.lds.S"
|
||||
|
||||
/*
|
||||
* This controls what userland symbols we export from the vDSO.
|
||||
*/
|
||||
VERSION {
|
||||
LINUX_2.6 {
|
||||
global:
|
||||
clock_gettime;
|
||||
__vdso_clock_gettime;
|
||||
gettimeofday;
|
||||
__vdso_gettimeofday;
|
||||
getcpu;
|
||||
__vdso_getcpu;
|
||||
time;
|
||||
__vdso_time;
|
||||
local: *;
|
||||
};
|
||||
}
|
253
arch/x86/entry/vdso/vdso2c.c
Normal file
253
arch/x86/entry/vdso/vdso2c.c
Normal file
@@ -0,0 +1,253 @@
|
||||
/*
|
||||
* vdso2c - A vdso image preparation tool
|
||||
* Copyright (c) 2014 Andy Lutomirski and others
|
||||
* Licensed under the GPL v2
|
||||
*
|
||||
* vdso2c requires stripped and unstripped input. It would be trivial
|
||||
* to fully strip the input in here, but, for reasons described below,
|
||||
* we need to write a section table. Doing this is more or less
|
||||
* equivalent to dropping all non-allocatable sections, but it's
|
||||
* easier to let objcopy handle that instead of doing it ourselves.
|
||||
* If we ever need to do something fancier than what objcopy provides,
|
||||
* it would be straightforward to add here.
|
||||
*
|
||||
* We keep a section table for a few reasons:
|
||||
*
|
||||
* The Go runtime had a couple of bugs: it would read the section
|
||||
* table to try to figure out how many dynamic symbols there were (it
|
||||
* shouldn't have looked at the section table at all) and, if there
|
||||
* were no SHT_DYNSYM section table entry, it would use an
|
||||
* uninitialized value for the number of symbols. An empty DYNSYM
|
||||
* table would work, but I see no reason not to write a valid one (and
|
||||
* keep full performance for old Go programs). This hack is only
|
||||
* needed on x86_64.
|
||||
*
|
||||
* The bug was introduced on 2012-08-31 by:
|
||||
* https://code.google.com/p/go/source/detail?r=56ea40aac72b
|
||||
* and was fixed on 2014-06-13 by:
|
||||
* https://code.google.com/p/go/source/detail?r=fc1cd5e12595
|
||||
*
|
||||
* Binutils has issues debugging the vDSO: it reads the section table to
|
||||
* find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
|
||||
* would break build-id if we removed the section table. Binutils
|
||||
* also requires that shstrndx != 0. See:
|
||||
* https://sourceware.org/bugzilla/show_bug.cgi?id=17064
|
||||
*
|
||||
* elfutils might not look for PT_NOTE if there is a section table at
|
||||
* all. I don't know whether this matters for any practical purpose.
|
||||
*
|
||||
* For simplicity, rather than hacking up a partial section table, we
|
||||
* just write a mostly complete one. We omit non-dynamic symbols,
|
||||
* though, since they're rather large.
|
||||
*
|
||||
* Once binutils gets fixed, we might be able to drop this for all but
|
||||
* the 64-bit vdso, since build-id only works in kernel RPMs, and
|
||||
* systems that update to new enough kernel RPMs will likely update
|
||||
* binutils in sync. build-id has never worked for home-built kernel
|
||||
* RPMs without manual symlinking, and I suspect that no one ever does
|
||||
* that.
|
||||
*/
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
#include <err.h>
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <tools/le_byteshift.h>
|
||||
|
||||
#include <linux/elf.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
const char *outfilename;
|
||||
|
||||
/* Symbols that we need in vdso2c. */
|
||||
enum {
|
||||
sym_vvar_start,
|
||||
sym_vvar_page,
|
||||
sym_hpet_page,
|
||||
sym_VDSO_FAKE_SECTION_TABLE_START,
|
||||
sym_VDSO_FAKE_SECTION_TABLE_END,
|
||||
};
|
||||
|
||||
const int special_pages[] = {
|
||||
sym_vvar_page,
|
||||
sym_hpet_page,
|
||||
};
|
||||
|
||||
struct vdso_sym {
|
||||
const char *name;
|
||||
bool export;
|
||||
};
|
||||
|
||||
struct vdso_sym required_syms[] = {
|
||||
[sym_vvar_start] = {"vvar_start", true},
|
||||
[sym_vvar_page] = {"vvar_page", true},
|
||||
[sym_hpet_page] = {"hpet_page", true},
|
||||
[sym_VDSO_FAKE_SECTION_TABLE_START] = {
|
||||
"VDSO_FAKE_SECTION_TABLE_START", false
|
||||
},
|
||||
[sym_VDSO_FAKE_SECTION_TABLE_END] = {
|
||||
"VDSO_FAKE_SECTION_TABLE_END", false
|
||||
},
|
||||
{"VDSO32_NOTE_MASK", true},
|
||||
{"VDSO32_SYSENTER_RETURN", true},
|
||||
{"__kernel_vsyscall", true},
|
||||
{"__kernel_sigreturn", true},
|
||||
{"__kernel_rt_sigreturn", true},
|
||||
};
|
||||
|
||||
__attribute__((format(printf, 1, 2))) __attribute__((noreturn))
|
||||
static void fail(const char *format, ...)
|
||||
{
|
||||
va_list ap;
|
||||
va_start(ap, format);
|
||||
fprintf(stderr, "Error: ");
|
||||
vfprintf(stderr, format, ap);
|
||||
if (outfilename)
|
||||
unlink(outfilename);
|
||||
exit(1);
|
||||
va_end(ap);
|
||||
}
|
||||
|
||||
/*
|
||||
* Evil macros for little-endian reads and writes
|
||||
*/
|
||||
#define GLE(x, bits, ifnot) \
|
||||
__builtin_choose_expr( \
|
||||
(sizeof(*(x)) == bits/8), \
|
||||
(__typeof__(*(x)))get_unaligned_le##bits(x), ifnot)
|
||||
|
||||
extern void bad_get_le(void);
|
||||
#define LAST_GLE(x) \
|
||||
__builtin_choose_expr(sizeof(*(x)) == 1, *(x), bad_get_le())
|
||||
|
||||
#define GET_LE(x) \
|
||||
GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_GLE(x))))
|
||||
|
||||
#define PLE(x, val, bits, ifnot) \
|
||||
__builtin_choose_expr( \
|
||||
(sizeof(*(x)) == bits/8), \
|
||||
put_unaligned_le##bits((val), (x)), ifnot)
|
||||
|
||||
extern void bad_put_le(void);
|
||||
#define LAST_PLE(x, val) \
|
||||
__builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), bad_put_le())
|
||||
|
||||
#define PUT_LE(x, val) \
|
||||
PLE(x, val, 64, PLE(x, val, 32, PLE(x, val, 16, LAST_PLE(x, val))))
|
||||
|
||||
|
||||
#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))
|
||||
|
||||
#define BITSFUNC3(name, bits, suffix) name##bits##suffix
|
||||
#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix)
|
||||
#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, )
|
||||
|
||||
#define INT_BITS BITSFUNC2(int, ELF_BITS, _t)
|
||||
|
||||
#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
|
||||
#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
|
||||
#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
|
||||
|
||||
#define ELF_BITS 64
|
||||
#include "vdso2c.h"
|
||||
#undef ELF_BITS
|
||||
|
||||
#define ELF_BITS 32
|
||||
#include "vdso2c.h"
|
||||
#undef ELF_BITS
|
||||
|
||||
static void go(void *raw_addr, size_t raw_len,
|
||||
void *stripped_addr, size_t stripped_len,
|
||||
FILE *outfile, const char *name)
|
||||
{
|
||||
Elf64_Ehdr *hdr = (Elf64_Ehdr *)raw_addr;
|
||||
|
||||
if (hdr->e_ident[EI_CLASS] == ELFCLASS64) {
|
||||
go64(raw_addr, raw_len, stripped_addr, stripped_len,
|
||||
outfile, name);
|
||||
} else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) {
|
||||
go32(raw_addr, raw_len, stripped_addr, stripped_len,
|
||||
outfile, name);
|
||||
} else {
|
||||
fail("unknown ELF class\n");
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Map an input file into memory.
 *
 * @name: path of the file to open read-only.
 * @addr: out - start of the private mapping.
 * @len:  out - file size in bytes, as reported by lseek(SEEK_END).
 * @prot: protection flags passed straight to mmap().
 *
 * Any failure (open, lseek, mmap) terminates the program through err().
 * The file descriptor is closed before returning.
 */
static void map_input(const char *name, void **addr, size_t *len, int prot)
{
	off_t file_size;
	void *mapping;
	int fd;

	fd = open(name, O_RDONLY);
	if (fd == -1)
		err(1, "%s", name);

	file_size = lseek(fd, 0, SEEK_END);
	if (file_size == (off_t)-1)
		err(1, "lseek");

	mapping = mmap(NULL, file_size, prot, MAP_PRIVATE, fd, 0);
	if (mapping == MAP_FAILED)
		err(1, "mmap");

	*len = (size_t)file_size;
	*addr = mapping;

	close(fd);
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
size_t raw_len, stripped_len;
|
||||
void *raw_addr, *stripped_addr;
|
||||
FILE *outfile;
|
||||
char *name, *tmp;
|
||||
int namelen;
|
||||
|
||||
if (argc != 4) {
|
||||
printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Figure out the struct name. If we're writing to a .so file,
|
||||
* generate raw output insted.
|
||||
*/
|
||||
name = strdup(argv[3]);
|
||||
namelen = strlen(name);
|
||||
if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) {
|
||||
name = NULL;
|
||||
} else {
|
||||
tmp = strrchr(name, '/');
|
||||
if (tmp)
|
||||
name = tmp + 1;
|
||||
tmp = strchr(name, '.');
|
||||
if (tmp)
|
||||
*tmp = '\0';
|
||||
for (tmp = name; *tmp; tmp++)
|
||||
if (*tmp == '-')
|
||||
*tmp = '_';
|
||||
}
|
||||
|
||||
map_input(argv[1], &raw_addr, &raw_len, PROT_READ);
|
||||
map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ);
|
||||
|
||||
outfilename = argv[3];
|
||||
outfile = fopen(outfilename, "w");
|
||||
if (!outfile)
|
||||
err(1, "%s", argv[2]);
|
||||
|
||||
go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);
|
||||
|
||||
munmap(raw_addr, raw_len);
|
||||
munmap(stripped_addr, stripped_len);
|
||||
fclose(outfile);
|
||||
|
||||
return 0;
|
||||
}
|
175
arch/x86/entry/vdso/vdso2c.h
Normal file
175
arch/x86/entry/vdso/vdso2c.h
Normal file
@@ -0,0 +1,175 @@
|
||||
/*
|
||||
* This file is included twice from vdso2c.c. It generates code for 32-bit
|
||||
* and 64-bit vDSOs. We need both for 64-bit builds, since 32-bit vDSOs
|
||||
* are built for 32-bit userspace.
|
||||
*/
|
||||
|
||||
static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
|
||||
void *stripped_addr, size_t stripped_len,
|
||||
FILE *outfile, const char *name)
|
||||
{
|
||||
int found_load = 0;
|
||||
unsigned long load_size = -1; /* Work around bogus warning */
|
||||
unsigned long mapping_size;
|
||||
ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
|
||||
int i;
|
||||
unsigned long j;
|
||||
ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
|
||||
*alt_sec = NULL;
|
||||
ELF(Dyn) *dyn = 0, *dyn_end = 0;
|
||||
const char *secstrings;
|
||||
INT_BITS syms[NSYMS] = {};
|
||||
|
||||
ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff));
|
||||
|
||||
/* Walk the segment table. */
|
||||
for (i = 0; i < GET_LE(&hdr->e_phnum); i++) {
|
||||
if (GET_LE(&pt[i].p_type) == PT_LOAD) {
|
||||
if (found_load)
|
||||
fail("multiple PT_LOAD segs\n");
|
||||
|
||||
if (GET_LE(&pt[i].p_offset) != 0 ||
|
||||
GET_LE(&pt[i].p_vaddr) != 0)
|
||||
fail("PT_LOAD in wrong place\n");
|
||||
|
||||
if (GET_LE(&pt[i].p_memsz) != GET_LE(&pt[i].p_filesz))
|
||||
fail("cannot handle memsz != filesz\n");
|
||||
|
||||
load_size = GET_LE(&pt[i].p_memsz);
|
||||
found_load = 1;
|
||||
} else if (GET_LE(&pt[i].p_type) == PT_DYNAMIC) {
|
||||
dyn = raw_addr + GET_LE(&pt[i].p_offset);
|
||||
dyn_end = raw_addr + GET_LE(&pt[i].p_offset) +
|
||||
GET_LE(&pt[i].p_memsz);
|
||||
}
|
||||
}
|
||||
if (!found_load)
|
||||
fail("no PT_LOAD seg\n");
|
||||
|
||||
if (stripped_len < load_size)
|
||||
fail("stripped input is too short\n");
|
||||
|
||||
/* Walk the dynamic table */
|
||||
for (i = 0; dyn + i < dyn_end &&
|
||||
GET_LE(&dyn[i].d_tag) != DT_NULL; i++) {
|
||||
typeof(dyn[i].d_tag) tag = GET_LE(&dyn[i].d_tag);
|
||||
if (tag == DT_REL || tag == DT_RELSZ || tag == DT_RELA ||
|
||||
tag == DT_RELENT || tag == DT_TEXTREL)
|
||||
fail("vdso image contains dynamic relocations\n");
|
||||
}
|
||||
|
||||
/* Walk the section table */
|
||||
secstrings_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
|
||||
GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx);
|
||||
secstrings = raw_addr + GET_LE(&secstrings_hdr->sh_offset);
|
||||
for (i = 0; i < GET_LE(&hdr->e_shnum); i++) {
|
||||
ELF(Shdr) *sh = raw_addr + GET_LE(&hdr->e_shoff) +
|
||||
GET_LE(&hdr->e_shentsize) * i;
|
||||
if (GET_LE(&sh->sh_type) == SHT_SYMTAB)
|
||||
symtab_hdr = sh;
|
||||
|
||||
if (!strcmp(secstrings + GET_LE(&sh->sh_name),
|
||||
".altinstructions"))
|
||||
alt_sec = sh;
|
||||
}
|
||||
|
||||
if (!symtab_hdr)
|
||||
fail("no symbol table\n");
|
||||
|
||||
strtab_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
|
||||
GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link);
|
||||
|
||||
/* Walk the symbol table */
|
||||
for (i = 0;
|
||||
i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
|
||||
i++) {
|
||||
int k;
|
||||
ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) +
|
||||
GET_LE(&symtab_hdr->sh_entsize) * i;
|
||||
const char *name = raw_addr + GET_LE(&strtab_hdr->sh_offset) +
|
||||
GET_LE(&sym->st_name);
|
||||
|
||||
for (k = 0; k < NSYMS; k++) {
|
||||
if (!strcmp(name, required_syms[k].name)) {
|
||||
if (syms[k]) {
|
||||
fail("duplicate symbol %s\n",
|
||||
required_syms[k].name);
|
||||
}
|
||||
|
||||
/*
|
||||
* Careful: we use negative addresses, but
|
||||
* st_value is unsigned, so we rely
|
||||
* on syms[k] being a signed type of the
|
||||
* correct width.
|
||||
*/
|
||||
syms[k] = GET_LE(&sym->st_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Validate mapping addresses. */
|
||||
for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) {
|
||||
INT_BITS symval = syms[special_pages[i]];
|
||||
|
||||
if (!symval)
|
||||
continue; /* The mapping isn't used; ignore it. */
|
||||
|
||||
if (symval % 4096)
|
||||
fail("%s must be a multiple of 4096\n",
|
||||
required_syms[i].name);
|
||||
if (symval + 4096 < syms[sym_vvar_start])
|
||||
fail("%s underruns vvar_start\n",
|
||||
required_syms[i].name);
|
||||
if (symval + 4096 > 0)
|
||||
fail("%s is on the wrong side of the vdso text\n",
|
||||
required_syms[i].name);
|
||||
}
|
||||
if (syms[sym_vvar_start] % 4096)
|
||||
fail("vvar_begin must be a multiple of 4096\n");
|
||||
|
||||
if (!name) {
|
||||
fwrite(stripped_addr, stripped_len, 1, outfile);
|
||||
return;
|
||||
}
|
||||
|
||||
mapping_size = (stripped_len + 4095) / 4096 * 4096;
|
||||
|
||||
fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n");
|
||||
fprintf(outfile, "#include <linux/linkage.h>\n");
|
||||
fprintf(outfile, "#include <asm/page_types.h>\n");
|
||||
fprintf(outfile, "#include <asm/vdso.h>\n");
|
||||
fprintf(outfile, "\n");
|
||||
fprintf(outfile,
|
||||
"static unsigned char raw_data[%lu] __page_aligned_data = {",
|
||||
mapping_size);
|
||||
for (j = 0; j < stripped_len; j++) {
|
||||
if (j % 10 == 0)
|
||||
fprintf(outfile, "\n\t");
|
||||
fprintf(outfile, "0x%02X, ",
|
||||
(int)((unsigned char *)stripped_addr)[j]);
|
||||
}
|
||||
fprintf(outfile, "\n};\n\n");
|
||||
|
||||
fprintf(outfile, "static struct page *pages[%lu];\n\n",
|
||||
mapping_size / 4096);
|
||||
|
||||
fprintf(outfile, "const struct vdso_image %s = {\n", name);
|
||||
fprintf(outfile, "\t.data = raw_data,\n");
|
||||
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
|
||||
fprintf(outfile, "\t.text_mapping = {\n");
|
||||
fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
|
||||
fprintf(outfile, "\t\t.pages = pages,\n");
|
||||
fprintf(outfile, "\t},\n");
|
||||
if (alt_sec) {
|
||||
fprintf(outfile, "\t.alt = %lu,\n",
|
||||
(unsigned long)GET_LE(&alt_sec->sh_offset));
|
||||
fprintf(outfile, "\t.alt_len = %lu,\n",
|
||||
(unsigned long)GET_LE(&alt_sec->sh_size));
|
||||
}
|
||||
for (i = 0; i < NSYMS; i++) {
|
||||
if (required_syms[i].export && syms[i])
|
||||
fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",
|
||||
required_syms[i].name, (int64_t)syms[i]);
|
||||
}
|
||||
fprintf(outfile, "};\n");
|
||||
}
|
120
arch/x86/entry/vdso/vdso32-setup.c
Normal file
120
arch/x86/entry/vdso/vdso32-setup.c
Normal file
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
* (C) Copyright 2002 Linus Torvalds
|
||||
* Portions based on the vdso-randomization code from exec-shield:
|
||||
* Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
|
||||
*
|
||||
* This file contains the needed initializations to support sysenter.
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm_types.h>
|
||||
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/vdso.h>
|
||||
|
||||
#ifdef CONFIG_COMPAT_VDSO
|
||||
#define VDSO_DEFAULT 0
|
||||
#else
|
||||
#define VDSO_DEFAULT 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Should the kernel map a VDSO page into processes and pass its
|
||||
* address down to glibc upon exec()?
|
||||
*/
|
||||
unsigned int __read_mostly vdso32_enabled = VDSO_DEFAULT;
|
||||
|
||||
/*
 * Early-parameter handler for "vdso32=" (and "vdso=" on 32-bit kernels).
 *
 * @s: the text following the '=' on the kernel command line.
 *
 * Stores the parsed value in vdso32_enabled.  Only 0 and 1 are valid; any
 * other value is rejected with a warning and the vDSO is disabled, as the
 * warning text states.  Always returns 1.
 */
static int __init vdso32_setup(char *s)
{
	vdso32_enabled = simple_strtoul(s, NULL, 0);

	if (vdso32_enabled > 1) {
		pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");
		/* Make the variable match what the message promises. */
		vdso32_enabled = 0;
	}

	return 1;
}
|
||||
|
||||
/*
|
||||
* For consistency, the argument vdso32=[01] affects the 32-bit vDSO
|
||||
* behavior on both 64-bit and 32-bit kernels.
|
||||
* On 32-bit kernels, vdso=[01] means the same thing.
|
||||
*/
|
||||
__setup("vdso32=", vdso32_setup);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
__setup_param("vdso=", vdso_setup, vdso32_setup, 0);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32))
|
||||
#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32))
|
||||
|
||||
#else /* CONFIG_X86_32 */
|
||||
|
||||
#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
|
||||
#define vdso32_syscall() (0)
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
||||
#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
|
||||
const struct vdso_image *selected_vdso32;
|
||||
#endif
|
||||
|
||||
/*
 * Choose the 32-bit vDSO image and initialise it.
 *
 * Preference order: the SYSCALL image (considered only when CONFIG_COMPAT
 * is set), then the SYSENTER image, and finally the int $0x80 image.
 * The choice is recorded in selected_vdso32.  Always returns 0.
 */
int __init sysenter_setup(void)
{
	/* Start from the fallback and upgrade as the checks succeed. */
	const struct vdso_image *image = &vdso_image_32_int80;

	if (vdso32_sysenter())
		image = &vdso_image_32_sysenter;

#ifdef CONFIG_COMPAT
	/* Checked last so that SYSCALL overrides SYSENTER. */
	if (vdso32_syscall())
		image = &vdso_image_32_syscall;
#endif

	selected_vdso32 = image;
	init_vdso_image(selected_vdso32);

	return 0;
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
subsys_initcall(sysenter_setup);
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
/* Register vsyscall32 into the ABI table */
|
||||
#include <linux/sysctl.h>
|
||||
|
||||
static struct ctl_table abi_table2[] = {
|
||||
{
|
||||
.procname = "vsyscall32",
|
||||
.data = &vdso32_enabled,
|
||||
.maxlen = sizeof(int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static struct ctl_table abi_root_table2[] = {
|
||||
{
|
||||
.procname = "abi",
|
||||
.mode = 0555,
|
||||
.child = abi_table2
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static __init int ia32_binfmt_init(void)
|
||||
{
|
||||
register_sysctl_table(abi_root_table2);
|
||||
return 0;
|
||||
}
|
||||
__initcall(ia32_binfmt_init);
|
||||
#endif /* CONFIG_SYSCTL */
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
1
arch/x86/entry/vdso/vdso32/.gitignore
vendored
Normal file
1
arch/x86/entry/vdso/vdso32/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
vdso32.lds
|
56
arch/x86/entry/vdso/vdso32/int80.S
Normal file
56
arch/x86/entry/vdso/vdso32/int80.S
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Code for the vDSO. This version uses the old int $0x80 method.
|
||||
*
|
||||
* First get the common code for the sigreturn entry points.
|
||||
* This must come first.
|
||||
*/
|
||||
#include "sigreturn.S"
|
||||
|
||||
.text
|
||||
.globl __kernel_vsyscall
|
||||
.type __kernel_vsyscall,@function
|
||||
ALIGN
|
||||
__kernel_vsyscall:
|
||||
.LSTART_vsyscall:
|
||||
int $0x80
|
||||
ret
|
||||
.LEND_vsyscall:
|
||||
.size __kernel_vsyscall,.-.LSTART_vsyscall
|
||||
.previous
|
||||
|
||||
.section .eh_frame,"a",@progbits
|
||||
.LSTARTFRAMEDLSI:
|
||||
.long .LENDCIEDLSI-.LSTARTCIEDLSI
|
||||
.LSTARTCIEDLSI:
|
||||
.long 0 /* CIE ID */
|
||||
.byte 1 /* Version number */
|
||||
.string "zR" /* NUL-terminated augmentation string */
|
||||
.uleb128 1 /* Code alignment factor */
|
||||
.sleb128 -4 /* Data alignment factor */
|
||||
.byte 8 /* Return address register column */
|
||||
.uleb128 1 /* Augmentation value length */
|
||||
.byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
|
||||
.byte 0x0c /* DW_CFA_def_cfa */
|
||||
.uleb128 4
|
||||
.uleb128 4
|
||||
.byte 0x88 /* DW_CFA_offset, column 0x8 */
|
||||
.uleb128 1
|
||||
.align 4
|
||||
.LENDCIEDLSI:
|
||||
.long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
|
||||
.LSTARTFDEDLSI:
|
||||
.long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
|
||||
.long .LSTART_vsyscall-. /* PC-relative start address */
|
||||
.long .LEND_vsyscall-.LSTART_vsyscall
|
||||
.uleb128 0
|
||||
.align 4
|
||||
.LENDFDEDLSI:
|
||||
.previous
|
||||
|
||||
/*
|
||||
* Pad out the segment to match the size of the sysenter.S version.
|
||||
*/
|
||||
VDSO32_vsyscall_eh_frame_size = 0x40
|
||||
.section .data,"aw",@progbits
|
||||
.space VDSO32_vsyscall_eh_frame_size-(.LENDFDEDLSI-.LSTARTFRAMEDLSI), 0
|
||||
.previous
|
44
arch/x86/entry/vdso/vdso32/note.S
Normal file
44
arch/x86/entry/vdso/vdso32/note.S
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
|
||||
* Here we can supply some information useful to userland.
|
||||
*/
|
||||
|
||||
#include <linux/version.h>
|
||||
#include <linux/elfnote.h>
|
||||
|
||||
/* Ideally this would use UTS_NAME, but using a quoted string here
|
||||
doesn't work. Remember to change this when changing the
|
||||
kernel's name. */
|
||||
ELFNOTE_START(Linux, 0, "a")
|
||||
.long LINUX_VERSION_CODE
|
||||
ELFNOTE_END
|
||||
|
||||
#ifdef CONFIG_XEN
|
||||
/*
|
||||
* Add a special note telling glibc's dynamic linker a fake hardware
|
||||
* flavor that it will use to choose the search path for libraries in the
|
||||
* same way it uses real hardware capabilities like "mmx".
|
||||
* We supply "nosegneg" as the fake capability, to indicate that we
|
||||
* do not like negative offsets in instructions using segment overrides,
|
||||
* since we implement those inefficiently. This makes it possible to
|
||||
* install libraries optimized to avoid those access patterns in someplace
|
||||
* like /lib/i686/tls/nosegneg. Note that an /etc/ld.so.conf.d/file
|
||||
* corresponding to the bits here is needed to make ldconfig work right.
|
||||
* It should contain:
|
||||
* hwcap 1 nosegneg
|
||||
* to match the mapping of bit to name that we give here.
|
||||
*
|
||||
* At runtime, the fake hardware feature will be considered to be present
|
||||
* if its bit is set in the mask word. So, we start with the mask 0, and
|
||||
* at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
|
||||
*/
|
||||
|
||||
#include "../../xen/vdso.h" /* Defines VDSO_NOTE_NONEGSEG_BIT. */
|
||||
|
||||
ELFNOTE_START(GNU, 2, "a")
|
||||
.long 1 /* ncaps */
|
||||
VDSO32_NOTE_MASK: /* Symbol used by arch/x86/xen/setup.c */
|
||||
.long 0 /* mask */
|
||||
.byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg" /* bit, name */
|
||||
ELFNOTE_END
|
||||
#endif
|
145
arch/x86/entry/vdso/vdso32/sigreturn.S
Normal file
145
arch/x86/entry/vdso/vdso32/sigreturn.S
Normal file
@@ -0,0 +1,145 @@
|
||||
/*
|
||||
* Common code for the sigreturn entry points in vDSO images.
|
||||
* So far this code is the same for both int80 and sysenter versions.
|
||||
* This file is #include'd by int80.S et al to define them first thing.
|
||||
* The kernel assumes that the addresses of these routines are constant
|
||||
* for all vDSO implementations.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/unistd_32.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
|
||||
#ifndef SYSCALL_ENTER_KERNEL
|
||||
#define SYSCALL_ENTER_KERNEL int $0x80
|
||||
#endif
|
||||
|
||||
.text
|
||||
.globl __kernel_sigreturn
|
||||
.type __kernel_sigreturn,@function
|
||||
nop /* this guy is needed for .LSTARTFDEDLSI1 below (watch for HACK) */
|
||||
ALIGN
|
||||
__kernel_sigreturn:
|
||||
.LSTART_sigreturn:
|
||||
popl %eax /* XXX does this mean it needs unwind info? */
|
||||
movl $__NR_sigreturn, %eax
|
||||
SYSCALL_ENTER_KERNEL
|
||||
.LEND_sigreturn:
|
||||
nop
|
||||
.size __kernel_sigreturn,.-.LSTART_sigreturn
|
||||
|
||||
.globl __kernel_rt_sigreturn
|
||||
.type __kernel_rt_sigreturn,@function
|
||||
ALIGN
|
||||
__kernel_rt_sigreturn:
|
||||
.LSTART_rt_sigreturn:
|
||||
movl $__NR_rt_sigreturn, %eax
|
||||
SYSCALL_ENTER_KERNEL
|
||||
.LEND_rt_sigreturn:
|
||||
nop
|
||||
.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
|
||||
.previous
|
||||
|
||||
.section .eh_frame,"a",@progbits
|
||||
.LSTARTFRAMEDLSI1:
|
||||
.long .LENDCIEDLSI1-.LSTARTCIEDLSI1
|
||||
.LSTARTCIEDLSI1:
|
||||
.long 0 /* CIE ID */
|
||||
.byte 1 /* Version number */
|
||||
.string "zRS" /* NUL-terminated augmentation string */
|
||||
.uleb128 1 /* Code alignment factor */
|
||||
.sleb128 -4 /* Data alignment factor */
|
||||
.byte 8 /* Return address register column */
|
||||
.uleb128 1 /* Augmentation value length */
|
||||
.byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
|
||||
.byte 0 /* DW_CFA_nop */
|
||||
.align 4
|
||||
.LENDCIEDLSI1:
|
||||
.long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */
|
||||
.LSTARTFDEDLSI1:
|
||||
.long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */
|
||||
/* HACK: The dwarf2 unwind routines will subtract 1 from the
|
||||
return address to get an address in the middle of the
|
||||
presumed call instruction. Since we didn't get here via
|
||||
a call, we need to include the nop before the real start
|
||||
to make up for it. */
|
||||
.long .LSTART_sigreturn-1-. /* PC-relative start address */
|
||||
.long .LEND_sigreturn-.LSTART_sigreturn+1
|
||||
.uleb128 0 /* Augmentation */
|
||||
/* What follows are the instructions for the table generation.
|
||||
We record the locations of each register saved. This is
|
||||
complicated by the fact that the "CFA" is always assumed to
|
||||
be the value of the stack pointer in the caller. This means
|
||||
that we must define the CFA of this body of code to be the
|
||||
saved value of the stack pointer in the sigcontext. Which
|
||||
also means that there is no fixed relation to the other
|
||||
saved registers, which means that we must use DW_CFA_expression
|
||||
to compute their addresses. It also means that when we
|
||||
adjust the stack with the popl, we have to do it all over again. */
|
||||
|
||||
#define do_cfa_expr(offset) \
|
||||
.byte 0x0f; /* DW_CFA_def_cfa_expression */ \
|
||||
.uleb128 1f-0f; /* length */ \
|
||||
0: .byte 0x74; /* DW_OP_breg4 */ \
|
||||
.sleb128 offset; /* offset */ \
|
||||
.byte 0x06; /* DW_OP_deref */ \
|
||||
1:
|
||||
|
||||
#define do_expr(regno, offset) \
|
||||
.byte 0x10; /* DW_CFA_expression */ \
|
||||
.uleb128 regno; /* regno */ \
|
||||
.uleb128 1f-0f; /* length */ \
|
||||
0: .byte 0x74; /* DW_OP_breg4 */ \
|
||||
.sleb128 offset; /* offset */ \
|
||||
1:
|
||||
|
||||
do_cfa_expr(IA32_SIGCONTEXT_sp+4)
|
||||
do_expr(0, IA32_SIGCONTEXT_ax+4)
|
||||
do_expr(1, IA32_SIGCONTEXT_cx+4)
|
||||
do_expr(2, IA32_SIGCONTEXT_dx+4)
|
||||
do_expr(3, IA32_SIGCONTEXT_bx+4)
|
||||
do_expr(5, IA32_SIGCONTEXT_bp+4)
|
||||
do_expr(6, IA32_SIGCONTEXT_si+4)
|
||||
do_expr(7, IA32_SIGCONTEXT_di+4)
|
||||
do_expr(8, IA32_SIGCONTEXT_ip+4)
|
||||
|
||||
.byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */
|
||||
|
||||
do_cfa_expr(IA32_SIGCONTEXT_sp)
|
||||
do_expr(0, IA32_SIGCONTEXT_ax)
|
||||
do_expr(1, IA32_SIGCONTEXT_cx)
|
||||
do_expr(2, IA32_SIGCONTEXT_dx)
|
||||
do_expr(3, IA32_SIGCONTEXT_bx)
|
||||
do_expr(5, IA32_SIGCONTEXT_bp)
|
||||
do_expr(6, IA32_SIGCONTEXT_si)
|
||||
do_expr(7, IA32_SIGCONTEXT_di)
|
||||
do_expr(8, IA32_SIGCONTEXT_ip)
|
||||
|
||||
.align 4
|
||||
.LENDFDEDLSI1:
|
||||
|
||||
.long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */
|
||||
.LSTARTFDEDLSI2:
|
||||
.long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */
|
||||
/* HACK: See above wrt unwind library assumptions. */
|
||||
.long .LSTART_rt_sigreturn-1-. /* PC-relative start address */
|
||||
.long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
|
||||
.uleb128 0 /* Augmentation */
|
||||
/* What follows are the instructions for the table generation.
|
||||
We record the locations of each register saved. This is
|
||||
slightly less complicated than the above, since we don't
|
||||
modify the stack pointer in the process. */
|
||||
|
||||
do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_sp)
|
||||
do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ax)
|
||||
do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_cx)
|
||||
do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_dx)
|
||||
do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bx)
|
||||
do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bp)
|
||||
do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_si)
|
||||
do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_di)
|
||||
do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ip)
|
||||
|
||||
.align 4
|
||||
.LENDFDEDLSI2:
|
||||
.previous
|
75
arch/x86/entry/vdso/vdso32/syscall.S
Normal file
75
arch/x86/entry/vdso/vdso32/syscall.S
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Code for the vDSO. This version uses the syscall instruction.
|
||||
*
|
||||
* First get the common code for the sigreturn entry points.
|
||||
* This must come first.
|
||||
*/
|
||||
#define SYSCALL_ENTER_KERNEL syscall
|
||||
#include "sigreturn.S"
|
||||
|
||||
#include <asm/segment.h>
|
||||
|
||||
.text
|
||||
.globl __kernel_vsyscall
|
||||
.type __kernel_vsyscall,@function
|
||||
ALIGN
|
||||
__kernel_vsyscall:
|
||||
.LSTART_vsyscall:
|
||||
push %ebp
|
||||
.Lpush_ebp:
|
||||
movl %ecx, %ebp
|
||||
syscall
|
||||
movl %ebp, %ecx
|
||||
popl %ebp
|
||||
.Lpop_ebp:
|
||||
ret
|
||||
.LEND_vsyscall:
|
||||
.size __kernel_vsyscall,.-.LSTART_vsyscall
|
||||
|
||||
.section .eh_frame,"a",@progbits
|
||||
.LSTARTFRAME:
|
||||
.long .LENDCIE-.LSTARTCIE
|
||||
.LSTARTCIE:
|
||||
.long 0 /* CIE ID */
|
||||
.byte 1 /* Version number */
|
||||
.string "zR" /* NUL-terminated augmentation string */
|
||||
.uleb128 1 /* Code alignment factor */
|
||||
.sleb128 -4 /* Data alignment factor */
|
||||
.byte 8 /* Return address register column */
|
||||
.uleb128 1 /* Augmentation value length */
|
||||
.byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
|
||||
.byte 0x0c /* DW_CFA_def_cfa */
|
||||
.uleb128 4
|
||||
.uleb128 4
|
||||
.byte 0x88 /* DW_CFA_offset, column 0x8 */
|
||||
.uleb128 1
|
||||
.align 4
|
||||
.LENDCIE:
|
||||
|
||||
.long .LENDFDE1-.LSTARTFDE1 /* Length FDE */
|
||||
.LSTARTFDE1:
|
||||
.long .LSTARTFDE1-.LSTARTFRAME /* CIE pointer */
|
||||
.long .LSTART_vsyscall-. /* PC-relative start address */
|
||||
.long .LEND_vsyscall-.LSTART_vsyscall
|
||||
.uleb128 0 /* Augmentation length */
|
||||
/* What follows are the instructions for the table generation.
|
||||
We have to record all changes of the stack pointer. */
|
||||
.byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
|
||||
.byte 0x0e /* DW_CFA_def_cfa_offset */
|
||||
.uleb128 8
|
||||
.byte 0x85, 0x02 /* DW_CFA_offset %ebp -8 */
|
||||
.byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
|
||||
.byte 0xc5 /* DW_CFA_restore %ebp */
|
||||
.byte 0x0e /* DW_CFA_def_cfa_offset */
|
||||
.uleb128 4
|
||||
.align 4
|
||||
.LENDFDE1:
|
||||
.previous
|
||||
|
||||
/*
|
||||
* Pad out the segment to match the size of the sysenter.S version.
|
||||
*/
|
||||
VDSO32_vsyscall_eh_frame_size = 0x40
|
||||
.section .data,"aw",@progbits
|
||||
.space VDSO32_vsyscall_eh_frame_size-(.LENDFDE1-.LSTARTFRAME), 0
|
||||
.previous
|
116
arch/x86/entry/vdso/vdso32/sysenter.S
Normal file
116
arch/x86/entry/vdso/vdso32/sysenter.S
Normal file
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
* Code for the vDSO. This version uses the sysenter instruction.
|
||||
*
|
||||
* First get the common code for the sigreturn entry points.
|
||||
* This must come first.
|
||||
*/
|
||||
#include "sigreturn.S"
|
||||
|
||||
/*
|
||||
* The caller puts arg2 in %ecx, which gets pushed. The kernel will use
|
||||
* %ecx itself for arg2. The pushing is because the sysexit instruction
|
||||
* (found in entry.S) requires that we clobber %ecx with the desired %esp.
|
||||
* User code might expect that %ecx is unclobbered though, as it would be
|
||||
* for returning via the iret instruction, so we must push and pop.
|
||||
*
|
||||
* The caller puts arg3 in %edx, which the sysexit instruction requires
|
||||
* for %eip. Thus, exactly as for arg2, we must push and pop.
|
||||
*
|
||||
* Arg6 is different. The caller puts arg6 in %ebp. Since the sysenter
|
||||
* instruction clobbers %esp, the user's %esp won't even survive entry
|
||||
* into the kernel. We store %esp in %ebp. Code in entry.S must fetch
|
||||
* arg6 from the stack.
|
||||
*
|
||||
* You can not use this vsyscall for the clone() syscall because the
|
||||
* three words on the parent stack do not get copied to the child.
|
||||
*/
|
||||
.text
|
||||
.globl __kernel_vsyscall
|
||||
.type __kernel_vsyscall,@function
|
||||
ALIGN
|
||||
__kernel_vsyscall:
|
||||
.LSTART_vsyscall:
|
||||
push %ecx
|
||||
.Lpush_ecx:
|
||||
push %edx
|
||||
.Lpush_edx:
|
||||
push %ebp
|
||||
.Lenter_kernel:
|
||||
movl %esp,%ebp
|
||||
sysenter
|
||||
|
||||
/* 7: align return point with nop's to make disassembly easier */
|
||||
.space 7,0x90
|
||||
|
||||
/* 14: System call restart point is here! (SYSENTER_RETURN-2) */
|
||||
int $0x80
|
||||
/* 16: System call normal return point is here! */
|
||||
VDSO32_SYSENTER_RETURN: /* Symbol used by sysenter.c via vdso32-syms.h */
|
||||
pop %ebp
|
||||
.Lpop_ebp:
|
||||
pop %edx
|
||||
.Lpop_edx:
|
||||
pop %ecx
|
||||
.Lpop_ecx:
|
||||
ret
|
||||
.LEND_vsyscall:
|
||||
.size __kernel_vsyscall,.-.LSTART_vsyscall
|
||||
.previous
|
||||
|
||||
.section .eh_frame,"a",@progbits
|
||||
.LSTARTFRAMEDLSI:
|
||||
.long .LENDCIEDLSI-.LSTARTCIEDLSI
|
||||
.LSTARTCIEDLSI:
|
||||
.long 0 /* CIE ID */
|
||||
.byte 1 /* Version number */
|
||||
.string "zR" /* NUL-terminated augmentation string */
|
||||
.uleb128 1 /* Code alignment factor */
|
||||
.sleb128 -4 /* Data alignment factor */
|
||||
.byte 8 /* Return address register column */
|
||||
.uleb128 1 /* Augmentation value length */
|
||||
.byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
|
||||
.byte 0x0c /* DW_CFA_def_cfa */
|
||||
.uleb128 4
|
||||
.uleb128 4
|
||||
.byte 0x88 /* DW_CFA_offset, column 0x8 */
|
||||
.uleb128 1
|
||||
.align 4
|
||||
.LENDCIEDLSI:
|
||||
.long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
|
||||
.LSTARTFDEDLSI:
|
||||
.long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
|
||||
.long .LSTART_vsyscall-. /* PC-relative start address */
|
||||
.long .LEND_vsyscall-.LSTART_vsyscall
|
||||
.uleb128 0
|
||||
/* What follows are the instructions for the table generation.
|
||||
We have to record all changes of the stack pointer. */
|
||||
.byte 0x40 + (.Lpush_ecx-.LSTART_vsyscall) /* DW_CFA_advance_loc */
|
||||
.byte 0x0e /* DW_CFA_def_cfa_offset */
|
||||
.byte 0x08 /* RA at offset 8 now */
|
||||
.byte 0x40 + (.Lpush_edx-.Lpush_ecx) /* DW_CFA_advance_loc */
|
||||
.byte 0x0e /* DW_CFA_def_cfa_offset */
|
||||
.byte 0x0c /* RA at offset 12 now */
|
||||
.byte 0x40 + (.Lenter_kernel-.Lpush_edx) /* DW_CFA_advance_loc */
|
||||
.byte 0x0e /* DW_CFA_def_cfa_offset */
|
||||
.byte 0x10 /* RA at offset 16 now */
|
||||
.byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */
|
||||
/* Finally the epilogue. */
|
||||
.byte 0x40 + (.Lpop_ebp-.Lenter_kernel) /* DW_CFA_advance_loc */
|
||||
.byte 0x0e /* DW_CFA_def_cfa_offset */
|
||||
.byte 0x0c /* RA at offset 12 now */
|
||||
.byte 0xc5 /* DW_CFA_restore %ebp */
|
||||
.byte 0x40 + (.Lpop_edx-.Lpop_ebp) /* DW_CFA_advance_loc */
|
||||
.byte 0x0e /* DW_CFA_def_cfa_offset */
|
||||
.byte 0x08 /* RA at offset 8 now */
|
||||
.byte 0x40 + (.Lpop_ecx-.Lpop_edx) /* DW_CFA_advance_loc */
|
||||
.byte 0x0e /* DW_CFA_def_cfa_offset */
|
||||
.byte 0x04 /* RA at offset 4 now */
|
||||
.align 4
|
||||
.LENDFDEDLSI:
|
||||
.previous
|
||||
|
||||
/*
|
||||
* Emit a symbol with the size of this .eh_frame data,
|
||||
* to verify it matches the other versions.
|
||||
*/
|
||||
VDSO32_vsyscall_eh_frame_size = (.LENDFDEDLSI-.LSTARTFRAMEDLSI)
|
30
arch/x86/entry/vdso/vdso32/vclock_gettime.c
Normal file
30
arch/x86/entry/vdso/vdso32/vclock_gettime.c
Normal file
@@ -0,0 +1,30 @@
|
||||
#define BUILD_VDSO32
|
||||
|
||||
#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
|
||||
#undef CONFIG_OPTIMIZE_INLINING
|
||||
#endif
|
||||
|
||||
#undef CONFIG_X86_PPRO_FENCE
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
/*
|
||||
* in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel
|
||||
* configuration
|
||||
*/
|
||||
#undef CONFIG_64BIT
|
||||
#undef CONFIG_X86_64
|
||||
#undef CONFIG_ILLEGAL_POINTER_VALUE
|
||||
#undef CONFIG_SPARSEMEM_VMEMMAP
|
||||
#undef CONFIG_NR_CPUS
|
||||
|
||||
#define CONFIG_X86_32 1
|
||||
#define CONFIG_PAGE_OFFSET 0
|
||||
#define CONFIG_ILLEGAL_POINTER_VALUE 0
|
||||
#define CONFIG_NR_CPUS 1
|
||||
|
||||
#define BUILD_VDSO32_64
|
||||
|
||||
#endif
|
||||
|
||||
#include "../vclock_gettime.c"
|
1
arch/x86/entry/vdso/vdso32/vdso-fakesections.c
Normal file
1
arch/x86/entry/vdso/vdso32/vdso-fakesections.c
Normal file
@@ -0,0 +1 @@
|
||||
#include "../vdso-fakesections.c"
|
37
arch/x86/entry/vdso/vdso32/vdso32.lds.S
Normal file
37
arch/x86/entry/vdso/vdso32/vdso32.lds.S
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Linker script for 32-bit vDSO.
|
||||
* We #include the file to define the layout details.
|
||||
*
|
||||
* This file defines the version script giving the user-exported symbols in
|
||||
* the DSO.
|
||||
*/
|
||||
|
||||
#include <asm/page.h>
|
||||
|
||||
#define BUILD_VDSO32
|
||||
|
||||
#include "../vdso-layout.lds.S"
|
||||
|
||||
/* The ELF entry point can be used to set the AT_SYSINFO value. */
|
||||
ENTRY(__kernel_vsyscall);
|
||||
|
||||
/*
|
||||
* This controls what userland symbols we export from the vDSO.
|
||||
*/
|
||||
VERSION
|
||||
{
|
||||
LINUX_2.6 {
|
||||
global:
|
||||
__vdso_clock_gettime;
|
||||
__vdso_gettimeofday;
|
||||
__vdso_time;
|
||||
};
|
||||
|
||||
LINUX_2.5 {
|
||||
global:
|
||||
__kernel_vsyscall;
|
||||
__kernel_sigreturn;
|
||||
__kernel_rt_sigreturn;
|
||||
local: *;
|
||||
};
|
||||
}
|
25
arch/x86/entry/vdso/vdsox32.lds.S
Normal file
25
arch/x86/entry/vdso/vdsox32.lds.S
Normal file
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Linker script for x32 vDSO.
|
||||
* We #include the file to define the layout details.
|
||||
*
|
||||
* This file defines the version script giving the user-exported symbols in
|
||||
* the DSO.
|
||||
*/
|
||||
|
||||
#define BUILD_VDSOX32
|
||||
|
||||
#include "vdso-layout.lds.S"
|
||||
|
||||
/*
|
||||
* This controls what userland symbols we export from the vDSO.
|
||||
*/
|
||||
VERSION {
|
||||
LINUX_2.6 {
|
||||
global:
|
||||
__vdso_clock_gettime;
|
||||
__vdso_gettimeofday;
|
||||
__vdso_getcpu;
|
||||
__vdso_time;
|
||||
local: *;
|
||||
};
|
||||
}
|
28
arch/x86/entry/vdso/vgetcpu.c
Normal file
28
arch/x86/entry/vdso/vgetcpu.c
Normal file
@@ -0,0 +1,28 @@
|
||||
/*
|
||||
* Copyright 2006 Andi Kleen, SUSE Labs.
|
||||
* Subject to the GNU Public License, v.2
|
||||
*
|
||||
* Fast user context implementation of getcpu()
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/getcpu.h>
|
||||
#include <linux/time.h>
|
||||
#include <asm/vgtod.h>
|
||||
|
||||
notrace long
|
||||
__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
|
||||
{
|
||||
unsigned int p;
|
||||
|
||||
p = __getcpu();
|
||||
|
||||
if (cpu)
|
||||
*cpu = p & VGETCPU_CPU_MASK;
|
||||
if (node)
|
||||
*node = p >> 12;
|
||||
return 0;
|
||||
}
|
||||
|
||||
long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
|
||||
__attribute__((weak, alias("__vdso_getcpu")));
|
300
arch/x86/entry/vdso/vma.c
Normal file
300
arch/x86/entry/vdso/vma.c
Normal file
@@ -0,0 +1,300 @@
|
||||
/*
|
||||
* Copyright 2007 Andi Kleen, SUSE Labs.
|
||||
* Subject to the GPL, v.2
|
||||
*
|
||||
* This contains most of the x86 vDSO kernel-side code.
|
||||
*/
|
||||
#include <linux/mm.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/elf.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <asm/vgtod.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/vdso.h>
|
||||
#include <asm/vvar.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/hpet.h>
|
||||
#include <asm/desc.h>
|
||||
|
||||
#if defined(CONFIG_X86_64)
|
||||
unsigned int __read_mostly vdso64_enabled = 1;
|
||||
#endif
|
||||
|
||||
/*
 * One-time boot setup of a vDSO image.
 *
 * Fills image->text_mapping.pages[] with the struct page of every page that
 * backs image->data, then runs apply_alternatives() over the image's
 * alternatives table (image->alt_len bytes starting at image->data +
 * image->alt).
 *
 * The image size must be a whole number of pages; anything else is a BUG.
 */
void __init init_vdso_image(const struct vdso_image *image)
{
	struct alt_instr *alt_begin, *alt_end;
	int npages;
	int pg;

	BUG_ON(image->size % PAGE_SIZE != 0);

	npages = (image->size) / PAGE_SIZE;

	pg = 0;
	while (pg < npages) {
		image->text_mapping.pages[pg] =
			virt_to_page(image->data + pg*PAGE_SIZE);
		pg++;
	}

	alt_begin = (struct alt_instr *)(image->data + image->alt);
	alt_end = (struct alt_instr *)(image->data + image->alt +
				       image->alt_len);

	apply_alternatives(alt_begin, alt_end);
}
|
||||
|
||||
struct linux_binprm;
|
||||
|
||||
/*
|
||||
* Put the vdso above the (randomized) stack with another randomized
|
||||
* offset. This way there is no hole in the middle of address space.
|
||||
* To save memory make sure it is still in the same PTE as the stack
|
||||
* top. This doesn't give that many random bits.
|
||||
*
|
||||
* Note that this algorithm is imperfect: the distribution of the vdso
|
||||
* start address within a PMD is biased toward the end.
|
||||
*
|
||||
* Only used for the 64-bit and x32 vdsos.
|
||||
*/
|
||||
static unsigned long vdso_addr(unsigned long start, unsigned len)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
return 0;
|
||||
#else
|
||||
unsigned long addr, end;
|
||||
unsigned offset;
|
||||
|
||||
/*
|
||||
* Round up the start address. It can start out unaligned as a result
|
||||
* of stack start randomization.
|
||||
*/
|
||||
start = PAGE_ALIGN(start);
|
||||
|
||||
/* Round the lowest possible end address up to a PMD boundary. */
|
||||
end = (start + len + PMD_SIZE - 1) & PMD_MASK;
|
||||
if (end >= TASK_SIZE_MAX)
|
||||
end = TASK_SIZE_MAX;
|
||||
end -= len;
|
||||
|
||||
if (end > start) {
|
||||
offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
|
||||
addr = start + (offset << PAGE_SHIFT);
|
||||
} else {
|
||||
addr = start;
|
||||
}
|
||||
|
||||
/*
|
||||
* Forcibly align the final address in case we have a hardware
|
||||
* issue that requires alignment for performance reasons.
|
||||
*/
|
||||
addr = align_vdso_addr(addr);
|
||||
|
||||
return addr;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Map a vDSO image, together with the vvar area that sits directly below
 * it, into the current process.
 *
 * @image:          vDSO image to map
 * @calculate_addr: if true, use vdso_addr() (based on the stack start) as
 *                  the placement hint; otherwise the hint is 0
 *
 * Layout of the range obtained from get_unmapped_area():
 *
 *   [addr, text_start)                      vvar area
 *                                           (-image->sym_vvar_start bytes)
 *   [text_start, text_start + image->size)  vDSO text
 *
 * Returns 0 on success or a negative errno.  On failure
 * mm->context.vdso is reset to NULL and every mapping this call had
 * already installed is removed again, so a failed call does not leave a
 * stray vDSO/vvar mapping behind in the address space.
 */
static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr, text_start;
	/* vvar area plus text; sym_vvar_start is negative (see layout). */
	unsigned long total_len = image->size - image->sym_vvar_start;
	/* Set once the text mapping exists and needs undoing on error. */
	bool installed = false;
	int ret = 0;
	static struct page *no_pages[] = {NULL};
	static struct vm_special_mapping vvar_mapping = {
		.name = "[vvar]",
		.pages = no_pages,
	};

	if (calculate_addr) {
		addr = vdso_addr(current->mm->start_stack, total_len);
	} else {
		addr = 0;
	}

	down_write(&mm->mmap_sem);

	addr = get_unmapped_area(NULL, addr, total_len, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	text_start = addr - image->sym_vvar_start;
	current->mm->context.vdso = (void __user *)text_start;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	vma = _install_special_mapping(mm,
				       text_start,
				       image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &image->text_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}
	installed = true;

	vma = _install_special_mapping(mm,
				       addr,
				       -image->sym_vvar_start,
				       VM_READ|VM_MAYREAD,
				       &vvar_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	if (image->sym_vvar_page)
		ret = remap_pfn_range(vma,
				      text_start + image->sym_vvar_page,
				      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
				      PAGE_SIZE,
				      PAGE_READONLY);

	if (ret)
		goto up_fail;

#ifdef CONFIG_HPET_TIMER
	if (hpet_address && image->sym_hpet_page) {
		ret = io_remap_pfn_range(vma,
			text_start + image->sym_hpet_page,
			hpet_address >> PAGE_SHIFT,
			PAGE_SIZE,
			pgprot_noncached(PAGE_READONLY));

		if (ret)
			goto up_fail;
	}
#endif

up_fail:
	if (ret) {
		/*
		 * Tear down whatever was installed.  One call over the whole
		 * reserved range covers both the vvar area and the text, and
		 * is harmless for the part that was never mapped.  mmap_sem
		 * is still held for writing here.
		 */
		if (installed)
			do_munmap(mm, addr, total_len);

		current->mm->context.vdso = NULL;
	}

	up_write(&mm->mmap_sem);
	return ret;
}
|
||||
|
||||
#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
|
||||
static int load_vdso32(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (vdso32_enabled != 1) /* Other values all mean "disabled" */
|
||||
return 0;
|
||||
|
||||
ret = map_vdso(selected_vdso32, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
|
||||
current_thread_info()->sysenter_return =
|
||||
current->mm->context.vdso +
|
||||
selected_vdso32->sym_VDSO32_SYSENTER_RETURN;
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
|
||||
{
|
||||
if (!vdso64_enabled)
|
||||
return 0;
|
||||
|
||||
return map_vdso(&vdso_image_64, true);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
/*
 * Compat variant: a thread with TIF_X32 set gets the x32 vDSO (subject to
 * vdso64_enabled); every other compat task gets the 32-bit vDSO via
 * load_vdso32().  @bprm and @uses_interp are not used.
 */
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp)
{
#ifdef CONFIG_X86_X32_ABI
	if (test_thread_flag(TIF_X32))
		return vdso64_enabled ? map_vdso(&vdso_image_x32, true) : 0;
#endif

	return load_vdso32();
}
|
||||
#endif
|
||||
#else
|
||||
/*
 * 32-bit kernel: the only vDSO to set up is the 32-bit one.  @bprm and
 * @uses_interp are not used.  Returns the result of load_vdso32().
 */
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	int err = load_vdso32();

	return err;
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static __init int vdso_setup(char *s)
|
||||
{
|
||||
vdso64_enabled = simple_strtoul(s, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
__setup("vdso=", vdso_setup);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static void vgetcpu_cpu_init(void *arg)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct desc_struct d = { };
|
||||
unsigned long node = 0;
|
||||
#ifdef CONFIG_NUMA
|
||||
node = cpu_to_node(cpu);
|
||||
#endif
|
||||
if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
|
||||
write_rdtscp_aux((node << 12) | cpu);
|
||||
|
||||
/*
|
||||
* Store cpu number in limit so that it can be loaded
|
||||
* quickly in user space in vgetcpu. (12 bits for the CPU
|
||||
* and 8 bits for the node)
|
||||
*/
|
||||
d.limit0 = cpu | ((node & 0xf) << 12);
|
||||
d.limit = node >> 4;
|
||||
d.type = 5; /* RO data, expand down, accessed */
|
||||
d.dpl = 3; /* Visible to user code */
|
||||
d.s = 1; /* Not a system segment */
|
||||
d.p = 1; /* Present */
|
||||
d.d = 1; /* 32-bit */
|
||||
|
||||
write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
|
||||
}
|
||||
|
||||
static int
|
||||
vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
|
||||
{
|
||||
long cpu = (long)arg;
|
||||
|
||||
if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
|
||||
smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);
|
||||
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
/*
 * Boot-time initialisation of the 64-bit side of the vDSO: prepare the
 * 64-bit image (and the x32 image when CONFIG_X86_X32_ABI is set), then
 * run vgetcpu_cpu_init() on every CPU and register the hotplug notifier
 * that repeats it for CPUs coming online later.  Always returns 0.
 */
static int __init init_vdso(void)
{
	init_vdso_image(&vdso_image_64);
#ifdef CONFIG_X86_X32_ABI
	init_vdso_image(&vdso_image_x32);
#endif

	/*
	 * The initial pass over all CPUs and the notifier registration both
	 * happen between the register_begin/register_done pair.
	 */
	cpu_notifier_register_begin();
	on_each_cpu(vgetcpu_cpu_init, NULL, 1);
	/* notifier priority > KVM */
	__hotcpu_notifier(vgetcpu_cpu_notifier, 30);
	cpu_notifier_register_done();

	return 0;
}
|
||||
subsys_initcall(init_vdso);
|
||||
#endif /* CONFIG_X86_64 */
|
7
arch/x86/entry/vsyscall/Makefile
Normal file
7
arch/x86/entry/vsyscall/Makefile
Normal file
@@ -0,0 +1,7 @@
|
||||
#
|
||||
# Makefile for the x86 low level vsyscall code
|
||||
#
|
||||
obj-y := vsyscall_gtod.o
|
||||
|
||||
obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
|
||||
|
335
arch/x86/entry/vsyscall/vsyscall_64.c
Normal file
335
arch/x86/entry/vsyscall/vsyscall_64.c
Normal file
@@ -0,0 +1,335 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2014 Andy Lutomirski <luto@amacapital.net>
|
||||
*
|
||||
* Based on the original implementation which is:
|
||||
* Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
|
||||
* Copyright 2003 Andi Kleen, SuSE Labs.
|
||||
*
|
||||
* Parts of the original code have been moved to arch/x86/entry/vdso/vma.c
|
||||
*
|
||||
* This file implements vsyscall emulation. vsyscalls are a legacy ABI:
|
||||
* Userspace can request certain kernel services by calling fixed
|
||||
* addresses. This concept is problematic:
|
||||
*
|
||||
* - It interferes with ASLR.
|
||||
* - It's awkward to write code that lives in kernel addresses but is
|
||||
* callable by userspace at fixed addresses.
|
||||
* - The whole concept is impossible for 32-bit compat userspace.
|
||||
* - UML cannot easily virtualize a vsyscall.
|
||||
*
|
||||
* As of mid-2014, I believe that there is no new userspace code that
|
||||
* will use a vsyscall if the vDSO is present. I hope that there will
|
||||
* soon be no new userspace code that will ever use a vsyscall.
|
||||
*
|
||||
* The code in this file emulates vsyscalls when notified of a page
|
||||
* fault to a vsyscall address.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/ratelimit.h>
|
||||
|
||||
#include <asm/vsyscall.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/traps.h>
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "vsyscall_trace.h"
|
||||
|
||||
static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
|
||||
|
||||
/*
 * Parse the "vsyscall=" early parameter.  Accepted values are "emulate",
 * "native" and "none".  Returns 0 on success and -EINVAL for a missing or
 * unrecognised value (vsyscall_mode is left untouched in that case).
 */
static int __init vsyscall_setup(char *str)
{
	if (!str)
		return -EINVAL;

	if (strcmp("emulate", str) == 0)
		vsyscall_mode = EMULATE;
	else if (strcmp("native", str) == 0)
		vsyscall_mode = NATIVE;
	else if (strcmp("none", str) == 0)
		vsyscall_mode = NONE;
	else
		return -EINVAL;

	return 0;
}
|
||||
early_param("vsyscall", vsyscall_setup);
|
||||
|
||||
static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
|
||||
const char *message)
|
||||
{
|
||||
if (!show_unhandled_signals)
|
||||
return;
|
||||
|
||||
printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
|
||||
level, current->comm, task_pid_nr(current),
|
||||
message, regs->ip, regs->cs,
|
||||
regs->sp, regs->ax, regs->si, regs->di);
|
||||
}
|
||||
|
||||
static int addr_to_vsyscall_nr(unsigned long addr)
|
||||
{
|
||||
int nr;
|
||||
|
||||
if ((addr & ~0xC00UL) != VSYSCALL_ADDR)
|
||||
return -EINVAL;
|
||||
|
||||
nr = (addr & 0xC00UL) >> 10;
|
||||
if (nr >= 3)
|
||||
return -EINVAL;
|
||||
|
||||
return nr;
|
||||
}
|
||||
|
||||
/*
 * Check that user space may write @size bytes at @ptr.
 *
 * Returns true when access_ok() accepts the range.  Otherwise the current
 * thread's fault state is filled in as for a write page fault at @ptr, a
 * SIGSEGV (SEGV_MAPERR, si_addr = @ptr) is forced on the current task,
 * and false is returned.
 */
static bool write_ok_or_segv(unsigned long ptr, size_t size)
{
	/*
	 * XXX: if access_ok, get_user, and put_user handled
	 * sig_on_uaccess_error, this could go away.
	 */

	if (!access_ok(VERIFY_WRITE, (void __user *)ptr, size)) {
		siginfo_t info;
		struct thread_struct *thread = &current->thread;

		thread->error_code	= 6;  /* user fault, no page, write */
		thread->cr2		= ptr;
		thread->trap_nr		= X86_TRAP_PF;

		memset(&info, 0, sizeof(info));
		info.si_signo		= SIGSEGV;
		info.si_errno		= 0;
		info.si_code		= SEGV_MAPERR;
		info.si_addr		= (void __user *)ptr;

		force_sig_info(SIGSEGV, &info, current);
		return false;
	} else {
		return true;
	}
}
|
||||
|
||||
bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
|
||||
{
|
||||
struct task_struct *tsk;
|
||||
unsigned long caller;
|
||||
int vsyscall_nr, syscall_nr, tmp;
|
||||
int prev_sig_on_uaccess_error;
|
||||
long ret;
|
||||
|
||||
/*
|
||||
* No point in checking CS -- the only way to get here is a user mode
|
||||
* trap to a high address, which means that we're in 64-bit user code.
|
||||
*/
|
||||
|
||||
WARN_ON_ONCE(address != regs->ip);
|
||||
|
||||
if (vsyscall_mode == NONE) {
|
||||
warn_bad_vsyscall(KERN_INFO, regs,
|
||||
"vsyscall attempted with vsyscall=none");
|
||||
return false;
|
||||
}
|
||||
|
||||
vsyscall_nr = addr_to_vsyscall_nr(address);
|
||||
|
||||
trace_emulate_vsyscall(vsyscall_nr);
|
||||
|
||||
if (vsyscall_nr < 0) {
|
||||
warn_bad_vsyscall(KERN_WARNING, regs,
|
||||
"misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
|
||||
goto sigsegv;
|
||||
}
|
||||
|
||||
if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
|
||||
warn_bad_vsyscall(KERN_WARNING, regs,
|
||||
"vsyscall with bad stack (exploit attempt?)");
|
||||
goto sigsegv;
|
||||
}
|
||||
|
||||
tsk = current;
|
||||
|
||||
/*
|
||||
* Check for access_ok violations and find the syscall nr.
|
||||
*
|
||||
* NULL is a valid user pointer (in the access_ok sense) on 32-bit and
|
||||
* 64-bit, so we don't need to special-case it here. For all the
|
||||
* vsyscalls, NULL means "don't write anything" not "write it at
|
||||
* address 0".
|
||||
*/
|
||||
switch (vsyscall_nr) {
|
||||
case 0:
|
||||
if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
|
||||
!write_ok_or_segv(regs->si, sizeof(struct timezone))) {
|
||||
ret = -EFAULT;
|
||||
goto check_fault;
|
||||
}
|
||||
|
||||
syscall_nr = __NR_gettimeofday;
|
||||
break;
|
||||
|
||||
case 1:
|
||||
if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
|
||||
ret = -EFAULT;
|
||||
goto check_fault;
|
||||
}
|
||||
|
||||
syscall_nr = __NR_time;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
|
||||
!write_ok_or_segv(regs->si, sizeof(unsigned))) {
|
||||
ret = -EFAULT;
|
||||
goto check_fault;
|
||||
}
|
||||
|
||||
syscall_nr = __NR_getcpu;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle seccomp. regs->ip must be the original value.
|
||||
* See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt.
|
||||
*
|
||||
* We could optimize the seccomp disabled case, but performance
|
||||
* here doesn't matter.
|
||||
*/
|
||||
regs->orig_ax = syscall_nr;
|
||||
regs->ax = -ENOSYS;
|
||||
tmp = secure_computing();
|
||||
if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
|
||||
warn_bad_vsyscall(KERN_DEBUG, regs,
|
||||
"seccomp tried to change syscall nr or ip");
|
||||
do_exit(SIGSYS);
|
||||
}
|
||||
regs->orig_ax = -1;
|
||||
if (tmp)
|
||||
goto do_ret; /* skip requested */
|
||||
|
||||
/*
|
||||
* With a real vsyscall, page faults cause SIGSEGV. We want to
|
||||
* preserve that behavior to make writing exploits harder.
|
||||
*/
|
||||
prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
|
||||
current_thread_info()->sig_on_uaccess_error = 1;
|
||||
|
||||
ret = -EFAULT;
|
||||
switch (vsyscall_nr) {
|
||||
case 0:
|
||||
ret = sys_gettimeofday(
|
||||
(struct timeval __user *)regs->di,
|
||||
(struct timezone __user *)regs->si);
|
||||
break;
|
||||
|
||||
case 1:
|
||||
ret = sys_time((time_t __user *)regs->di);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
ret = sys_getcpu((unsigned __user *)regs->di,
|
||||
(unsigned __user *)regs->si,
|
||||
NULL);
|
||||
break;
|
||||
}
|
||||
|
||||
current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
|
||||
|
||||
check_fault:
|
||||
if (ret == -EFAULT) {
|
||||
/* Bad news -- userspace fed a bad pointer to a vsyscall. */
|
||||
warn_bad_vsyscall(KERN_INFO, regs,
|
||||
"vsyscall fault (exploit attempt?)");
|
||||
|
||||
/*
|
||||
* If we failed to generate a signal for any reason,
|
||||
* generate one here. (This should be impossible.)
|
||||
*/
|
||||
if (WARN_ON_ONCE(!sigismember(&tsk->pending.signal, SIGBUS) &&
|
||||
!sigismember(&tsk->pending.signal, SIGSEGV)))
|
||||
goto sigsegv;
|
||||
|
||||
return true; /* Don't emulate the ret. */
|
||||
}
|
||||
|
||||
regs->ax = ret;
|
||||
|
||||
do_ret:
|
||||
/* Emulate a ret instruction. */
|
||||
regs->ip = caller;
|
||||
regs->sp += 8;
|
||||
return true;
|
||||
|
||||
sigsegv:
|
||||
force_sig(SIGSEGV, current);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* A pseudo VMA to allow ptrace access for the vsyscall page. This only
|
||||
* covers the 64bit vsyscall page now. 32bit has a real VMA now and does
|
||||
* not need special handling anymore:
|
||||
*/
|
||||
/* ->name hook of the gate VMA: the label is fixed, @vma is not inspected. */
static const char *gate_vma_name(struct vm_area_struct *vma)
{
	static const char label[] = "[vsyscall]";

	return label;
}
|
||||
static struct vm_operations_struct gate_vma_ops = {
|
||||
.name = gate_vma_name,
|
||||
};
|
||||
static struct vm_area_struct gate_vma = {
|
||||
.vm_start = VSYSCALL_ADDR,
|
||||
.vm_end = VSYSCALL_ADDR + PAGE_SIZE,
|
||||
.vm_page_prot = PAGE_READONLY_EXEC,
|
||||
.vm_flags = VM_READ | VM_EXEC,
|
||||
.vm_ops = &gate_vma_ops,
|
||||
};
|
||||
|
||||
struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
if (!mm || mm->context.ia32_compat)
|
||||
return NULL;
|
||||
#endif
|
||||
if (vsyscall_mode == NONE)
|
||||
return NULL;
|
||||
return &gate_vma;
|
||||
}
|
||||
|
||||
int in_gate_area(struct mm_struct *mm, unsigned long addr)
|
||||
{
|
||||
struct vm_area_struct *vma = get_gate_vma(mm);
|
||||
|
||||
if (!vma)
|
||||
return 0;
|
||||
|
||||
return (addr >= vma->vm_start) && (addr < vma->vm_end);
|
||||
}
|
||||
|
||||
/*
|
||||
* Use this when you have no reliable mm, typically from interrupt
|
||||
* context. It is less reliable than using a task's mm and may give
|
||||
* false positives.
|
||||
*/
|
||||
int in_gate_area_no_mm(unsigned long addr)
|
||||
{
|
||||
return vsyscall_mode != NONE && (addr & PAGE_MASK) == VSYSCALL_ADDR;
|
||||
}
|
||||
|
||||
/*
 * Install the vsyscall page in the fixmap at boot.
 *
 * Nothing is mapped for vsyscall=none.  In native mode the page is mapped
 * with PAGE_KERNEL_VSYSCALL, otherwise with PAGE_KERNEL_VVAR.  The build
 * fails if the fixmap slot does not land on VSYSCALL_ADDR.
 */
void __init map_vsyscall(void)
{
	extern char __vsyscall_page;
	unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);

	if (vsyscall_mode == NONE) {
		/* vsyscalls disabled: leave the fixmap slot unmapped. */
	} else if (vsyscall_mode == NATIVE) {
		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
			     PAGE_KERNEL_VSYSCALL);
	} else {
		__set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
			     PAGE_KERNEL_VVAR);
	}

	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
		     (unsigned long)VSYSCALL_ADDR);
}
|
37
arch/x86/entry/vsyscall/vsyscall_emu_64.S
Normal file
37
arch/x86/entry/vsyscall/vsyscall_emu_64.S
Normal file
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
* vsyscall_emu_64.S: Vsyscall emulation page
|
||||
*
|
||||
* Copyright (c) 2011 Andy Lutomirski
|
||||
*
|
||||
* Subject to the GNU General Public License, version 2
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
#include <asm/irq_vectors.h>
|
||||
#include <asm/page_types.h>
|
||||
#include <asm/unistd_64.h>
|
||||
|
||||
/*
 * The vsyscall page: one 4096-byte object holding three entry stubs at a
 * 1024-byte stride.  Each stub loads a syscall number into %rax, executes
 * "syscall" and returns.  All padding is filled with 0xcc (the int3
 * opcode).
 *
 *   offset    0: gettimeofday
 *   offset 1024: time
 *   offset 2048: getcpu
 */
	__PAGE_ALIGNED_DATA
	.globl	__vsyscall_page
	.balign	PAGE_SIZE, 0xcc
	.type	__vsyscall_page, @object
__vsyscall_page:

	/* slot 0 */
	mov	$__NR_gettimeofday, %rax
	syscall
	ret

	/* slot 1 */
	.balign	1024, 0xcc
	mov	$__NR_time, %rax
	syscall
	ret

	/* slot 2 */
	.balign	1024, 0xcc
	mov	$__NR_getcpu, %rax
	syscall
	ret

	/* pad the remainder of the page */
	.balign	4096, 0xcc

	.size	__vsyscall_page, 4096
|
70
arch/x86/entry/vsyscall/vsyscall_gtod.c
Normal file
70
arch/x86/entry/vsyscall/vsyscall_gtod.c
Normal file
@@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
|
||||
* Copyright 2003 Andi Kleen, SuSE Labs.
|
||||
*
|
||||
* Modified for x86 32 bit architecture by
|
||||
* Stefani Seibold <stefani@seibold.net>
|
||||
* sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
|
||||
*
|
||||
* Thanks to hpa@transmeta.com for some useful hint.
|
||||
* Special thanks to Ingo Molnar for his early experience with
|
||||
* a different vsyscall implementation for Linux/IA32 and for the name.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/timekeeper_internal.h>
|
||||
#include <asm/vgtod.h>
|
||||
#include <asm/vvar.h>
|
||||
|
||||
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data);
|
||||
|
||||
void update_vsyscall_tz(void)
|
||||
{
|
||||
vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest;
|
||||
vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime;
|
||||
}
|
||||
|
||||
void update_vsyscall(struct timekeeper *tk)
|
||||
{
|
||||
struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
|
||||
|
||||
gtod_write_begin(vdata);
|
||||
|
||||
/* copy vsyscall data */
|
||||
vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
|
||||
vdata->cycle_last = tk->tkr_mono.cycle_last;
|
||||
vdata->mask = tk->tkr_mono.mask;
|
||||
vdata->mult = tk->tkr_mono.mult;
|
||||
vdata->shift = tk->tkr_mono.shift;
|
||||
|
||||
vdata->wall_time_sec = tk->xtime_sec;
|
||||
vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;
|
||||
|
||||
vdata->monotonic_time_sec = tk->xtime_sec
|
||||
+ tk->wall_to_monotonic.tv_sec;
|
||||
vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
|
||||
+ ((u64)tk->wall_to_monotonic.tv_nsec
|
||||
<< tk->tkr_mono.shift);
|
||||
while (vdata->monotonic_time_snsec >=
|
||||
(((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
|
||||
vdata->monotonic_time_snsec -=
|
||||
((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
|
||||
vdata->monotonic_time_sec++;
|
||||
}
|
||||
|
||||
vdata->wall_time_coarse_sec = tk->xtime_sec;
|
||||
vdata->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
|
||||
tk->tkr_mono.shift);
|
||||
|
||||
vdata->monotonic_time_coarse_sec =
|
||||
vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
|
||||
vdata->monotonic_time_coarse_nsec =
|
||||
vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
|
||||
|
||||
while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) {
|
||||
vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC;
|
||||
vdata->monotonic_time_coarse_sec++;
|
||||
}
|
||||
|
||||
gtod_write_end(vdata);
|
||||
}
|
29
arch/x86/entry/vsyscall/vsyscall_trace.h
Normal file
29
arch/x86/entry/vsyscall/vsyscall_trace.h
Normal file
@@ -0,0 +1,29 @@
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM vsyscall
|
||||
|
||||
#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define __VSYSCALL_TRACE_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
TRACE_EVENT(emulate_vsyscall,
|
||||
|
||||
TP_PROTO(int nr),
|
||||
|
||||
TP_ARGS(nr),
|
||||
|
||||
TP_STRUCT__entry(__field(int, nr)),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->nr = nr;
|
||||
),
|
||||
|
||||
TP_printk("nr = %d", __entry->nr)
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
#define TRACE_INCLUDE_PATH ../../arch/x86/entry/vsyscall/
|
||||
#define TRACE_INCLUDE_FILE vsyscall_trace
|
||||
#include <trace/define_trace.h>
|
Reference in New Issue
Block a user