x86_64: move vdso
Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
1
arch/x86/vdso/.gitignore
vendored
Normal file
1
arch/x86/vdso/.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
vdso.lds
|
49
arch/x86/vdso/Makefile
Normal file
49
arch/x86/vdso/Makefile
Normal file
@@ -0,0 +1,49 @@
|
||||
#
# x86-64 vDSO.
#

# files to link into the vdso
# vdso-start.o has to be first
vobjs-y := vdso-start.o vdso-note.o vclock_gettime.o vgetcpu.o vvar.o

# files to link into kernel
obj-y := vma.o vdso.o vdso-syms.o

vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)

$(obj)/vdso.o: $(obj)/vdso.so

targets += vdso.so vdso.lds $(vobjs-y) vdso-syms.o

# The DSO images are built using a special linker script.
quiet_cmd_syscall = SYSCALL $@
      cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \
		          -Wl,-T,$(filter-out FORCE,$^) -o $@

export CPPFLAGS_vdso.lds += -P -C -U$(ARCH)

vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \
	     $(call ld-option, -Wl$(comma)--hash-style=sysv) \
	     -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
SYSCFLAGS_vdso.so = $(vdso-flags)

$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so

$(obj)/vdso.so: $(src)/vdso.lds $(vobjs) FORCE
	$(call if_changed,syscall)

# Flags for objects that become part of the vDSO itself: small code
# model, PIC, no debug info, unwind tables for userland EH consumers.
CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64

$(obj)/vclock_gettime.o: CFLAGS = $(CFL)
$(obj)/vgetcpu.o: CFLAGS = $(CFL)

# We also create a special relocatable object that should mirror the symbol
# table and layout of the linked DSO.  With ld -R we can then refer to
# these symbols in the kernel code rather than hand-coded addresses.
extra-y += vdso-syms.o
$(obj)/built-in.o: $(obj)/vdso-syms.o
$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o

SYSCFLAGS_vdso-syms.o = -r -d
$(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE
	$(call if_changed,syscall)
|
121
arch/x86/vdso/vclock_gettime.c
Normal file
121
arch/x86/vdso/vclock_gettime.c
Normal file
@@ -0,0 +1,121 @@
|
||||
/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime and gettimeofday.
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 * Also alternative() doesn't work.
 */

#include <linux/kernel.h>
#include <linux/posix-timers.h>
#include <linux/time.h>
#include <linux/string.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include <asm/timex.h>
#include <asm/hpet.h>
#include <asm/unistd.h>
#include <asm/io.h>
#include "vextern.h"

/* Shorthand for the vDSO-local pointer to the kernel's gtod data
   (declared via VEXTERN in vextern.h and patched at boot). */
#define gtod vdso_vsyscall_gtod_data
|
||||
|
||||
static long vdso_fallback_gettime(long clock, struct timespec *ts)
|
||||
{
|
||||
long ret;
|
||||
asm("syscall" : "=a" (ret) :
|
||||
"0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory");
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline long vgetns(void)
|
||||
{
|
||||
long v;
|
||||
cycles_t (*vread)(void);
|
||||
vread = gtod->clock.vread;
|
||||
v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask;
|
||||
return (v * gtod->clock.mult) >> gtod->clock.shift;
|
||||
}
|
||||
|
||||
static noinline int do_realtime(struct timespec *ts)
|
||||
{
|
||||
unsigned long seq, ns;
|
||||
do {
|
||||
seq = read_seqbegin(>od->lock);
|
||||
ts->tv_sec = gtod->wall_time_sec;
|
||||
ts->tv_nsec = gtod->wall_time_nsec;
|
||||
ns = vgetns();
|
||||
} while (unlikely(read_seqretry(>od->lock, seq)));
|
||||
timespec_add_ns(ts, ns);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Copy of the version in kernel/time.c which we cannot directly access */
|
||||
static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
|
||||
{
|
||||
while (nsec >= NSEC_PER_SEC) {
|
||||
nsec -= NSEC_PER_SEC;
|
||||
++sec;
|
||||
}
|
||||
while (nsec < 0) {
|
||||
nsec += NSEC_PER_SEC;
|
||||
--sec;
|
||||
}
|
||||
ts->tv_sec = sec;
|
||||
ts->tv_nsec = nsec;
|
||||
}
|
||||
|
||||
static noinline int do_monotonic(struct timespec *ts)
|
||||
{
|
||||
unsigned long seq, ns, secs;
|
||||
do {
|
||||
seq = read_seqbegin(>od->lock);
|
||||
secs = gtod->wall_time_sec;
|
||||
ns = gtod->wall_time_nsec + vgetns();
|
||||
secs += gtod->wall_to_monotonic.tv_sec;
|
||||
ns += gtod->wall_to_monotonic.tv_nsec;
|
||||
} while (unlikely(read_seqretry(>od->lock, seq)));
|
||||
vset_normalized_timespec(ts, secs, ns);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
|
||||
{
|
||||
if (likely(gtod->sysctl_enabled && gtod->clock.vread))
|
||||
switch (clock) {
|
||||
case CLOCK_REALTIME:
|
||||
return do_realtime(ts);
|
||||
case CLOCK_MONOTONIC:
|
||||
return do_monotonic(ts);
|
||||
}
|
||||
return vdso_fallback_gettime(clock, ts);
|
||||
}
|
||||
int clock_gettime(clockid_t, struct timespec *)
|
||||
__attribute__((weak, alias("__vdso_clock_gettime")));
|
||||
|
||||
int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
|
||||
{
|
||||
long ret;
|
||||
if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
|
||||
BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
|
||||
offsetof(struct timespec, tv_nsec) ||
|
||||
sizeof(*tv) != sizeof(struct timespec));
|
||||
do_realtime((struct timespec *)tv);
|
||||
tv->tv_usec /= 1000;
|
||||
if (unlikely(tz != NULL)) {
|
||||
/* This relies on gcc inlining the memcpy. We'll notice
|
||||
if it ever fails to do so. */
|
||||
memcpy(tz, >od->sys_tz, sizeof(struct timezone));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
asm("syscall" : "=a" (ret) :
|
||||
"0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
|
||||
return ret;
|
||||
}
|
||||
int gettimeofday(struct timeval *, struct timezone *)
|
||||
__attribute__((weak, alias("__vdso_gettimeofday")));
|
12
arch/x86/vdso/vdso-note.S
Normal file
12
arch/x86/vdso/vdso-note.S
Normal file
@@ -0,0 +1,12 @@
|
||||
/*
 * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
 * Here we can supply some information useful to userland.
 */

#include <linux/uts.h>
#include <linux/version.h>
#include <linux/elfnote.h>

ELFNOTE_START(Linux, 0, "a")
	.long LINUX_VERSION_CODE
ELFNOTE_END
|
2
arch/x86/vdso/vdso-start.S
Normal file
2
arch/x86/vdso/vdso-start.S
Normal file
@@ -0,0 +1,2 @@
|
||||
/* Marks the very start of the vDSO image; linked first (see Makefile)
   so var_ref() in vma.c can compute offsets relative to it. */
	.globl vdso_kernel_start
vdso_kernel_start:
|
2
arch/x86/vdso/vdso.S
Normal file
2
arch/x86/vdso/vdso.S
Normal file
@@ -0,0 +1,2 @@
|
||||
/* Embed the prelinked vdso.so image into the kernel's .vdso section. */
	.section ".vdso","a"
	.incbin "arch/x86/vdso/vdso.so"
|
77
arch/x86/vdso/vdso.lds.S
Normal file
77
arch/x86/vdso/vdso.lds.S
Normal file
@@ -0,0 +1,77 @@
|
||||
/*
 * Linker script for vsyscall DSO.  The vsyscall page is an ELF shared
 * object prelinked to its virtual address, and with only one read-only
 * segment (that fits in one page).  This script controls its layout.
 */
#include <asm/asm-offsets.h>
#include "voffset.h"

#define VDSO_PRELINK 0xffffffffff700000

SECTIONS
{
	. = VDSO_PRELINK + SIZEOF_HEADERS;

	.hash		: { *(.hash) }			:text
	.gnu.hash	: { *(.gnu.hash) }
	.dynsym		: { *(.dynsym) }
	.dynstr		: { *(.dynstr) }
	.gnu.version	: { *(.gnu.version) }
	.gnu.version_d	: { *(.gnu.version_d) }
	.gnu.version_r	: { *(.gnu.version_r) }

	/* This linker script is used both with -r and with -shared.
	   For the layouts to match, we need to skip more than enough
	   space for the dynamic symbol table et al.  If this amount
	   is insufficient, ld -shared will barf.  Just increase it here. */
	. = VDSO_PRELINK + VDSO_TEXT_OFFSET;

	.text		: { *(.text) }			:text
	.text.ptr	: { *(.text.ptr) }		:text
	. = VDSO_PRELINK + 0x900;
	.data		: { *(.data) }			:text
	.bss		: { *(.bss) }			:text

	.altinstructions	: { *(.altinstructions) }	:text
	.altinstr_replacement	: { *(.altinstr_replacement) }	:text

	.note		: { *(.note.*) }		:text :note
	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text :eh_frame_hdr
	.eh_frame	: { KEEP (*(.eh_frame)) }	:text
	.dynamic	: { *(.dynamic) }		:text :dynamic
	/* Sections we keep only so the link succeeds; never referenced. */
	.useless	: {
	      *(.got.plt) *(.got)
	      *(.gnu.linkonce.d.*)
	      *(.dynbss)
	      *(.gnu.linkonce.b.*)
	} :text
}

/*
 * We must supply the ELF program headers explicitly to get just one
 * PT_LOAD segment, and set the flags explicitly to make segments read-only.
 */
PHDRS
{
	text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
	dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
	note PT_NOTE FLAGS(4); /* PF_R */
	eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
}

/*
 * This controls what symbols we export from the DSO.
 */
VERSION
{
	LINUX_2.6 {
	global:
		clock_gettime;
		__vdso_clock_gettime;
		gettimeofday;
		__vdso_gettimeofday;
		getcpu;
		__vdso_getcpu;
	local: *;
	};
}
|
16
arch/x86/vdso/vextern.h
Normal file
16
arch/x86/vdso/vextern.h
Normal file
@@ -0,0 +1,16 @@
|
||||
#ifndef VEXTERN
|
||||
#include <asm/vsyscall.h>
|
||||
#define VEXTERN(x) \
|
||||
extern typeof(x) *vdso_ ## x __attribute__((visibility("hidden")));
|
||||
#endif
|
||||
|
||||
#define VMAGIC 0xfeedbabeabcdefabUL
|
||||
|
||||
/* Any kernel variables used in the vDSO must be exported in the main
|
||||
kernel's vmlinux.lds.S/vsyscall.h/proper __section and
|
||||
put into vextern.h and be referenced as a pointer with vdso prefix.
|
||||
The main kernel later fills in the values. */
|
||||
|
||||
VEXTERN(jiffies)
|
||||
VEXTERN(vgetcpu_mode)
|
||||
VEXTERN(vsyscall_gtod_data)
|
50
arch/x86/vdso/vgetcpu.c
Normal file
50
arch/x86/vdso/vgetcpu.c
Normal file
@@ -0,0 +1,50 @@
|
||||
/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of getcpu()
 */

#include <linux/kernel.h>
#include <linux/getcpu.h>
#include <linux/jiffies.h>
#include <linux/time.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include "vextern.h"
|
||||
|
||||
long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
|
||||
{
|
||||
unsigned int dummy, p;
|
||||
unsigned long j = 0;
|
||||
|
||||
/* Fast cache - only recompute value once per jiffies and avoid
|
||||
relatively costly rdtscp/cpuid otherwise.
|
||||
This works because the scheduler usually keeps the process
|
||||
on the same CPU and this syscall doesn't guarantee its
|
||||
results anyways.
|
||||
We do this here because otherwise user space would do it on
|
||||
its own in a likely inferior way (no access to jiffies).
|
||||
If you don't like it pass NULL. */
|
||||
if (tcache && tcache->blob[0] == (j = *vdso_jiffies)) {
|
||||
p = tcache->blob[1];
|
||||
} else if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
|
||||
/* Load per CPU data from RDTSCP */
|
||||
rdtscp(dummy, dummy, p);
|
||||
} else {
|
||||
/* Load per CPU data from GDT */
|
||||
asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
|
||||
}
|
||||
if (tcache) {
|
||||
tcache->blob[0] = j;
|
||||
tcache->blob[1] = p;
|
||||
}
|
||||
if (cpu)
|
||||
*cpu = p & 0xfff;
|
||||
if (node)
|
||||
*node = p >> 12;
|
||||
return 0;
|
||||
}
|
||||
|
||||
long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
|
||||
__attribute__((weak, alias("__vdso_getcpu")));
|
140
arch/x86/vdso/vma.c
Normal file
140
arch/x86/vdso/vma.c
Normal file
@@ -0,0 +1,140 @@
|
||||
/*
 * Set up the VMAs to tell the VM about the vDSO.
 * Copyright 2007 Andi Kleen, SUSE Labs.
 * Subject to the GPL, v.2
 */
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/random.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include "voffset.h"

/* Runtime switch; "vdso=0" on the command line disables the vDSO. */
int vdso_enabled = 1;

/* Declare the kernel-side pointer variables matching the vDSO's
   VEXTERN slots (vdso_jiffies etc.). */
#define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x;
#include "vextern.h"
#undef VEXTERN

extern char vdso_kernel_start[], vdso_start[], vdso_end[];
extern unsigned short vdso_sync_cpuid;

/* Page array backing the vDSO image; filled in by init_vdso_vars(). */
struct page **vdso_pages;
|
||||
|
||||
static inline void *var_ref(void *vbase, char *var, char *name)
|
||||
{
|
||||
unsigned offset = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET;
|
||||
void *p = vbase + offset;
|
||||
if (*(void **)p != (void *)VMAGIC) {
|
||||
printk("VDSO: variable %s broken\n", name);
|
||||
vdso_enabled = 0;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Copy the embedded vDSO image into freshly allocated pages and patch
   every VEXTERN placeholder pointer with the real kernel address. */
static int __init init_vdso_vars(void)
{
	int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
	int i;
	char *vbase;

	vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
	if (!vdso_pages)
		goto oom;
	for (i = 0; i < npages; i++) {
		struct page *p;
		p = alloc_page(GFP_KERNEL);
		if (!p)
			goto oom;
		vdso_pages[i] = p;
		copy_page(page_address(p), vdso_start + i*PAGE_SIZE);
	}

	/* Contiguous kernel mapping used only for the patching below. */
	vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL);
	if (!vbase)
		goto oom;

	if (memcmp(vbase, "\177ELF", 4)) {
		printk("VDSO: I'm broken; not ELF\n");
		vdso_enabled = 0;
	}

#define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x)
#define VEXTERN(x) \
	V(vdso_ ## x) = &__ ## x;
#include "vextern.h"
#undef VEXTERN
	return 0;

	/* NOTE(review): pages allocated before a failure are not freed
	   on this path; the vDSO is simply disabled -- confirm this is
	   the intended trade-off for a boot-time one-shot. */
 oom:
	printk("Cannot allocate vdso\n");
	vdso_enabled = 0;
	return -ENOMEM;
}
__initcall(init_vdso_vars);
|
||||
|
||||
struct linux_binprm;
|
||||
|
||||
/* Put the vdso above the (randomized) stack with another randomized offset.
|
||||
This way there is no hole in the middle of address space.
|
||||
To save memory make sure it is still in the same PTE as the stack top.
|
||||
This doesn't give that many random bits */
|
||||
static unsigned long vdso_addr(unsigned long start, unsigned len)
|
||||
{
|
||||
unsigned long addr, end;
|
||||
unsigned offset;
|
||||
end = (start + PMD_SIZE - 1) & PMD_MASK;
|
||||
if (end >= TASK_SIZE64)
|
||||
end = TASK_SIZE64;
|
||||
end -= len;
|
||||
/* This loses some more bits than a modulo, but is cheaper */
|
||||
offset = get_random_int() & (PTRS_PER_PTE - 1);
|
||||
addr = start + (offset << PAGE_SHIFT);
|
||||
if (addr >= end)
|
||||
addr = end;
|
||||
return addr;
|
||||
}
|
||||
|
||||
/* Setup a VMA at program startup for the vsyscall page.
|
||||
Not called for compat tasks */
|
||||
int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
unsigned long addr;
|
||||
int ret;
|
||||
unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE);
|
||||
|
||||
if (!vdso_enabled)
|
||||
return 0;
|
||||
|
||||
down_write(&mm->mmap_sem);
|
||||
addr = vdso_addr(mm->start_stack, len);
|
||||
addr = get_unmapped_area(NULL, addr, len, 0, 0);
|
||||
if (IS_ERR_VALUE(addr)) {
|
||||
ret = addr;
|
||||
goto up_fail;
|
||||
}
|
||||
|
||||
ret = install_special_mapping(mm, addr, len,
|
||||
VM_READ|VM_EXEC|
|
||||
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
|
||||
VM_ALWAYSDUMP,
|
||||
vdso_pages);
|
||||
if (ret)
|
||||
goto up_fail;
|
||||
|
||||
current->mm->context.vdso = (void *)addr;
|
||||
up_fail:
|
||||
up_write(&mm->mmap_sem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static __init int vdso_setup(char *s)
|
||||
{
|
||||
vdso_enabled = simple_strtoul(s, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
__setup("vdso=", vdso_setup);
|
1
arch/x86/vdso/voffset.h
Normal file
1
arch/x86/vdso/voffset.h
Normal file
@@ -0,0 +1 @@
|
||||
/* Fixed offset of .text within the vDSO image; must stay in sync with
   the ". = VDSO_PRELINK + VDSO_TEXT_OFFSET" placement in vdso.lds.S. */
#define VDSO_TEXT_OFFSET 0x600
|
12
arch/x86/vdso/vvar.c
Normal file
12
arch/x86/vdso/vvar.c
Normal file
@@ -0,0 +1,12 @@
|
||||
/* Define pointer to external vDSO variables.
   These are part of the vDSO. The kernel fills in the real addresses
   at boot time. This is done because when the vdso is linked the
   kernel isn't yet and we don't know the final addresses. */
#include <linux/kernel.h>
#include <linux/time.h>
#include <asm/vsyscall.h>
#include <asm/timex.h>
#include <asm/vgtod.h>

/* Each slot is initialized to VMAGIC so init_vdso_vars() can verify
   the slot before patching in the real kernel address. */
#define VEXTERN(x) typeof (__ ## x) *vdso_ ## x = (void *)VMAGIC;
#include "vextern.h"
|
Reference in New Issue
Block a user