x86_64: move vdso

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Thomas Gleixner
2007-10-11 11:17:10 +02:00
parent 185f3d3890
commit 7648b1330c
14 changed files with 3 additions and 3 deletions

1
arch/x86/vdso/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
vdso.lds

49
arch/x86/vdso/Makefile Normal file
View File

@@ -0,0 +1,49 @@
#
# x86-64 vDSO.
#
# files to link into the vdso
# vdso-start.o has to be first
vobjs-y := vdso-start.o vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
# files to link into kernel
obj-y := vma.o vdso.o vdso-syms.o
vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
$(obj)/vdso.o: $(obj)/vdso.so
targets += vdso.so vdso.lds $(vobjs-y) vdso-syms.o
# The DSO images are built using a special linker script.
quiet_cmd_syscall = SYSCALL $@
cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \
-Wl,-T,$(filter-out FORCE,$^) -o $@
export CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \
$(call ld-option, -Wl$(comma)--hash-style=sysv) \
-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
SYSCFLAGS_vdso.so = $(vdso-flags)
$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
$(obj)/vdso.so: $(src)/vdso.lds $(vobjs) FORCE
$(call if_changed,syscall)
CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64
$(obj)/vclock_gettime.o: CFLAGS = $(CFL)
$(obj)/vgetcpu.o: CFLAGS = $(CFL)
# We also create a special relocatable object that should mirror the symbol
# table and layout of the linked DSO. With ld -R we can then refer to
# these symbols in the kernel code rather than hand-coded addresses.
extra-y += vdso-syms.o
$(obj)/built-in.o: $(obj)/vdso-syms.o
$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o
SYSCFLAGS_vdso-syms.o = -r -d
$(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE
$(call if_changed,syscall)

View File

@@ -0,0 +1,121 @@
/*
* Copyright 2006 Andi Kleen, SUSE Labs.
* Subject to the GNU Public License, v.2
*
* Fast user context implementation of clock_gettime and gettimeofday.
*
* The code should have no internal unresolved relocations.
* Check with readelf after changing.
* Also alternative() doesn't work.
*/
#include <linux/kernel.h>
#include <linux/posix-timers.h>
#include <linux/time.h>
#include <linux/string.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include <asm/timex.h>
#include <asm/hpet.h>
#include <asm/unistd.h>
#include <asm/io.h>
#include <asm/vgtod.h>
#include "vextern.h"
#define gtod vdso_vsyscall_gtod_data
static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
asm("syscall" : "=a" (ret) :
"0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory");
return ret;
}
static inline long vgetns(void)
{
long v;
cycles_t (*vread)(void);
vread = gtod->clock.vread;
v = (vread() - gtod->clock.cycle_last) & gtod->clock.mask;
return (v * gtod->clock.mult) >> gtod->clock.shift;
}
static noinline int do_realtime(struct timespec *ts)
{
unsigned long seq, ns;
do {
seq = read_seqbegin(&gtod->lock);
ts->tv_sec = gtod->wall_time_sec;
ts->tv_nsec = gtod->wall_time_nsec;
ns = vgetns();
} while (unlikely(read_seqretry(&gtod->lock, seq)));
timespec_add_ns(ts, ns);
return 0;
}
/* Copy of the version in kernel/time.c which we cannot directly access */
static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
{
while (nsec >= NSEC_PER_SEC) {
nsec -= NSEC_PER_SEC;
++sec;
}
while (nsec < 0) {
nsec += NSEC_PER_SEC;
--sec;
}
ts->tv_sec = sec;
ts->tv_nsec = nsec;
}
static noinline int do_monotonic(struct timespec *ts)
{
unsigned long seq, ns, secs;
do {
seq = read_seqbegin(&gtod->lock);
secs = gtod->wall_time_sec;
ns = gtod->wall_time_nsec + vgetns();
secs += gtod->wall_to_monotonic.tv_sec;
ns += gtod->wall_to_monotonic.tv_nsec;
} while (unlikely(read_seqretry(&gtod->lock, seq)));
vset_normalized_timespec(ts, secs, ns);
return 0;
}
int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
if (likely(gtod->sysctl_enabled && gtod->clock.vread))
switch (clock) {
case CLOCK_REALTIME:
return do_realtime(ts);
case CLOCK_MONOTONIC:
return do_monotonic(ts);
}
return vdso_fallback_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct timespec *)
__attribute__((weak, alias("__vdso_clock_gettime")));
int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
long ret;
if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
offsetof(struct timespec, tv_nsec) ||
sizeof(*tv) != sizeof(struct timespec));
do_realtime((struct timespec *)tv);
tv->tv_usec /= 1000;
if (unlikely(tz != NULL)) {
/* This relies on gcc inlining the memcpy. We'll notice
if it ever fails to do so. */
memcpy(tz, &gtod->sys_tz, sizeof(struct timezone));
}
return 0;
}
asm("syscall" : "=a" (ret) :
"0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
return ret;
}
int gettimeofday(struct timeval *, struct timezone *)
__attribute__((weak, alias("__vdso_gettimeofday")));

12
arch/x86/vdso/vdso-note.S Normal file
View File

@@ -0,0 +1,12 @@
/*
* This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
* Here we can supply some information useful to userland.
*/
#include <linux/uts.h>
#include <linux/version.h>
#include <linux/elfnote.h>
ELFNOTE_START(Linux, 0, "a")
.long LINUX_VERSION_CODE
ELFNOTE_END

View File

@@ -0,0 +1,2 @@
.globl vdso_kernel_start
vdso_kernel_start:

2
arch/x86/vdso/vdso.S Normal file
View File

@@ -0,0 +1,2 @@
.section ".vdso","a"
.incbin "arch/x86/vdso/vdso.so"

77
arch/x86/vdso/vdso.lds.S Normal file
View File

@@ -0,0 +1,77 @@
/*
* Linker script for vsyscall DSO. The vsyscall page is an ELF shared
* object prelinked to its virtual address, and with only one read-only
* segment (that fits in one page). This script controls its layout.
*/
#include <asm/asm-offsets.h>
#include "voffset.h"
#define VDSO_PRELINK 0xffffffffff700000
SECTIONS
{
. = VDSO_PRELINK + SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.gnu.version : { *(.gnu.version) }
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
/* This linker script is used both with -r and with -shared.
For the layouts to match, we need to skip more than enough
space for the dynamic symbol table et al. If this amount
is insufficient, ld -shared will barf. Just increase it here. */
. = VDSO_PRELINK + VDSO_TEXT_OFFSET;
.text : { *(.text) } :text
.text.ptr : { *(.text.ptr) } :text
. = VDSO_PRELINK + 0x900;
.data : { *(.data) } :text
.bss : { *(.bss) } :text
.altinstructions : { *(.altinstructions) } :text
.altinstr_replacement : { *(.altinstr_replacement) } :text
.note : { *(.note.*) } :text :note
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
.eh_frame : { KEEP (*(.eh_frame)) } :text
.dynamic : { *(.dynamic) } :text :dynamic
.useless : {
*(.got.plt) *(.got)
*(.gnu.linkonce.d.*)
*(.dynbss)
*(.gnu.linkonce.b.*)
} :text
}
/*
* We must supply the ELF program headers explicitly to get just one
* PT_LOAD segment, and set the flags explicitly to make segments read-only.
*/
PHDRS
{
text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
note PT_NOTE FLAGS(4); /* PF_R */
eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
}
/*
* This controls what symbols we export from the DSO.
*/
VERSION
{
LINUX_2.6 {
global:
clock_gettime;
__vdso_clock_gettime;
gettimeofday;
__vdso_gettimeofday;
getcpu;
__vdso_getcpu;
local: *;
};
}

16
arch/x86/vdso/vextern.h Normal file
View File

@@ -0,0 +1,16 @@
#ifndef VEXTERN
#include <asm/vsyscall.h>
#define VEXTERN(x) \
extern typeof(x) *vdso_ ## x __attribute__((visibility("hidden")));
#endif
#define VMAGIC 0xfeedbabeabcdefabUL
/* Any kernel variables used in the vDSO must be exported in the main
kernel's vmlinux.lds.S/vsyscall.h/proper __section and
put into vextern.h and be referenced as a pointer with vdso prefix.
The main kernel later fills in the values. */
VEXTERN(jiffies)
VEXTERN(vgetcpu_mode)
VEXTERN(vsyscall_gtod_data)

50
arch/x86/vdso/vgetcpu.c Normal file
View File

@@ -0,0 +1,50 @@
/*
* Copyright 2006 Andi Kleen, SUSE Labs.
* Subject to the GNU Public License, v.2
*
* Fast user context implementation of getcpu()
*/
#include <linux/kernel.h>
#include <linux/getcpu.h>
#include <linux/jiffies.h>
#include <linux/time.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include "vextern.h"
long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
{
unsigned int dummy, p;
unsigned long j = 0;
/* Fast cache - only recompute value once per jiffies and avoid
relatively costly rdtscp/cpuid otherwise.
This works because the scheduler usually keeps the process
on the same CPU and this syscall doesn't guarantee its
results anyways.
We do this here because otherwise user space would do it on
its own in a likely inferior way (no access to jiffies).
If you don't like it pass NULL. */
if (tcache && tcache->blob[0] == (j = *vdso_jiffies)) {
p = tcache->blob[1];
} else if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
/* Load per CPU data from RDTSCP */
rdtscp(dummy, dummy, p);
} else {
/* Load per CPU data from GDT */
asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
}
if (tcache) {
tcache->blob[0] = j;
tcache->blob[1] = p;
}
if (cpu)
*cpu = p & 0xfff;
if (node)
*node = p >> 12;
return 0;
}
long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
__attribute__((weak, alias("__vdso_getcpu")));

140
arch/x86/vdso/vma.c Normal file
View File

@@ -0,0 +1,140 @@
/*
* Set up the VMAs to tell the VM about the vDSO.
* Copyright 2007 Andi Kleen, SUSE Labs.
* Subject to the GPL, v.2
*/
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/random.h>
#include <asm/vsyscall.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include "voffset.h"
int vdso_enabled = 1;
#define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x;
#include "vextern.h"
#undef VEXTERN
extern char vdso_kernel_start[], vdso_start[], vdso_end[];
extern unsigned short vdso_sync_cpuid;
struct page **vdso_pages;
static inline void *var_ref(void *vbase, char *var, char *name)
{
unsigned offset = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET;
void *p = vbase + offset;
if (*(void **)p != (void *)VMAGIC) {
printk("VDSO: variable %s broken\n", name);
vdso_enabled = 0;
}
return p;
}
static int __init init_vdso_vars(void)
{
int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
int i;
char *vbase;
vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
if (!vdso_pages)
goto oom;
for (i = 0; i < npages; i++) {
struct page *p;
p = alloc_page(GFP_KERNEL);
if (!p)
goto oom;
vdso_pages[i] = p;
copy_page(page_address(p), vdso_start + i*PAGE_SIZE);
}
vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL);
if (!vbase)
goto oom;
if (memcmp(vbase, "\177ELF", 4)) {
printk("VDSO: I'm broken; not ELF\n");
vdso_enabled = 0;
}
#define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x)
#define VEXTERN(x) \
V(vdso_ ## x) = &__ ## x;
#include "vextern.h"
#undef VEXTERN
return 0;
oom:
printk("Cannot allocate vdso\n");
vdso_enabled = 0;
return -ENOMEM;
}
__initcall(init_vdso_vars);
struct linux_binprm;
/* Put the vdso above the (randomized) stack with another randomized offset.
This way there is no hole in the middle of address space.
To save memory make sure it is still in the same PTE as the stack top.
This doesn't give that many random bits */
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
unsigned long addr, end;
unsigned offset;
end = (start + PMD_SIZE - 1) & PMD_MASK;
if (end >= TASK_SIZE64)
end = TASK_SIZE64;
end -= len;
/* This loses some more bits than a modulo, but is cheaper */
offset = get_random_int() & (PTRS_PER_PTE - 1);
addr = start + (offset << PAGE_SHIFT);
if (addr >= end)
addr = end;
return addr;
}
/* Setup a VMA at program startup for the vsyscall page.
Not called for compat tasks */
int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
{
struct mm_struct *mm = current->mm;
unsigned long addr;
int ret;
unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE);
if (!vdso_enabled)
return 0;
down_write(&mm->mmap_sem);
addr = vdso_addr(mm->start_stack, len);
addr = get_unmapped_area(NULL, addr, len, 0, 0);
if (IS_ERR_VALUE(addr)) {
ret = addr;
goto up_fail;
}
ret = install_special_mapping(mm, addr, len,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
VM_ALWAYSDUMP,
vdso_pages);
if (ret)
goto up_fail;
current->mm->context.vdso = (void *)addr;
up_fail:
up_write(&mm->mmap_sem);
return ret;
}
static __init int vdso_setup(char *s)
{
vdso_enabled = simple_strtoul(s, NULL, 0);
return 0;
}
__setup("vdso=", vdso_setup);

1
arch/x86/vdso/voffset.h Normal file
View File

@@ -0,0 +1 @@
#define VDSO_TEXT_OFFSET 0x600

12
arch/x86/vdso/vvar.c Normal file
View File

@@ -0,0 +1,12 @@
/* Define pointer to external vDSO variables.
These are part of the vDSO. The kernel fills in the real addresses
at boot time. This is done because when the vdso is linked the
kernel isn't yet and we don't know the final addresses. */
#include <linux/kernel.h>
#include <linux/time.h>
#include <asm/vsyscall.h>
#include <asm/timex.h>
#include <asm/vgtod.h>
#define VEXTERN(x) typeof (__ ## x) *vdso_ ## x = (void *)VMAGIC;
#include "vextern.h"