x86/asm/entry, x86/vdso: Move the vDSO code to arch/x86/entry/vdso/
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
7  arch/x86/entry/vdso/.gitignore (vendored, regular file)
@@ -0,0 +1,7 @@
vdso.lds
vdsox32.lds
vdso32-syscall-syms.lds
vdso32-sysenter-syms.lds
vdso32-int80-syms.lds
vdso-image-*.c
vdso2c
209  arch/x86/entry/vdso/Makefile (regular file)
@@ -0,0 +1,209 @@
#
# Building vDSO images for x86.
#

KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n

VDSO64-$(CONFIG_X86_64)		:= y
VDSOX32-$(CONFIG_X86_X32_ABI)	:= y
VDSO32-$(CONFIG_X86_32)		:= y
VDSO32-$(CONFIG_COMPAT)		:= y

# files to link into the vdso
vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o

# files to link into kernel
obj-y				+= vma.o

# vDSO images to build
vdso_img-$(VDSO64-y)		+= 64
vdso_img-$(VDSOX32-y)		+= x32
vdso_img-$(VDSO32-y)		+= 32-int80
vdso_img-$(CONFIG_COMPAT)	+= 32-syscall
vdso_img-$(VDSO32-y)		+= 32-sysenter

obj-$(VDSO32-y)			+= vdso32-setup.o

vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)

$(obj)/vdso.o: $(obj)/vdso.so

targets += vdso.lds $(vobjs-y)

# Build the vDSO image C files and link them in.
vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o)
vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c)
vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg)
obj-y += $(vdso_img_objs)
targets += $(vdso_img_cfiles)
targets += $(vdso_img_sodbg)
.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) \
	$(vdso_img-y:%=$(obj)/vdso%.so)

export CPPFLAGS_vdso.lds += -P -C

VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
			-Wl,--no-undefined \
			-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \
			$(DISABLE_LTO)

$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE
	$(call if_changed,vdso)

HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/x86/include/uapi
hostprogs-y			+= vdso2c

quiet_cmd_vdso2c = VDSO2C  $@
define cmd_vdso2c
	$(obj)/vdso2c $< $(<:%.dbg=%) $@
endef

$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE
	$(call if_changed,vdso2c)

#
# Don't omit frame pointers for ease of userspace debugging, but do
# optimize sibling calls.
#
CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \
       $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \
       -fno-omit-frame-pointer -foptimize-sibling-calls \
       -DDISABLE_BRANCH_PROFILING

$(vobjs): KBUILD_CFLAGS += $(CFL)

#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
#
CFLAGS_REMOVE_vdso-note.o = -pg
CFLAGS_REMOVE_vclock_gettime.o = -pg
CFLAGS_REMOVE_vgetcpu.o = -pg
CFLAGS_REMOVE_vvar.o = -pg

#
# X32 processes use x32 vDSO to access 64bit kernel data.
#
# Build x32 vDSO image:
# 1. Compile x32 vDSO as 64bit.
# 2. Convert object files to x32.
# 3. Build x32 VDSO image with x32 objects, which contains 64bit codes
#    so that it can reach 64bit address space with 64bit pointers.
#

CPPFLAGS_vdsox32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \
			   -Wl,-soname=linux-vdso.so.1 \
			   -Wl,-z,max-page-size=4096 \
			   -Wl,-z,common-page-size=4096

# 64-bit objects to re-brand as x32
vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y))

# x32-rebranded versions
vobjx32s-y := $(vobjs64-for-x32:.o=-x32.o)

# same thing, but in the output directory
vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F)

# Convert 64bit object file to x32 for x32 vDSO.
quiet_cmd_x32 = X32     $@
      cmd_x32 = $(OBJCOPY) -O elf32-x86-64 $< $@

$(obj)/%-x32.o: $(obj)/%.o FORCE
	$(call if_changed,x32)

targets += vdsox32.lds $(vobjx32s-y)

$(obj)/%.so: OBJCOPYFLAGS := -S
$(obj)/%.so: $(obj)/%.so.dbg
	$(call if_changed,objcopy)

$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE
	$(call if_changed,vdso)

#
# Build multiple 32-bit vDSO images to choose from at boot time.
#
vdso32.so-$(VDSO32-y)		+= int80
vdso32.so-$(CONFIG_COMPAT)	+= syscall
vdso32.so-$(VDSO32-y)		+= sysenter

vdso32-images			= $(vdso32.so-y:%=vdso32-%.so)

CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1

# This makes sure the $(obj) subdirectory exists even though vdso32/
# is not a kbuild sub-make subdirectory.
override obj-dirs = $(dir $(obj)) $(obj)/vdso32/

targets += vdso32/vdso32.lds
targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o)
targets += vdso32/vclock_gettime.o

$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)

KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32

KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector)
KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
KBUILD_CFLAGS_32 += -fno-omit-frame-pointer
KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING
$(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32)

$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
				 $(obj)/vdso32/vdso32.lds \
				 $(obj)/vdso32/vclock_gettime.o \
				 $(obj)/vdso32/note.o \
				 $(obj)/vdso32/%.o
	$(call if_changed,vdso)

#
# The DSO images are built using a special linker script.
#
quiet_cmd_vdso = VDSO    $@
      cmd_vdso = $(CC) -nostdlib -o $@ \
		       $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
		       -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
		 sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'

VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
	$(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS)
GCOV_PROFILE := n

#
# Install the unstripped copies of vdso*.so.  If our toolchain supports
# build-id, install .build-id links as well.
#
quiet_cmd_vdso_install = INSTALL $(@:install_%=%)
define cmd_vdso_install
	cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \
	if readelf -n $< |grep -q 'Build ID'; then \
	  buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \
	  first=`echo $$buildid | cut -b-2`; \
	  last=`echo $$buildid | cut -b3-`; \
	  mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \
	  ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \
	fi
endef

vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%)

$(MODLIB)/vdso: FORCE
	@mkdir -p $(MODLIB)/vdso

$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE
	$(call cmd,vdso_install)

PHONY += vdso_install $(vdso_img_insttargets)
vdso_install: $(vdso_img_insttargets) FORCE

clean-files := vdso32-syscall* vdso32-sysenter* vdso32-int80* vdso64* vdso-image-*.c vdsox32.so*
10  arch/x86/entry/vdso/checkundef.sh (executable file)
@@ -0,0 +1,10 @@
#!/bin/sh
nm="$1"
file="$2"
$nm "$file" | grep '^ *U' > /dev/null 2>&1
if [ $? -eq 1 ]; then
    exit 0
else
    echo "$file: undefined symbols found" >&2
    exit 1
fi
351  arch/x86/entry/vdso/vclock_gettime.c (regular file)
@@ -0,0 +1,351 @@
/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
 *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */

#include <uapi/linux/time.h>
#include <asm/vgtod.h>
#include <asm/hpet.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <linux/math64.h>
#include <linux/time.h>

#define gtod (&VVAR(vsyscall_gtod_data))

extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);

#ifdef CONFIG_HPET_TIMER
extern u8 hpet_page
	__attribute__((visibility("hidden")));

static notrace cycle_t vread_hpet(void)
{
	return *(const volatile u32 *)(&hpet_page + HPET_COUNTER);
}
#endif

#ifndef BUILD_VDSO32

#include <linux/kernel.h>
#include <asm/vsyscall.h>
#include <asm/fixmap.h>
#include <asm/pvclock.h>

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;
	asm("syscall" : "=a" (ret) :
	    "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm("syscall" : "=a" (ret) :
	    "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
	return ret;
}

#ifdef CONFIG_PARAVIRT_CLOCK

static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
{
	const struct pvclock_vsyscall_time_info *pvti_base;
	int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
	int offset = cpu % (PAGE_SIZE/PVTI_SIZE);

	BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);

	pvti_base = (struct pvclock_vsyscall_time_info *)
		    __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);

	return &pvti_base[offset];
}

static notrace cycle_t vread_pvclock(int *mode)
{
	const struct pvclock_vsyscall_time_info *pvti;
	cycle_t ret;
	u64 last;
	u32 version;
	u8 flags;
	unsigned cpu, cpu1;


	/*
	 * Note: hypervisor must guarantee that:
	 * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
	 * 2. that per-CPU pvclock time info is updated if the
	 *    underlying CPU changes.
	 * 3. that version is increased whenever underlying CPU
	 *    changes.
	 *
	 */
	do {
		cpu = __getcpu() & VGETCPU_CPU_MASK;
		/* TODO: We can put vcpu id into higher bits of pvti.version.
		 * This will save a couple of cycles by getting rid of
		 * __getcpu() calls (Gleb).
		 */

		pvti = get_pvti(cpu);

		version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);

		/*
		 * Test we're still on the cpu as well as the version.
		 * We could have been migrated just after the first
		 * vgetcpu but before fetching the version, so we
		 * wouldn't notice a version change.
		 */
		cpu1 = __getcpu() & VGETCPU_CPU_MASK;
	} while (unlikely(cpu != cpu1 ||
			  (pvti->pvti.version & 1) ||
			  pvti->pvti.version != version));

	if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
		*mode = VCLOCK_NONE;

	/* refer to tsc.c read_tsc() comment for rationale */
	last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	return last;
}
#endif

#else

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
		: "memory", "edx");
	return ret;
}

notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
{
	long ret;

	asm(
		"mov %%ebx, %%edx \n"
		"mov %2, %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret)
		: "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
		: "memory", "edx");
	return ret;
}

#ifdef CONFIG_PARAVIRT_CLOCK

static notrace cycle_t vread_pvclock(int *mode)
{
	*mode = VCLOCK_NONE;
	return 0;
}
#endif

#endif

notrace static cycle_t vread_tsc(void)
{
	cycle_t ret;
	u64 last;

	/*
	 * Empirically, a fence (of type that depends on the CPU)
	 * before rdtsc is enough to ensure that rdtsc is ordered
	 * with respect to loads.  The various CPU manuals are unclear
	 * as to whether rdtsc can be reordered with later loads,
	 * but no one has ever seen it happen.
	 */
	rdtsc_barrier();
	ret = (cycle_t)__native_read_tsc();

	last = gtod->cycle_last;

	if (likely(ret >= last))
		return ret;

	/*
	 * GCC likes to generate cmov here, but this branch is extremely
	 * predictable (it's just a function of time and the likely is
	 * very likely) and there's a data dependence, so force GCC
	 * to generate a branch instead.  I don't barrier() because
	 * we don't actually need a barrier, and if this function
	 * ever gets inlined it will generate worse code.
	 */
	asm volatile ("");
	return last;
}

notrace static inline u64 vgetsns(int *mode)
{
	u64 v;
	cycles_t cycles;

	if (gtod->vclock_mode == VCLOCK_TSC)
		cycles = vread_tsc();
#ifdef CONFIG_HPET_TIMER
	else if (gtod->vclock_mode == VCLOCK_HPET)
		cycles = vread_hpet();
#endif
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
		cycles = vread_pvclock(mode);
#endif
	else
		return 0;
	v = (cycles - gtod->cycle_last) & gtod->mask;
	return v * gtod->mult;
}

/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
notrace static int __always_inline do_realtime(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->wall_time_sec;
		ns = gtod->wall_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

notrace static int __always_inline do_monotonic(struct timespec *ts)
{
	unsigned long seq;
	u64 ns;
	int mode;

	do {
		seq = gtod_read_begin(gtod);
		mode = gtod->vclock_mode;
		ts->tv_sec = gtod->monotonic_time_sec;
		ns = gtod->monotonic_time_snsec;
		ns += vgetsns(&mode);
		ns >>= gtod->shift;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return mode;
}

notrace static void do_realtime_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->wall_time_coarse_sec;
		ts->tv_nsec = gtod->wall_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace static void do_monotonic_coarse(struct timespec *ts)
{
	unsigned long seq;
	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = gtod->monotonic_time_coarse_sec;
		ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
	switch (clock) {
	case CLOCK_REALTIME:
		if (do_realtime(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_MONOTONIC:
		if (do_monotonic(ts) == VCLOCK_NONE)
			goto fallback;
		break;
	case CLOCK_REALTIME_COARSE:
		do_realtime_coarse(ts);
		break;
	case CLOCK_MONOTONIC_COARSE:
		do_monotonic_coarse(ts);
		break;
	default:
		goto fallback;
	}

	return 0;
fallback:
	return vdso_fallback_gettime(clock, ts);
}
int clock_gettime(clockid_t, struct timespec *)
	__attribute__((weak, alias("__vdso_clock_gettime")));

notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	if (likely(tv != NULL)) {
		if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
			return vdso_fallback_gtod(tv, tz);
		tv->tv_usec /= 1000;
	}
	if (unlikely(tz != NULL)) {
		tz->tz_minuteswest = gtod->tz_minuteswest;
		tz->tz_dsttime = gtod->tz_dsttime;
	}

	return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
	__attribute__((weak, alias("__vdso_gettimeofday")));

/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely
 */
notrace time_t __vdso_time(time_t *t)
{
	/* This is atomic on x86 so we don't need any locks. */
	time_t result = ACCESS_ONCE(gtod->wall_time_sec);

	if (t)
		*t = result;
	return result;
}
int time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));
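The weak aliases above are what let ordinary libc calls land on these entry points. As a point of reference, a minimal user-space sketch that exercises the fast paths through the standard glibc wrappers (build with cc -O2; older glibc may need -lrt):

#include <stdio.h>
#include <time.h>
#include <sys/time.h>

int main(void)
{
	struct timespec ts;
	struct timeval tv;

	/* Served by __vdso_clock_gettime -- no syscall on the fast path. */
	clock_gettime(CLOCK_MONOTONIC, &ts);
	printf("monotonic: %ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);

	/* Served by __vdso_gettimeofday. */
	gettimeofday(&tv, NULL);
	printf("walltime:  %ld.%06ld\n", (long)tv.tv_sec, (long)tv.tv_usec);
	return 0;
}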
118  arch/x86/entry/vdso/vdso-layout.lds.S (regular file)
@@ -0,0 +1,118 @@
#include <asm/vdso.h>

/*
 * Linker script for vDSO.  This is an ELF shared object prelinked to
 * its virtual address, and with only one read-only segment.
 * This script controls its layout.
 */

#if defined(BUILD_VDSO64)
# define SHDR_SIZE 64
#elif defined(BUILD_VDSO32) || defined(BUILD_VDSOX32)
# define SHDR_SIZE 40
#else
# error unknown VDSO target
#endif

#define NUM_FAKE_SHDRS 13

SECTIONS
{
	/*
	 * User/kernel shared data is before the vDSO.  This may be a little
	 * uglier than putting it after the vDSO, but it avoids issues with
	 * non-allocatable things that dangle past the end of the PT_LOAD
	 * segment.
	 */

	vvar_start = . - 2 * PAGE_SIZE;
	vvar_page = vvar_start;

	/* Place all vvars at the offsets in asm/vvar.h. */
#define EMIT_VVAR(name, offset) vvar_ ## name = vvar_page + offset;
#define __VVAR_KERNEL_LDS
#include <asm/vvar.h>
#undef __VVAR_KERNEL_LDS
#undef EMIT_VVAR

	hpet_page = vvar_start + PAGE_SIZE;

	. = SIZEOF_HEADERS;

	.hash		: { *(.hash) }			:text
	.gnu.hash	: { *(.gnu.hash) }
	.dynsym		: { *(.dynsym) }
	.dynstr		: { *(.dynstr) }
	.gnu.version	: { *(.gnu.version) }
	.gnu.version_d	: { *(.gnu.version_d) }
	.gnu.version_r	: { *(.gnu.version_r) }

	.dynamic	: { *(.dynamic) }		:text	:dynamic

	.rodata		: {
		*(.rodata*)
		*(.data*)
		*(.sdata*)
		*(.got.plt) *(.got)
		*(.gnu.linkonce.d.*)
		*(.bss*)
		*(.dynbss*)
		*(.gnu.linkonce.b.*)

		/*
		 * Ideally this would live in a C file, but that won't
		 * work cleanly for x32 until we start building the x32
		 * C code using an x32 toolchain.
		 */
		VDSO_FAKE_SECTION_TABLE_START = .;
		. = . + NUM_FAKE_SHDRS * SHDR_SIZE;
		VDSO_FAKE_SECTION_TABLE_END = .;
	}						:text

	.fake_shstrtab	: { *(.fake_shstrtab) }		:text


	.note		: { *(.note.*) }		:text	:note

	.eh_frame_hdr	: { *(.eh_frame_hdr) }		:text	:eh_frame_hdr
	.eh_frame	: { KEEP (*(.eh_frame)) }	:text


	/*
	 * Text is well-separated from actual data: there's plenty of
	 * stuff that isn't used at runtime in between.
	 */

	.text		: { *(.text*) }			:text	=0x90909090,

	/*
	 * At the end so that eu-elflint stays happy when vdso2c strips
	 * these.  A better implementation would avoid allocating space
	 * for these.
	 */
	.altinstructions	: { *(.altinstructions) }	:text
	.altinstr_replacement	: { *(.altinstr_replacement) }	:text

	/DISCARD/ : {
		*(.discard)
		*(.discard.*)
		*(__bug_table)
	}
}

/*
 * Very old versions of ld do not recognize this name token; use the constant.
 */
#define PT_GNU_EH_FRAME	0x6474e550

/*
 * We must supply the ELF program headers explicitly to get just one
 * PT_LOAD segment, and set the flags explicitly to make segments read-only.
 */
PHDRS
{
	text		PT_LOAD		FLAGS(5) FILEHDR PHDRS;	/* PF_R|PF_X */
	dynamic		PT_DYNAMIC	FLAGS(4);		/* PF_R */
	note		PT_NOTE		FLAGS(4);		/* PF_R */
	eh_frame_hdr	PT_GNU_EH_FRAME;
}
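The "just one PT_LOAD segment" property that the PHDRS block establishes can be checked from user space. A minimal sketch, assuming an x86-64 Linux process and glibc's getauxval():

#include <stdio.h>
#include <elf.h>
#include <sys/auxv.h>

int main(void)
{
	/* AT_SYSINFO_EHDR is the address of the vDSO's in-memory ELF header. */
	const Elf64_Ehdr *ehdr = (const Elf64_Ehdr *)getauxval(AT_SYSINFO_EHDR);
	const Elf64_Phdr *phdr =
		(const Elf64_Phdr *)((const char *)ehdr + ehdr->e_phoff);
	int i, loads = 0;

	for (i = 0; i < ehdr->e_phnum; i++)
		if (phdr[i].p_type == PT_LOAD)
			loads++;
	printf("vDSO PT_LOAD segments: %d\n", loads);	/* expect 1 */
	return 0;
}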
12  arch/x86/entry/vdso/vdso-note.S (regular file)
@@ -0,0 +1,12 @@
/*
 * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
 * Here we can supply some information useful to userland.
 */

#include <linux/uts.h>
#include <linux/version.h>
#include <linux/elfnote.h>

ELFNOTE_START(Linux, 0, "a")
	.long LINUX_VERSION_CODE
ELFNOTE_END
29  arch/x86/entry/vdso/vdso.lds.S (regular file)
@@ -0,0 +1,29 @@
/*
 * Linker script for 64-bit vDSO.
 * We #include the file to define the layout details.
 *
 * This file defines the version script giving the user-exported symbols in
 * the DSO.
 */

#define BUILD_VDSO64

#include "vdso-layout.lds.S"

/*
 * This controls what userland symbols we export from the vDSO.
 */
VERSION {
	LINUX_2.6 {
	global:
		clock_gettime;
		__vdso_clock_gettime;
		gettimeofday;
		__vdso_gettimeofday;
		getcpu;
		__vdso_getcpu;
		time;
		__vdso_time;
	local: *;
	};
}
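Whether a libc exposes the vDSO through the dynamic loader is implementation-specific, but glibc registers the image in the link map under its soname, so the version script above can be exercised with dlvsym(). A sketch under that assumption (build with -ldl; treat the dlopen() step as best-effort):

#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>
#include <dlfcn.h>

int main(void)
{
	void *vdso = dlopen("linux-vdso.so.1", RTLD_NOW | RTLD_NOLOAD);
	int (*vgettime)(clockid_t, struct timespec *);
	struct timespec ts;

	if (!vdso)
		return 1;
	/* Look up the symbol at the LINUX_2.6 version defined above. */
	vgettime = (int (*)(clockid_t, struct timespec *))
		dlvsym(vdso, "__vdso_clock_gettime", "LINUX_2.6");
	if (vgettime && vgettime(CLOCK_MONOTONIC, &ts) == 0)
		printf("%ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
	return 0;
}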
253  arch/x86/entry/vdso/vdso2c.c (regular file)
@@ -0,0 +1,253 @@
/*
 * vdso2c - A vdso image preparation tool
 * Copyright (c) 2014 Andy Lutomirski and others
 * Licensed under the GPL v2
 *
 * vdso2c requires stripped and unstripped input.  It would be trivial
 * to fully strip the input in here, but, for reasons described below,
 * we need to write a section table.  Doing this is more or less
 * equivalent to dropping all non-allocatable sections, but it's
 * easier to let objcopy handle that instead of doing it ourselves.
 * If we ever need to do something fancier than what objcopy provides,
 * it would be straightforward to add here.
 *
 * We keep a section table for a few reasons:
 *
 * The Go runtime had a couple of bugs: it would read the section
 * table to try to figure out how many dynamic symbols there were (it
 * shouldn't have looked at the section table at all) and, if there
 * were no SHT_DYNSYM section table entry, it would use an
 * uninitialized value for the number of symbols.  An empty DYNSYM
 * table would work, but I see no reason not to write a valid one (and
 * keep full performance for old Go programs).  This hack is only
 * needed on x86_64.
 *
 * The bug was introduced on 2012-08-31 by:
 * https://code.google.com/p/go/source/detail?r=56ea40aac72b
 * and was fixed on 2014-06-13 by:
 * https://code.google.com/p/go/source/detail?r=fc1cd5e12595
 *
 * Binutils has issues debugging the vDSO: it reads the section table to
 * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which
 * would break build-id if we removed the section table.  Binutils
 * also requires that shstrndx != 0.  See:
 * https://sourceware.org/bugzilla/show_bug.cgi?id=17064
 *
 * elfutils might not look for PT_NOTE if there is a section table at
 * all.  I don't know whether this matters for any practical purpose.
 *
 * For simplicity, rather than hacking up a partial section table, we
 * just write a mostly complete one.  We omit non-dynamic symbols,
 * though, since they're rather large.
 *
 * Once binutils gets fixed, we might be able to drop this for all but
 * the 64-bit vdso, since build-id only works in kernel RPMs, and
 * systems that update to new enough kernel RPMs will likely update
 * binutils in sync.  build-id has never worked for home-built kernel
 * RPMs without manual symlinking, and I suspect that no one ever does
 * that.
 */

#include <inttypes.h>
#include <stdint.h>
#include <unistd.h>
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <err.h>

#include <sys/mman.h>
#include <sys/types.h>

#include <tools/le_byteshift.h>

#include <linux/elf.h>
#include <linux/types.h>

const char *outfilename;

/* Symbols that we need in vdso2c. */
enum {
	sym_vvar_start,
	sym_vvar_page,
	sym_hpet_page,
	sym_VDSO_FAKE_SECTION_TABLE_START,
	sym_VDSO_FAKE_SECTION_TABLE_END,
};

const int special_pages[] = {
	sym_vvar_page,
	sym_hpet_page,
};

struct vdso_sym {
	const char *name;
	bool export;
};

struct vdso_sym required_syms[] = {
	[sym_vvar_start] = {"vvar_start", true},
	[sym_vvar_page] = {"vvar_page", true},
	[sym_hpet_page] = {"hpet_page", true},
	[sym_VDSO_FAKE_SECTION_TABLE_START] = {
		"VDSO_FAKE_SECTION_TABLE_START", false
	},
	[sym_VDSO_FAKE_SECTION_TABLE_END] = {
		"VDSO_FAKE_SECTION_TABLE_END", false
	},
	{"VDSO32_NOTE_MASK", true},
	{"VDSO32_SYSENTER_RETURN", true},
	{"__kernel_vsyscall", true},
	{"__kernel_sigreturn", true},
	{"__kernel_rt_sigreturn", true},
};

__attribute__((format(printf, 1, 2))) __attribute__((noreturn))
static void fail(const char *format, ...)
{
	va_list ap;
	va_start(ap, format);
	fprintf(stderr, "Error: ");
	vfprintf(stderr, format, ap);
	if (outfilename)
		unlink(outfilename);
	exit(1);
	va_end(ap);
}

/*
 * Evil macros for little-endian reads and writes
 */
#define GLE(x, bits, ifnot)						\
	__builtin_choose_expr(						\
		(sizeof(*(x)) == bits/8),				\
		(__typeof__(*(x)))get_unaligned_le##bits(x), ifnot)

extern void bad_get_le(void);
#define LAST_GLE(x)							\
	__builtin_choose_expr(sizeof(*(x)) == 1, *(x), bad_get_le())

#define GET_LE(x)							\
	GLE(x, 64, GLE(x, 32, GLE(x, 16, LAST_GLE(x))))

#define PLE(x, val, bits, ifnot)					\
	__builtin_choose_expr(						\
		(sizeof(*(x)) == bits/8),				\
		put_unaligned_le##bits((val), (x)), ifnot)

extern void bad_put_le(void);
#define LAST_PLE(x, val)						\
	__builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), bad_put_le())

#define PUT_LE(x, val)					\
	PLE(x, val, 64, PLE(x, val, 32, PLE(x, val, 16, LAST_PLE(x, val))))


#define NSYMS (sizeof(required_syms) / sizeof(required_syms[0]))

#define BITSFUNC3(name, bits, suffix) name##bits##suffix
#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix)
#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, )

#define INT_BITS BITSFUNC2(int, ELF_BITS, _t)

#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)

#define ELF_BITS 64
#include "vdso2c.h"
#undef ELF_BITS

#define ELF_BITS 32
#include "vdso2c.h"
#undef ELF_BITS

static void go(void *raw_addr, size_t raw_len,
	       void *stripped_addr, size_t stripped_len,
	       FILE *outfile, const char *name)
{
	Elf64_Ehdr *hdr = (Elf64_Ehdr *)raw_addr;

	if (hdr->e_ident[EI_CLASS] == ELFCLASS64) {
		go64(raw_addr, raw_len, stripped_addr, stripped_len,
		     outfile, name);
	} else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) {
		go32(raw_addr, raw_len, stripped_addr, stripped_len,
		     outfile, name);
	} else {
		fail("unknown ELF class\n");
	}
}

static void map_input(const char *name, void **addr, size_t *len, int prot)
{
	off_t tmp_len;

	int fd = open(name, O_RDONLY);
	if (fd == -1)
		err(1, "%s", name);

	tmp_len = lseek(fd, 0, SEEK_END);
	if (tmp_len == (off_t)-1)
		err(1, "lseek");
	*len = (size_t)tmp_len;

	*addr = mmap(NULL, tmp_len, prot, MAP_PRIVATE, fd, 0);
	if (*addr == MAP_FAILED)
		err(1, "mmap");

	close(fd);
}

int main(int argc, char **argv)
{
	size_t raw_len, stripped_len;
	void *raw_addr, *stripped_addr;
	FILE *outfile;
	char *name, *tmp;
	int namelen;

	if (argc != 4) {
		printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n");
		return 1;
	}

	/*
	 * Figure out the struct name.  If we're writing to a .so file,
	 * generate raw output instead.
	 */
	name = strdup(argv[3]);
	namelen = strlen(name);
	if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) {
		name = NULL;
	} else {
		tmp = strrchr(name, '/');
		if (tmp)
			name = tmp + 1;
		tmp = strchr(name, '.');
		if (tmp)
			*tmp = '\0';
		for (tmp = name; *tmp; tmp++)
			if (*tmp == '-')
				*tmp = '_';
	}

	map_input(argv[1], &raw_addr, &raw_len, PROT_READ);
	map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ);

	outfilename = argv[3];
	outfile = fopen(outfilename, "w");
	if (!outfile)
		err(1, "%s", argv[2]);

	go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name);

	munmap(raw_addr, raw_len);
	munmap(stripped_addr, stripped_len);
	fclose(outfile);

	return 0;
}
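Piecing together the fprintf() calls in BITSFUNC(go) (see vdso2c.h below), the generated vdso-image-*.c comes out roughly as follows; all numeric values here are illustrative placeholders, not output from a real build:

/* AUTOMATICALLY GENERATED -- DO NOT EDIT */

#include <linux/linkage.h>
#include <asm/page_types.h>
#include <asm/vdso.h>

static unsigned char raw_data[8192] __page_aligned_data = {
	0x7F, 0x45, 0x4C, 0x46,	/* ...the stripped image, byte by byte... */
};

static struct page *pages[2];

const struct vdso_image vdso_image_64 = {
	.data = raw_data,
	.size = 8192,
	.text_mapping = {
		.name = "[vdso]",
		.pages = pages,
	},
	.alt = 2725,		/* offset of .altinstructions (illustrative) */
	.alt_len = 91,		/* illustrative */
	.sym_vvar_start = -8192,
	.sym_vvar_page = -8192,
	.sym_hpet_page = -4096,
};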
175  arch/x86/entry/vdso/vdso2c.h (regular file)
@@ -0,0 +1,175 @@
/*
 * This file is included twice from vdso2c.c.  It generates code for 32-bit
 * and 64-bit vDSOs.  We need both for 64-bit builds, since 32-bit vDSOs
 * are built for 32-bit userspace.
 */

static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
			 void *stripped_addr, size_t stripped_len,
			 FILE *outfile, const char *name)
{
	int found_load = 0;
	unsigned long load_size = -1;  /* Work around bogus warning */
	unsigned long mapping_size;
	ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr;
	int i;
	unsigned long j;
	ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr,
		*alt_sec = NULL;
	ELF(Dyn) *dyn = 0, *dyn_end = 0;
	const char *secstrings;
	INT_BITS syms[NSYMS] = {};

	ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_LE(&hdr->e_phoff));

	/* Walk the segment table. */
	for (i = 0; i < GET_LE(&hdr->e_phnum); i++) {
		if (GET_LE(&pt[i].p_type) == PT_LOAD) {
			if (found_load)
				fail("multiple PT_LOAD segs\n");

			if (GET_LE(&pt[i].p_offset) != 0 ||
			    GET_LE(&pt[i].p_vaddr) != 0)
				fail("PT_LOAD in wrong place\n");

			if (GET_LE(&pt[i].p_memsz) != GET_LE(&pt[i].p_filesz))
				fail("cannot handle memsz != filesz\n");

			load_size = GET_LE(&pt[i].p_memsz);
			found_load = 1;
		} else if (GET_LE(&pt[i].p_type) == PT_DYNAMIC) {
			dyn = raw_addr + GET_LE(&pt[i].p_offset);
			dyn_end = raw_addr + GET_LE(&pt[i].p_offset) +
				GET_LE(&pt[i].p_memsz);
		}
	}
	if (!found_load)
		fail("no PT_LOAD seg\n");

	if (stripped_len < load_size)
		fail("stripped input is too short\n");

	/* Walk the dynamic table */
	for (i = 0; dyn + i < dyn_end &&
		     GET_LE(&dyn[i].d_tag) != DT_NULL; i++) {
		typeof(dyn[i].d_tag) tag = GET_LE(&dyn[i].d_tag);
		if (tag == DT_REL || tag == DT_RELSZ || tag == DT_RELA ||
		    tag == DT_RELENT || tag == DT_TEXTREL)
			fail("vdso image contains dynamic relocations\n");
	}

	/* Walk the section table */
	secstrings_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
		GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx);
	secstrings = raw_addr + GET_LE(&secstrings_hdr->sh_offset);
	for (i = 0; i < GET_LE(&hdr->e_shnum); i++) {
		ELF(Shdr) *sh = raw_addr + GET_LE(&hdr->e_shoff) +
			GET_LE(&hdr->e_shentsize) * i;
		if (GET_LE(&sh->sh_type) == SHT_SYMTAB)
			symtab_hdr = sh;

		if (!strcmp(secstrings + GET_LE(&sh->sh_name),
			    ".altinstructions"))
			alt_sec = sh;
	}

	if (!symtab_hdr)
		fail("no symbol table\n");

	strtab_hdr = raw_addr + GET_LE(&hdr->e_shoff) +
		GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link);

	/* Walk the symbol table */
	for (i = 0;
	     i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize);
	     i++) {
		int k;
		ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) +
			GET_LE(&symtab_hdr->sh_entsize) * i;
		const char *name = raw_addr + GET_LE(&strtab_hdr->sh_offset) +
			GET_LE(&sym->st_name);

		for (k = 0; k < NSYMS; k++) {
			if (!strcmp(name, required_syms[k].name)) {
				if (syms[k]) {
					fail("duplicate symbol %s\n",
					     required_syms[k].name);
				}

				/*
				 * Careful: we use negative addresses, but
				 * st_value is unsigned, so we rely
				 * on syms[k] being a signed type of the
				 * correct width.
				 */
				syms[k] = GET_LE(&sym->st_value);
			}
		}
	}

	/* Validate mapping addresses. */
	for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) {
		INT_BITS symval = syms[special_pages[i]];

		if (!symval)
			continue;  /* The mapping isn't used; ignore it. */

		if (symval % 4096)
			fail("%s must be a multiple of 4096\n",
			     required_syms[special_pages[i]].name);
		if (symval + 4096 < syms[sym_vvar_start])
			fail("%s underruns vvar_start\n",
			     required_syms[special_pages[i]].name);
		if (symval + 4096 > 0)
			fail("%s is on the wrong side of the vdso text\n",
			     required_syms[special_pages[i]].name);
	}
	if (syms[sym_vvar_start] % 4096)
		fail("vvar_begin must be a multiple of 4096\n");

	if (!name) {
		fwrite(stripped_addr, stripped_len, 1, outfile);
		return;
	}

	mapping_size = (stripped_len + 4095) / 4096 * 4096;

	fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n");
	fprintf(outfile, "#include <linux/linkage.h>\n");
	fprintf(outfile, "#include <asm/page_types.h>\n");
	fprintf(outfile, "#include <asm/vdso.h>\n");
	fprintf(outfile, "\n");
	fprintf(outfile,
		"static unsigned char raw_data[%lu] __page_aligned_data = {",
		mapping_size);
	for (j = 0; j < stripped_len; j++) {
		if (j % 10 == 0)
			fprintf(outfile, "\n\t");
		fprintf(outfile, "0x%02X, ",
			(int)((unsigned char *)stripped_addr)[j]);
	}
	fprintf(outfile, "\n};\n\n");

	fprintf(outfile, "static struct page *pages[%lu];\n\n",
		mapping_size / 4096);

	fprintf(outfile, "const struct vdso_image %s = {\n", name);
	fprintf(outfile, "\t.data = raw_data,\n");
	fprintf(outfile, "\t.size = %lu,\n", mapping_size);
	fprintf(outfile, "\t.text_mapping = {\n");
	fprintf(outfile, "\t\t.name = \"[vdso]\",\n");
	fprintf(outfile, "\t\t.pages = pages,\n");
	fprintf(outfile, "\t},\n");
	if (alt_sec) {
		fprintf(outfile, "\t.alt = %lu,\n",
			(unsigned long)GET_LE(&alt_sec->sh_offset));
		fprintf(outfile, "\t.alt_len = %lu,\n",
			(unsigned long)GET_LE(&alt_sec->sh_size));
	}
	for (i = 0; i < NSYMS; i++) {
		if (required_syms[i].export && syms[i])
			fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n",
				required_syms[i].name, (int64_t)syms[i]);
	}
	fprintf(outfile, "};\n");
}
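The double inclusion driven by ELF_BITS is plain token pasting; a self-contained sketch of the same trick (hypothetical names, not kernel code):

#include <stdio.h>

#define BITSFUNC3(name, bits) name##bits
#define BITSFUNC2(name, bits) BITSFUNC3(name, bits)
#define BITSFUNC(name)        BITSFUNC2(name, ELF_BITS)

#define ELF_BITS 64
static int BITSFUNC(width)(void) { return ELF_BITS; }	/* defines width64() */
#undef ELF_BITS

#define ELF_BITS 32
static int BITSFUNC(width)(void) { return ELF_BITS; }	/* defines width32() */
#undef ELF_BITS

int main(void)
{
	printf("%d %d\n", width64(), width32());	/* prints "64 32" */
	return 0;
}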
120  arch/x86/entry/vdso/vdso32-setup.c (regular file)
@@ -0,0 +1,120 @@
/*
 * (C) Copyright 2002 Linus Torvalds
 * Portions based on the vdso-randomization code from exec-shield:
 * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar
 *
 * This file contains the needed initializations to support sysenter.
 */

#include <linux/init.h>
#include <linux/smp.h>
#include <linux/kernel.h>
#include <linux/mm_types.h>

#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/vdso.h>

#ifdef CONFIG_COMPAT_VDSO
#define VDSO_DEFAULT	0
#else
#define VDSO_DEFAULT	1
#endif

/*
 * Should the kernel map a VDSO page into processes and pass its
 * address down to glibc upon exec()?
 */
unsigned int __read_mostly vdso32_enabled = VDSO_DEFAULT;

static int __init vdso32_setup(char *s)
{
	vdso32_enabled = simple_strtoul(s, NULL, 0);

	if (vdso32_enabled > 1)
		pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");

	return 1;
}

/*
 * For consistency, the argument vdso32=[012] affects the 32-bit vDSO
 * behavior on both 64-bit and 32-bit kernels.
 * On 32-bit kernels, vdso=[012] means the same thing.
 */
__setup("vdso32=", vdso32_setup);

#ifdef CONFIG_X86_32
__setup_param("vdso=", vdso_setup, vdso32_setup, 0);
#endif

#ifdef CONFIG_X86_64

#define	vdso32_sysenter()	(boot_cpu_has(X86_FEATURE_SYSENTER32))
#define	vdso32_syscall()	(boot_cpu_has(X86_FEATURE_SYSCALL32))

#else  /* CONFIG_X86_32 */

#define vdso32_sysenter()	(boot_cpu_has(X86_FEATURE_SEP))
#define vdso32_syscall()	(0)

#endif	/* CONFIG_X86_64 */

#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
const struct vdso_image *selected_vdso32;
#endif

int __init sysenter_setup(void)
{
#ifdef CONFIG_COMPAT
	if (vdso32_syscall())
		selected_vdso32 = &vdso_image_32_syscall;
	else
#endif
	if (vdso32_sysenter())
		selected_vdso32 = &vdso_image_32_sysenter;
	else
		selected_vdso32 = &vdso_image_32_int80;

	init_vdso_image(selected_vdso32);

	return 0;
}

#ifdef CONFIG_X86_64

subsys_initcall(sysenter_setup);

#ifdef CONFIG_SYSCTL
/* Register vsyscall32 into the ABI table */
#include <linux/sysctl.h>

static struct ctl_table abi_table2[] = {
	{
		.procname	= "vsyscall32",
		.data		= &vdso32_enabled,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{}
};

static struct ctl_table abi_root_table2[] = {
	{
		.procname = "abi",
		.mode = 0555,
		.child = abi_table2
	},
	{}
};

static __init int ia32_binfmt_init(void)
{
	register_sysctl_table(abi_root_table2);
	return 0;
}
__initcall(ia32_binfmt_init);
#endif /* CONFIG_SYSCTL */

#endif	/* CONFIG_X86_64 */
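The sysctl registered above surfaces as /proc/sys/abi/vsyscall32 on 64-bit kernels; a minimal sketch for querying it from user space:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/abi/vsyscall32", "r");
	int enabled;

	if (!f) {
		perror("fopen");	/* absent on 32-bit or !CONFIG_SYSCTL kernels */
		return 1;
	}
	if (fscanf(f, "%d", &enabled) == 1)
		printf("32-bit vDSO %s\n", enabled ? "enabled" : "disabled");
	fclose(f);
	return 0;
}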
1  arch/x86/entry/vdso/vdso32/.gitignore (vendored, regular file)
@@ -0,0 +1 @@
vdso32.lds
56  arch/x86/entry/vdso/vdso32/int80.S (regular file)
@@ -0,0 +1,56 @@
/*
 * Code for the vDSO.  This version uses the old int $0x80 method.
 *
 * First get the common code for the sigreturn entry points.
 * This must come first.
 */
#include "sigreturn.S"

	.text
	.globl __kernel_vsyscall
	.type __kernel_vsyscall,@function
	ALIGN
__kernel_vsyscall:
.LSTART_vsyscall:
	int $0x80
	ret
.LEND_vsyscall:
	.size __kernel_vsyscall,.-.LSTART_vsyscall
	.previous

	.section .eh_frame,"a",@progbits
.LSTARTFRAMEDLSI:
	.long .LENDCIEDLSI-.LSTARTCIEDLSI
.LSTARTCIEDLSI:
	.long 0			/* CIE ID */
	.byte 1			/* Version number */
	.string "zR"		/* NUL-terminated augmentation string */
	.uleb128 1		/* Code alignment factor */
	.sleb128 -4		/* Data alignment factor */
	.byte 8			/* Return address register column */
	.uleb128 1		/* Augmentation value length */
	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
	.byte 0x0c		/* DW_CFA_def_cfa */
	.uleb128 4
	.uleb128 4
	.byte 0x88		/* DW_CFA_offset, column 0x8 */
	.uleb128 1
	.align 4
.LENDCIEDLSI:
	.long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
.LSTARTFDEDLSI:
	.long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
	.long .LSTART_vsyscall-.	/* PC-relative start address */
	.long .LEND_vsyscall-.LSTART_vsyscall
	.uleb128 0
	.align 4
.LENDFDEDLSI:
	.previous

	/*
	 * Pad out the segment to match the size of the sysenter.S version.
	 */
VDSO32_vsyscall_eh_frame_size = 0x40
	.section .data,"aw",@progbits
	.space VDSO32_vsyscall_eh_frame_size-(.LENDFDEDLSI-.LSTARTFRAMEDLSI), 0
	.previous
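The int $0x80 entry used by this image is the plain legacy syscall mechanism; a minimal sketch invoking it directly, assuming an i386 build (cc -m32), where __NR_getpid is 20 in the 32-bit syscall table:

#include <stdio.h>

int main(void)
{
	long pid;

	/* eax holds the syscall number going in and the return value coming out. */
	asm volatile ("int $0x80" : "=a" (pid) : "0" (20L));
	printf("pid: %ld\n", pid);
	return 0;
}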
44  arch/x86/entry/vdso/vdso32/note.S (regular file)
@@ -0,0 +1,44 @@
/*
 * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
 * Here we can supply some information useful to userland.
 */

#include <linux/version.h>
#include <linux/elfnote.h>

/* Ideally this would use UTS_NAME, but using a quoted string here
   doesn't work.  Remember to change this when changing the
   kernel's name. */
ELFNOTE_START(Linux, 0, "a")
	.long LINUX_VERSION_CODE
ELFNOTE_END

#ifdef CONFIG_XEN
/*
 * Add a special note telling glibc's dynamic linker a fake hardware
 * flavor that it will use to choose the search path for libraries in the
 * same way it uses real hardware capabilities like "mmx".
 * We supply "nosegneg" as the fake capability, to indicate that we
 * do not like negative offsets in instructions using segment overrides,
 * since we implement those inefficiently.  This makes it possible to
 * install libraries optimized to avoid those access patterns in someplace
 * like /lib/i686/tls/nosegneg.  Note that an /etc/ld.so.conf.d/file
 * corresponding to the bits here is needed to make ldconfig work right.
 * It should contain:
 *	hwcap 1 nosegneg
 * to match the mapping of bit to name that we give here.
 *
 * At runtime, the fake hardware feature will be considered to be present
 * if its bit is set in the mask word.  So, we start with the mask 0, and
 * at boot time we set VDSO_NOTE_NONEGSEG_BIT if running under Xen.
 */

#include "../../xen/vdso.h"	/* Defines VDSO_NOTE_NONEGSEG_BIT. */

ELFNOTE_START(GNU, 2, "a")
	.long 1			/* ncaps */
VDSO32_NOTE_MASK:		/* Symbol used by arch/x86/xen/setup.c */
	.long 0			/* mask */
	.byte VDSO_NOTE_NONEGSEG_BIT; .asciz "nosegneg"	/* bit, name */
ELFNOTE_END
#endif
145  arch/x86/entry/vdso/vdso32/sigreturn.S (regular file)
@@ -0,0 +1,145 @@
/*
 * Common code for the sigreturn entry points in vDSO images.
 * So far this code is the same for both int80 and sysenter versions.
 * This file is #include'd by int80.S et al to define them first thing.
 * The kernel assumes that the addresses of these routines are constant
 * for all vDSO implementations.
 */

#include <linux/linkage.h>
#include <asm/unistd_32.h>
#include <asm/asm-offsets.h>

#ifndef SYSCALL_ENTER_KERNEL
#define	SYSCALL_ENTER_KERNEL	int $0x80
#endif

	.text
	.globl __kernel_sigreturn
	.type __kernel_sigreturn,@function
	nop /* this guy is needed for .LSTARTFDEDLSI1 below (watch for HACK) */
	ALIGN
__kernel_sigreturn:
.LSTART_sigreturn:
	popl %eax		/* XXX does this mean it needs unwind info? */
	movl $__NR_sigreturn, %eax
	SYSCALL_ENTER_KERNEL
.LEND_sigreturn:
	nop
	.size __kernel_sigreturn,.-.LSTART_sigreturn

	.globl __kernel_rt_sigreturn
	.type __kernel_rt_sigreturn,@function
	ALIGN
__kernel_rt_sigreturn:
.LSTART_rt_sigreturn:
	movl $__NR_rt_sigreturn, %eax
	SYSCALL_ENTER_KERNEL
.LEND_rt_sigreturn:
	nop
	.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
	.previous

	.section .eh_frame,"a",@progbits
.LSTARTFRAMEDLSI1:
	.long .LENDCIEDLSI1-.LSTARTCIEDLSI1
.LSTARTCIEDLSI1:
	.long 0			/* CIE ID */
	.byte 1			/* Version number */
	.string "zRS"		/* NUL-terminated augmentation string */
	.uleb128 1		/* Code alignment factor */
	.sleb128 -4		/* Data alignment factor */
	.byte 8			/* Return address register column */
	.uleb128 1		/* Augmentation value length */
	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
	.byte 0			/* DW_CFA_nop */
	.align 4
.LENDCIEDLSI1:
	.long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */
.LSTARTFDEDLSI1:
	.long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */
	/* HACK: The dwarf2 unwind routines will subtract 1 from the
	   return address to get an address in the middle of the
	   presumed call instruction.  Since we didn't get here via
	   a call, we need to include the nop before the real start
	   to make up for it. */
	.long .LSTART_sigreturn-1-.	/* PC-relative start address */
	.long .LEND_sigreturn-.LSTART_sigreturn+1
	.uleb128 0			/* Augmentation */
	/* What follows are the instructions for the table generation.
	   We record the locations of each register saved.  This is
	   complicated by the fact that the "CFA" is always assumed to
	   be the value of the stack pointer in the caller.  This means
	   that we must define the CFA of this body of code to be the
	   saved value of the stack pointer in the sigcontext.  Which
	   also means that there is no fixed relation to the other
	   saved registers, which means that we must use DW_CFA_expression
	   to compute their addresses.  It also means that when we
	   adjust the stack with the popl, we have to do it all over again.  */

#define do_cfa_expr(offset)						\
	.byte 0x0f;			/* DW_CFA_def_cfa_expression */	\
	.uleb128 1f-0f;			/*   length */			\
0:	.byte 0x74;			/*     DW_OP_breg4 */		\
	.sleb128 offset;		/*      offset */		\
	.byte 0x06;			/*     DW_OP_deref */		\
1:

#define do_expr(regno, offset)						\
	.byte 0x10;			/* DW_CFA_expression */		\
	.uleb128 regno;			/*   regno */			\
	.uleb128 1f-0f;			/*   length */			\
0:	.byte 0x74;			/*     DW_OP_breg4 */		\
	.sleb128 offset;		/*       offset */		\
1:

	do_cfa_expr(IA32_SIGCONTEXT_sp+4)
	do_expr(0, IA32_SIGCONTEXT_ax+4)
	do_expr(1, IA32_SIGCONTEXT_cx+4)
	do_expr(2, IA32_SIGCONTEXT_dx+4)
	do_expr(3, IA32_SIGCONTEXT_bx+4)
	do_expr(5, IA32_SIGCONTEXT_bp+4)
	do_expr(6, IA32_SIGCONTEXT_si+4)
	do_expr(7, IA32_SIGCONTEXT_di+4)
	do_expr(8, IA32_SIGCONTEXT_ip+4)

	.byte 0x42	/* DW_CFA_advance_loc 2 -- nop; popl eax. */

	do_cfa_expr(IA32_SIGCONTEXT_sp)
	do_expr(0, IA32_SIGCONTEXT_ax)
	do_expr(1, IA32_SIGCONTEXT_cx)
	do_expr(2, IA32_SIGCONTEXT_dx)
	do_expr(3, IA32_SIGCONTEXT_bx)
	do_expr(5, IA32_SIGCONTEXT_bp)
	do_expr(6, IA32_SIGCONTEXT_si)
	do_expr(7, IA32_SIGCONTEXT_di)
	do_expr(8, IA32_SIGCONTEXT_ip)

	.align 4
.LENDFDEDLSI1:

	.long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */
.LSTARTFDEDLSI2:
	.long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */
	/* HACK: See above wrt unwind library assumptions.  */
	.long .LSTART_rt_sigreturn-1-.	/* PC-relative start address */
	.long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
	.uleb128 0			/* Augmentation */
	/* What follows are the instructions for the table generation.
	   We record the locations of each register saved.  This is
	   slightly less complicated than the above, since we don't
	   modify the stack pointer in the process.  */

	do_cfa_expr(IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_sp)
	do_expr(0, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ax)
	do_expr(1, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_cx)
	do_expr(2, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_dx)
	do_expr(3, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bx)
	do_expr(5, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_bp)
	do_expr(6, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_si)
	do_expr(7, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_di)
	do_expr(8, IA32_RT_SIGFRAME_sigcontext-4 + IA32_SIGCONTEXT_ip)

	.align 4
.LENDFDEDLSI2:
	.previous
75  arch/x86/entry/vdso/vdso32/syscall.S (regular file)
@@ -0,0 +1,75 @@
/*
 * Code for the vDSO.  This version uses the syscall instruction.
 *
 * First get the common code for the sigreturn entry points.
 * This must come first.
 */
#define SYSCALL_ENTER_KERNEL	syscall
#include "sigreturn.S"

#include <asm/segment.h>

	.text
	.globl __kernel_vsyscall
	.type __kernel_vsyscall,@function
	ALIGN
__kernel_vsyscall:
.LSTART_vsyscall:
	push	%ebp
.Lpush_ebp:
	movl	%ecx, %ebp
	syscall
	movl	%ebp, %ecx
	popl	%ebp
.Lpop_ebp:
	ret
.LEND_vsyscall:
	.size __kernel_vsyscall,.-.LSTART_vsyscall

	.section .eh_frame,"a",@progbits
.LSTARTFRAME:
	.long .LENDCIE-.LSTARTCIE
.LSTARTCIE:
	.long 0			/* CIE ID */
	.byte 1			/* Version number */
	.string "zR"		/* NUL-terminated augmentation string */
	.uleb128 1		/* Code alignment factor */
	.sleb128 -4		/* Data alignment factor */
	.byte 8			/* Return address register column */
	.uleb128 1		/* Augmentation value length */
	.byte 0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
	.byte 0x0c		/* DW_CFA_def_cfa */
	.uleb128 4
	.uleb128 4
	.byte 0x88		/* DW_CFA_offset, column 0x8 */
	.uleb128 1
	.align 4
.LENDCIE:

	.long .LENDFDE1-.LSTARTFDE1	/* Length FDE */
.LSTARTFDE1:
	.long .LSTARTFDE1-.LSTARTFRAME	/* CIE pointer */
	.long .LSTART_vsyscall-.	/* PC-relative start address */
	.long .LEND_vsyscall-.LSTART_vsyscall
	.uleb128 0			/* Augmentation length */
	/* What follows are the instructions for the table generation.
	   We have to record all changes of the stack pointer.  */
	.byte 0x40 + .Lpush_ebp-.LSTART_vsyscall /* DW_CFA_advance_loc */
	.byte 0x0e		/* DW_CFA_def_cfa_offset */
	.uleb128 8
	.byte 0x85, 0x02	/* DW_CFA_offset %ebp -8 */
	.byte 0x40 + .Lpop_ebp-.Lpush_ebp /* DW_CFA_advance_loc */
	.byte 0xc5		/* DW_CFA_restore %ebp */
	.byte 0x0e		/* DW_CFA_def_cfa_offset */
	.uleb128 4
	.align 4
.LENDFDE1:
	.previous

	/*
	 * Pad out the segment to match the size of the sysenter.S version.
	 */
VDSO32_vsyscall_eh_frame_size = 0x40
	.section .data,"aw",@progbits
	.space VDSO32_vsyscall_eh_frame_size-(.LENDFDE1-.LSTARTFRAME), 0
	.previous
@@ -0,0 +1,116 @@
|
||||
/*
|
||||
* Code for the vDSO. This version uses the sysenter instruction.
|
||||
*
|
||||
* First get the common code for the sigreturn entry points.
|
||||
* This must come first.
|
||||
*/
|
||||
#include "sigreturn.S"
|
||||
|
||||
/*
|
||||
* The caller puts arg2 in %ecx, which gets pushed. The kernel will use
|
||||
* %ecx itself for arg2. The pushing is because the sysexit instruction
|
||||
* (found in entry.S) requires that we clobber %ecx with the desired %esp.
|
||||
* User code might expect that %ecx is unclobbered though, as it would be
|
||||
* for returning via the iret instruction, so we must push and pop.
|
||||
*
|
||||
* The caller puts arg3 in %edx, which the sysexit instruction requires
|
||||
* for %eip. Thus, exactly as for arg2, we must push and pop.
|
||||
*
|
||||
* Arg6 is different. The caller puts arg6 in %ebp. Since the sysenter
|
||||
* instruction clobbers %esp, the user's %esp won't even survive entry
|
||||
* into the kernel. We store %esp in %ebp. Code in entry.S must fetch
|
||||
* arg6 from the stack.
|
||||
*
|
||||
* You can not use this vsyscall for the clone() syscall because the
|
||||
* three words on the parent stack do not get copied to the child.
|
||||
*/
|
||||
.text
|
||||
.globl __kernel_vsyscall
|
||||
.type __kernel_vsyscall,@function
|
||||
ALIGN
|
||||
__kernel_vsyscall:
|
||||
.LSTART_vsyscall:
|
||||
push %ecx
|
||||
.Lpush_ecx:
|
||||
push %edx
|
||||
.Lpush_edx:
|
||||
push %ebp
|
||||
.Lenter_kernel:
|
||||
movl %esp,%ebp
|
||||
sysenter
|
||||
|
||||
/* 7: align return point with nop's to make disassembly easier */
|
||||
.space 7,0x90
|
||||
|
||||
/* 14: System call restart point is here! (SYSENTER_RETURN-2) */
|
||||
int $0x80
|
||||
/* 16: System call normal return point is here! */
|
||||
VDSO32_SYSENTER_RETURN: /* Symbol used by sysenter.c via vdso32-syms.h */
|
||||
pop %ebp
|
||||
.Lpop_ebp:
|
||||
pop %edx
|
||||
.Lpop_edx:
|
||||
pop %ecx
|
||||
.Lpop_ecx:
|
||||
ret
|
||||
.LEND_vsyscall:
|
||||
.size __kernel_vsyscall,.-.LSTART_vsyscall
|
||||
.previous

	.section .eh_frame,"a",@progbits
.LSTARTFRAMEDLSI:
	.long .LENDCIEDLSI-.LSTARTCIEDLSI
.LSTARTCIEDLSI:
	.long 0				/* CIE ID */
	.byte 1				/* Version number */
	.string "zR"			/* NUL-terminated augmentation string */
	.uleb128 1			/* Code alignment factor */
	.sleb128 -4			/* Data alignment factor */
	.byte 8				/* Return address register column */
	.uleb128 1			/* Augmentation value length */
	.byte 0x1b			/* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
	.byte 0x0c			/* DW_CFA_def_cfa */
	.uleb128 4
	.uleb128 4
	.byte 0x88			/* DW_CFA_offset, column 0x8 */
	.uleb128 1
	.align 4
.LENDCIEDLSI:
	.long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
.LSTARTFDEDLSI:
	.long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
	.long .LSTART_vsyscall-.	/* PC-relative start address */
	.long .LEND_vsyscall-.LSTART_vsyscall
	.uleb128 0
	/* What follows are the instructions for the table generation.
	   We have to record all changes of the stack pointer. */
	.byte 0x40 + (.Lpush_ecx-.LSTART_vsyscall) /* DW_CFA_advance_loc */
	.byte 0x0e			/* DW_CFA_def_cfa_offset */
	.byte 0x08			/* RA at offset 8 now */
	.byte 0x40 + (.Lpush_edx-.Lpush_ecx) /* DW_CFA_advance_loc */
	.byte 0x0e			/* DW_CFA_def_cfa_offset */
	.byte 0x0c			/* RA at offset 12 now */
	.byte 0x40 + (.Lenter_kernel-.Lpush_edx) /* DW_CFA_advance_loc */
	.byte 0x0e			/* DW_CFA_def_cfa_offset */
	.byte 0x10			/* RA at offset 16 now */
	.byte 0x85, 0x04		/* DW_CFA_offset %ebp -16 */
	/* Finally the epilogue. */
	.byte 0x40 + (.Lpop_ebp-.Lenter_kernel)	/* DW_CFA_advance_loc */
	.byte 0x0e			/* DW_CFA_def_cfa_offset */
	.byte 0x0c			/* RA at offset 12 now */
	.byte 0xc5			/* DW_CFA_restore %ebp */
	.byte 0x40 + (.Lpop_edx-.Lpop_ebp) /* DW_CFA_advance_loc */
	.byte 0x0e			/* DW_CFA_def_cfa_offset */
	.byte 0x08			/* RA at offset 8 now */
	.byte 0x40 + (.Lpop_ecx-.Lpop_edx) /* DW_CFA_advance_loc */
	.byte 0x0e			/* DW_CFA_def_cfa_offset */
	.byte 0x04			/* RA at offset 4 now */
	.align 4
.LENDFDEDLSI:
	.previous

/*
 * Emit a symbol with the size of this .eh_frame data,
 * to verify it matches the other versions.
 */
VDSO32_vsyscall_eh_frame_size = (.LENDFDEDLSI-.LSTARTFRAMEDLSI)
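
The ".byte 0x40 + (delta)" entries above rely on DWARF packing the operand of DW_CFA_advance_loc into the low six bits of the opcode byte itself, scaled by the CIE's code alignment factor (1 here). A small sketch of the decoding:

#include <stdio.h>

int main(void)
{
	unsigned char op = 0x40 + 2;	/* e.g. .Lpush_edx - .Lpush_ecx == 2 */

	if ((op & 0xc0) == 0x40)	/* high two bits: DW_CFA_advance_loc */
		printf("advance pc by %u byte(s)\n", op & 0x3f);
	return 0;
}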
@@ -0,0 +1,30 @@
#define BUILD_VDSO32

#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
#undef CONFIG_OPTIMIZE_INLINING
#endif

#undef CONFIG_X86_PPRO_FENCE

#ifdef CONFIG_X86_64

/*
 * In case of a 32-bit vDSO for a 64-bit kernel, fake a 32-bit kernel
 * configuration.
 */
#undef CONFIG_64BIT
#undef CONFIG_X86_64
#undef CONFIG_ILLEGAL_POINTER_VALUE
#undef CONFIG_SPARSEMEM_VMEMMAP
#undef CONFIG_NR_CPUS

#define CONFIG_X86_32 1
#define CONFIG_PAGE_OFFSET 0
#define CONFIG_ILLEGAL_POINTER_VALUE 0
#define CONFIG_NR_CPUS 1

#define BUILD_VDSO32_64

#endif

#include "../vclock_gettime.c"
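
The file above is the 32-bit build wrapper around the shared vclock_gettime.c: it rewrites the configuration macros and then includes the 64-bit source unchanged. The same "override, then include" idiom in miniature, with purely hypothetical file and macro names:

/* fast_impl_32.c -- hypothetical wrapper mirroring the pattern above */
#undef  MY_CONFIG_64BIT		/* drop the 64-bit configuration ... */
#define MY_CONFIG_32BIT 1	/* ... and fake a 32-bit one */
#include "fast_impl.c"		/* reuse the one shared implementation */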
@@ -0,0 +1 @@
#include "../vdso-fakesections.c"
@@ -0,0 +1,37 @@
/*
 * Linker script for 32-bit vDSO.
 * We #include the file to define the layout details.
 *
 * This file defines the version script giving the user-exported symbols in
 * the DSO.
 */

#include <asm/page.h>

#define BUILD_VDSO32

#include "../vdso-layout.lds.S"

/* The ELF entry point can be used to set the AT_SYSINFO value. */
ENTRY(__kernel_vsyscall);

/*
 * This controls what userland symbols we export from the vDSO.
 */
VERSION
{
	LINUX_2.6 {
	global:
		__vdso_clock_gettime;
		__vdso_gettimeofday;
		__vdso_time;
	};

	LINUX_2.5 {
	global:
		__kernel_vsyscall;
		__kernel_sigreturn;
		__kernel_rt_sigreturn;
	local: *;
	};
}
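
Userspace can locate the resulting DSO without any file on disk: the kernel passes the address of the mapped image's ELF header in the auxiliary vector. A minimal sketch using the real AT_SYSINFO_EHDR tag (symbol lookup through the LINUX_2.5/LINUX_2.6 version nodes above is left to an ELF parser or the dynamic loader):

#include <elf.h>
#include <sys/auxv.h>
#include <stdio.h>

int main(void)
{
	unsigned long ehdr = getauxval(AT_SYSINFO_EHDR);

	if (ehdr)
		printf("vDSO ELF header mapped at 0x%lx\n", ehdr);
	else
		printf("no vDSO for this process\n");
	return 0;
}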
@@ -0,0 +1,25 @@
/*
 * Linker script for x32 vDSO.
 * We #include the file to define the layout details.
 *
 * This file defines the version script giving the user-exported symbols in
 * the DSO.
 */

#define BUILD_VDSOX32

#include "vdso-layout.lds.S"

/*
 * This controls what userland symbols we export from the vDSO.
 */
VERSION {
	LINUX_2.6 {
	global:
		__vdso_clock_gettime;
		__vdso_gettimeofday;
		__vdso_getcpu;
		__vdso_time;
	local: *;
	};
}
28
arch/x86/entry/vdso/vgetcpu.c
Normal file
@@ -0,0 +1,28 @@
/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of getcpu()
 */

#include <linux/kernel.h>
#include <linux/getcpu.h>
#include <linux/time.h>
#include <asm/vgtod.h>

notrace long
__vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
{
	unsigned int p;

	p = __getcpu();

	if (cpu)
		*cpu = p & VGETCPU_CPU_MASK;
	if (node)
		*node = p >> 12;
	return 0;
}

long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
	__attribute__((weak, alias("__vdso_getcpu")));
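
For comparison, the usual way to reach this from userspace is glibc's sched_getcpu(), which can be satisfied by __vdso_getcpu() on kernels that export it, avoiding a syscall on the fast path:

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	/* sched_getcpu() may resolve to the vDSO implementation above. */
	printf("running on CPU %d\n", sched_getcpu());
	return 0;
}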
300
arch/x86/entry/vdso/vma.c
Normal file
@@ -0,0 +1,300 @@
/*
 * Copyright 2007 Andi Kleen, SUSE Labs.
 * Subject to the GPL, v.2
 *
 * This contains most of the x86 vDSO kernel-side code.
 */
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
#include <asm/vvar.h>
#include <asm/page.h>
#include <asm/hpet.h>
#include <asm/desc.h>

#if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1;
#endif

void __init init_vdso_image(const struct vdso_image *image)
{
	int i;
	int npages = (image->size) / PAGE_SIZE;

	BUG_ON(image->size % PAGE_SIZE != 0);
	for (i = 0; i < npages; i++)
		image->text_mapping.pages[i] =
			virt_to_page(image->data + i*PAGE_SIZE);

	apply_alternatives((struct alt_instr *)(image->data + image->alt),
			   (struct alt_instr *)(image->data + image->alt +
						image->alt_len));
}

struct linux_binprm;

/*
 * Put the vdso above the (randomized) stack with another randomized
 * offset. This way there is no hole in the middle of the address space.
 * To save memory, make sure it is still in the same PTE as the stack
 * top. This doesn't give that many random bits.
 *
 * Note that this algorithm is imperfect: the distribution of the vdso
 * start address within a PMD is biased toward the end.
 *
 * Only used for the 64-bit and x32 vdsos.
 */
static unsigned long vdso_addr(unsigned long start, unsigned len)
{
#ifdef CONFIG_X86_32
	return 0;
#else
	unsigned long addr, end;
	unsigned offset;

	/*
	 * Round up the start address. It can start out unaligned as a result
	 * of stack start randomization.
	 */
	start = PAGE_ALIGN(start);

	/* Round the lowest possible end address up to a PMD boundary. */
	end = (start + len + PMD_SIZE - 1) & PMD_MASK;
	if (end >= TASK_SIZE_MAX)
		end = TASK_SIZE_MAX;
	end -= len;

	if (end > start) {
		offset = get_random_int() % (((end - start) >> PAGE_SHIFT) + 1);
		addr = start + (offset << PAGE_SHIFT);
	} else {
		addr = start;
	}

	/*
	 * Forcibly align the final address in case we have a hardware
	 * issue that requires alignment for performance reasons.
	 */
	addr = align_vdso_addr(addr);

	return addr;
#endif
}
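
A worked example of the rounding above, under assumed x86-64 constants (4 KiB pages, 2 MiB PMDs; the TASK_SIZE_MAX clamp and the final alignment step are omitted):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PMD_SIZE	(512 * PAGE_SIZE)		/* 2 MiB */
#define PMD_MASK	(~(PMD_SIZE - 1))
#define PAGE_ALIGN(x)	(((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long start = 0x7ffd12345678UL;	/* randomized stack top */
	unsigned long len = 3 * PAGE_SIZE;	/* hypothetical vdso size */
	unsigned long end;

	start = PAGE_ALIGN(start);
	/* lowest possible end, rounded up to a PMD boundary, minus len */
	end = ((start + len + PMD_SIZE - 1) & PMD_MASK) - len;
	printf("candidate page range: [%#lx, %#lx]\n", start, end);
	return 0;
}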

static int map_vdso(const struct vdso_image *image, bool calculate_addr)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long addr, text_start;
	int ret = 0;
	static struct page *no_pages[] = {NULL};
	static struct vm_special_mapping vvar_mapping = {
		.name = "[vvar]",
		.pages = no_pages,
	};

	if (calculate_addr) {
		addr = vdso_addr(current->mm->start_stack,
				 image->size - image->sym_vvar_start);
	} else {
		addr = 0;
	}

	down_write(&mm->mmap_sem);

	addr = get_unmapped_area(NULL, addr,
				 image->size - image->sym_vvar_start, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = addr;
		goto up_fail;
	}

	text_start = addr - image->sym_vvar_start;
	current->mm->context.vdso = (void __user *)text_start;

	/*
	 * MAYWRITE to allow gdb to COW and set breakpoints
	 */
	vma = _install_special_mapping(mm,
				       text_start,
				       image->size,
				       VM_READ|VM_EXEC|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       &image->text_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	vma = _install_special_mapping(mm,
				       addr,
				       -image->sym_vvar_start,
				       VM_READ|VM_MAYREAD,
				       &vvar_mapping);

	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto up_fail;
	}

	if (image->sym_vvar_page)
		ret = remap_pfn_range(vma,
				      text_start + image->sym_vvar_page,
				      __pa_symbol(&__vvar_page) >> PAGE_SHIFT,
				      PAGE_SIZE,
				      PAGE_READONLY);

	if (ret)
		goto up_fail;

#ifdef CONFIG_HPET_TIMER
	if (hpet_address && image->sym_hpet_page) {
		ret = io_remap_pfn_range(vma,
					 text_start + image->sym_hpet_page,
					 hpet_address >> PAGE_SHIFT,
					 PAGE_SIZE,
					 pgprot_noncached(PAGE_READONLY));

		if (ret)
			goto up_fail;
	}
#endif

up_fail:
	if (ret)
		current->mm->context.vdso = NULL;

	up_write(&mm->mmap_sem);
	return ret;
}
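
Note that image->sym_vvar_start is negative, so the shared-data pages land below the code and text_start ends up above the get_unmapped_area() result. A sketch of that arithmetic with assumed values (the offset and base address here are illustrative only):

#include <stdio.h>

int main(void)
{
	long sym_vvar_start = -2 * 4096L;	/* assumed: two data pages */
	unsigned long addr = 0x7ffff7ffa000UL;	/* assumed mapping base */
	unsigned long text_start = addr - sym_vvar_start;

	/* [vvar] sits at addr; the vdso text starts two pages higher. */
	printf("[vvar] at %#lx, vdso text at %#lx\n", addr, text_start);
	return 0;
}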

#if defined(CONFIG_X86_32) || defined(CONFIG_COMPAT)
static int load_vdso32(void)
{
	int ret;

	if (vdso32_enabled != 1)  /* Other values all mean "disabled" */
		return 0;

	ret = map_vdso(selected_vdso32, false);
	if (ret)
		return ret;

	if (selected_vdso32->sym_VDSO32_SYSENTER_RETURN)
		current_thread_info()->sysenter_return =
			current->mm->context.vdso +
			selected_vdso32->sym_VDSO32_SYSENTER_RETURN;

	return 0;
}
#endif

#ifdef CONFIG_X86_64
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	if (!vdso64_enabled)
		return 0;

	return map_vdso(&vdso_image_64, true);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
				       int uses_interp)
{
#ifdef CONFIG_X86_X32_ABI
	if (test_thread_flag(TIF_X32)) {
		if (!vdso64_enabled)
			return 0;

		return map_vdso(&vdso_image_x32, true);
	}
#endif

	return load_vdso32();
}
#endif
#else
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	return load_vdso32();
}
#endif

#ifdef CONFIG_X86_64
static __init int vdso_setup(char *s)
{
	vdso64_enabled = simple_strtoul(s, NULL, 0);
	return 0;
}
__setup("vdso=", vdso_setup);
#endif

#ifdef CONFIG_X86_64
static void vgetcpu_cpu_init(void *arg)
{
	int cpu = smp_processor_id();
	struct desc_struct d = { };
	unsigned long node = 0;
#ifdef CONFIG_NUMA
	node = cpu_to_node(cpu);
#endif
	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
		write_rdtscp_aux((node << 12) | cpu);

	/*
	 * Store the CPU number in the segment limit so that it can be
	 * loaded quickly in user space in vgetcpu. (12 bits for the CPU
	 * and 8 bits for the node)
	 */
	d.limit0 = cpu | ((node & 0xf) << 12);
	d.limit = node >> 4;
	d.type = 5;		/* RO data, expand down, accessed */
	d.dpl = 3;		/* Visible to user code */
	d.s = 1;		/* Not a system segment */
	d.p = 1;		/* Present */
	d.d = 1;		/* 32-bit */

	write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}
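
Userspace reads this descriptor back with the LSL instruction, which returns a segment's limit without faulting. A hedged sketch (x86-64 only; the selector value GDT_ENTRY_PER_CPU*8 + 3 = 0x7b is an assumption matching this kernel's GDT layout, and the result is only meaningful if the segment was installed as above):

#include <stdio.h>

int main(void)
{
	unsigned int p;

	/* LSL loads the segment limit for the given selector into p. */
	asm volatile("lsl %1, %0" : "=r" (p) : "r" (0x7bu));
	printf("cpu=%u node=%u\n", p & 0xfff, p >> 12);
	return 0;
}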

static int
vgetcpu_cpu_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
	long cpu = (long)arg;

	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
		smp_call_function_single(cpu, vgetcpu_cpu_init, NULL, 1);

	return NOTIFY_DONE;
}

static int __init init_vdso(void)
{
	init_vdso_image(&vdso_image_64);

#ifdef CONFIG_X86_X32_ABI
	init_vdso_image(&vdso_image_x32);
#endif

	cpu_notifier_register_begin();

	on_each_cpu(vgetcpu_cpu_init, NULL, 1);
	/* notifier priority > KVM */
	__hotcpu_notifier(vgetcpu_cpu_notifier, 30);

	cpu_notifier_register_done();

	return 0;
}
subsys_initcall(init_vdso);
#endif /* CONFIG_X86_64 */