MIPS: VDSO: Add implementations of gettimeofday() and clock_gettime()
Add user-mode implementations of gettimeofday() and clock_gettime() to the VDSO. This is currently usable with 2 clocksources: the CP0 count register, which is accessible to user-mode via RDHWR on R2 and later cores, or the MIPS Global Interrupt Controller (GIC) timer, which provides a "user-mode visible" section containing a mirror of its counter registers. This section must be mapped into user memory, which is done below the VDSO data page.

When a supported clocksource is not in use, the VDSO functions will return -ENOSYS, which causes libc to fall back on the standard syscall path.

When support for neither of these clocksources is compiled into the kernel at all, the VDSO still provides clock_gettime(), as the coarse realtime/monotonic clocks can still be implemented. However, gettimeofday() is not provided in this case as nothing can be done without a suitable clocksource. This causes the symbol lookup to fail in libc and it will then always use the standard syscall path.

This patch includes a workaround for a bug in QEMU which results in RDHWR on the CP0 count register always returning a constant (incorrect) value. A fix for this has been submitted, and the workaround can be removed after the fix has been in stable releases for a reasonable amount of time.

A simple performance test which calls gettimeofday() 1000 times in a loop and calculates the average execution time gives the following results on a Malta + I6400 (running at 20MHz):

 - Syscall:    ~31000 ns
 - VDSO (GIC): ~15000 ns
 - VDSO (CP0): ~9500 ns

[markos.chandras@imgtec.com:
 - Minor code re-arrangements in order for mappings to be made in the order they appear to the process' address space.
 - Move do_{monotonic, realtime} outside of the MIPS_CLOCK_VSYSCALL ifdef
 - Use gic_get_usm_range so we can do the GIC mapping in the arch/mips/kernel/vdso instead of the GIC irqchip driver]

Signed-off-by: Alex Smith <alex.smith@imgtec.com>
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
Cc: linux-kernel@vger.kernel.org
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/11338/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
This commit is contained in:
@@ -28,6 +28,43 @@ static u64 notrace r4k_read_sched_clock(void)
|
||||
return read_c0_count();
|
||||
}
|
||||
|
||||
/*
 * Read the count value through the RDHWR instruction (hardware register $2).
 *
 * The assembler is temporarily switched to mips32r2 so that RDHWR is
 * accepted, and the previous assembler settings are restored afterwards.
 *
 * Returns the 32-bit value delivered by RDHWR.
 */
static inline unsigned int rdhwr_count(void)
{
	unsigned int cycles;

	__asm__ __volatile__(
	"	.set push\n"
	"	.set mips32r2\n"
	"	rdhwr	%0, $2\n"
	"	.set pop\n"
	: "=r" (cycles));

	return cycles;
}
|
||||
|
||||
/*
 * Check whether the value returned by rdhwr_count() actually changes.
 *
 * One baseline sample is taken, followed by up to 100 further samples.
 *
 * Returns true as soon as a sample differs from the baseline; otherwise
 * logs a warning and returns false.
 */
static bool rdhwr_count_usable(void)
{
	unsigned int baseline, remaining;

	/*
	 * Older QEMUs have a broken implementation of RDHWR for the CP0 count
	 * which always returns a constant value. Try to identify this and don't
	 * use it in the VDSO if it is broken. This workaround can be removed
	 * once the fix has been in QEMU stable for a reasonable amount of time.
	 */
	baseline = rdhwr_count();

	for (remaining = 100; remaining > 0; remaining--) {
		/* Any movement at all means the counter is live. */
		if (rdhwr_count() != baseline)
			return true;
	}

	pr_warn("Not using R4K clocksource in VDSO due to broken RDHWR\n");

	return false;
}
|
||||
|
||||
int __init init_r4k_clocksource(void)
|
||||
{
|
||||
if (!cpu_has_counter || !mips_hpt_frequency)
|
||||
@@ -36,6 +73,13 @@ int __init init_r4k_clocksource(void)
|
||||
/* Calculate a somewhat reasonable rating value */
|
||||
clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000;
|
||||
|
||||
/*
|
||||
* R2 onwards makes the count accessible to user mode so it can be used
|
||||
* by the VDSO (HWREna is configured by configure_hwrena()).
|
||||
*/
|
||||
if (cpu_has_mips_r2_r6 && rdhwr_count_usable())
|
||||
clocksource_mips.archdata.vdso_clock_mode = VDSO_CLOCK_R4K;
|
||||
|
||||
clocksource_register_hz(&clocksource_mips, mips_hpt_frequency);
|
||||
|
||||
sched_clock_register(r4k_read_sched_clock, 32, mips_hpt_frequency);
|
||||
|
@@ -12,9 +12,12 @@
|
||||
#include <linux/elf.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/irqchip/mips-gic.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/timekeeper_internal.h>
|
||||
|
||||
#include <asm/abi.h>
|
||||
#include <asm/vdso.h>
|
||||
@@ -23,7 +26,7 @@
|
||||
static union mips_vdso_data vdso_data __page_aligned_data;
|
||||
|
||||
/*
|
||||
* Mapping for the VDSO data pages. The real pages are mapped manually, as
|
||||
* Mapping for the VDSO data/GIC pages. The real pages are mapped manually, as
|
||||
* what we map and where within the area they are mapped is determined at
|
||||
* runtime.
|
||||
*/
|
||||
@@ -64,25 +67,67 @@ static int __init init_vdso(void)
|
||||
}
|
||||
subsys_initcall(init_vdso);
|
||||
|
||||
void update_vsyscall(struct timekeeper *tk)
|
||||
{
|
||||
vdso_data_write_begin(&vdso_data);
|
||||
|
||||
vdso_data.xtime_sec = tk->xtime_sec;
|
||||
vdso_data.xtime_nsec = tk->tkr_mono.xtime_nsec;
|
||||
vdso_data.wall_to_mono_sec = tk->wall_to_monotonic.tv_sec;
|
||||
vdso_data.wall_to_mono_nsec = tk->wall_to_monotonic.tv_nsec;
|
||||
vdso_data.cs_shift = tk->tkr_mono.shift;
|
||||
|
||||
vdso_data.clock_mode = tk->tkr_mono.clock->archdata.vdso_clock_mode;
|
||||
if (vdso_data.clock_mode != VDSO_CLOCK_NONE) {
|
||||
vdso_data.cs_mult = tk->tkr_mono.mult;
|
||||
vdso_data.cs_cycle_last = tk->tkr_mono.cycle_last;
|
||||
vdso_data.cs_mask = tk->tkr_mono.mask;
|
||||
}
|
||||
|
||||
vdso_data_write_end(&vdso_data);
|
||||
}
|
||||
|
||||
void update_vsyscall_tz(void)
|
||||
{
|
||||
if (vdso_data.clock_mode != VDSO_CLOCK_NONE) {
|
||||
vdso_data.tz_minuteswest = sys_tz.tz_minuteswest;
|
||||
vdso_data.tz_dsttime = sys_tz.tz_dsttime;
|
||||
}
|
||||
}
|
||||
|
||||
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
|
||||
{
|
||||
struct mips_vdso_image *image = current->thread.abi->vdso;
|
||||
struct mm_struct *mm = current->mm;
|
||||
unsigned long base, vdso_addr;
|
||||
unsigned long gic_size, vvar_size, size, base, data_addr, vdso_addr;
|
||||
struct vm_area_struct *vma;
|
||||
struct resource gic_res;
|
||||
int ret;
|
||||
|
||||
down_write(&mm->mmap_sem);
|
||||
|
||||
base = get_unmapped_area(NULL, 0, PAGE_SIZE + image->size, 0, 0);
|
||||
/*
|
||||
* Determine total area size. This includes the VDSO data itself, the
|
||||
* data page, and the GIC user page if present. Always create a mapping
|
||||
* for the GIC user area if the GIC is present regardless of whether it
|
||||
* is the current clocksource, in case it comes into use later on. We
|
||||
* only map a page even though the total area is 64K, as we only need
|
||||
* the counter registers at the start.
|
||||
*/
|
||||
gic_size = gic_present ? PAGE_SIZE : 0;
|
||||
vvar_size = gic_size + PAGE_SIZE;
|
||||
size = vvar_size + image->size;
|
||||
|
||||
base = get_unmapped_area(NULL, 0, size, 0, 0);
|
||||
if (IS_ERR_VALUE(base)) {
|
||||
ret = base;
|
||||
goto out;
|
||||
}
|
||||
|
||||
vdso_addr = base + PAGE_SIZE;
|
||||
data_addr = base + gic_size;
|
||||
vdso_addr = data_addr + PAGE_SIZE;
|
||||
|
||||
vma = _install_special_mapping(mm, base, PAGE_SIZE,
|
||||
vma = _install_special_mapping(mm, base, vvar_size,
|
||||
VM_READ | VM_MAYREAD,
|
||||
&vdso_vvar_mapping);
|
||||
if (IS_ERR(vma)) {
|
||||
@@ -90,8 +135,22 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Map GIC user page. */
|
||||
if (gic_size) {
|
||||
ret = gic_get_usm_range(&gic_res);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = io_remap_pfn_range(vma, base,
|
||||
gic_res.start >> PAGE_SHIFT,
|
||||
gic_size,
|
||||
pgprot_noncached(PAGE_READONLY));
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Map data page. */
|
||||
ret = remap_pfn_range(vma, base,
|
||||
ret = remap_pfn_range(vma, data_addr,
|
||||
virt_to_phys(&vdso_data) >> PAGE_SHIFT,
|
||||
PAGE_SIZE, PAGE_READONLY);
|
||||
if (ret)
|
||||
|
Reference in New Issue
Block a user