// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDSO implementations.
 *
 * Copyright (C) 2012 ARM Limited
 *
 * Author: Will Deacon <[email protected]>
 */
#include <linux/cache.h>
#include <linux/clocksource.h>
#include <linux/elf.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/time_namespace.h>
#include <linux/timekeeper_internal.h>
#include <linux/vmalloc.h>
#include <vdso/datapage.h>
#include <vdso/helpers.h>
#include <vdso/vsyscall.h>

#include <asm/cacheflush.h>
#include <asm/signal32.h>
#include <asm/vdso.h>
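
/*
 * Two vDSO ABIs may be provided: the native AArch64 vDSO and, when
 * CONFIG_COMPAT_VDSO is enabled, an AArch32 compat vDSO.
 */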
enum vdso_abi {
	VDSO_ABI_AA64,
	VDSO_ABI_AA32,
};

enum vvar_pages {
	VVAR_DATA_PAGE_OFFSET,
	VVAR_TIMENS_PAGE_OFFSET,
	VVAR_NR_PAGES,
};

struct vdso_abi_info {
	const char *name;
	const char *vdso_code_start;
	const char *vdso_code_end;
	unsigned long vdso_pages;
	/* Data Mapping */
	struct vm_special_mapping *dm;
	/* Code Mapping */
	struct vm_special_mapping *cm;
};

static struct vdso_abi_info vdso_info[] __ro_after_init = {
	[VDSO_ABI_AA64] = {
		.name = "vdso",
		.vdso_code_start = vdso_start,
		.vdso_code_end = vdso_end,
	},
#ifdef CONFIG_COMPAT_VDSO
	[VDSO_ABI_AA32] = {
		.name = "vdso32",
		.vdso_code_start = vdso32_start,
		.vdso_code_end = vdso32_end,
	},
#endif /* CONFIG_COMPAT_VDSO */
};

/*
 * The vDSO data page.
 */
static union {
	struct vdso_data data[CS_BASES];
	u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = vdso_data_store.data;
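
/*
 * Userspace may move the vDSO with mremap(); record the new base so that
 * anything consulting mm->context.vdso (e.g. signal trampoline setup) sees
 * the relocated mapping.
 */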
static int vdso_mremap(const struct vm_special_mapping *sm,
		struct vm_area_struct *new_vma)
{
	current->mm->context.vdso = (void *)new_vma->vm_start;

	return 0;
}
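
/*
 * Validate the embedded vDSO image and build the array of struct page
 * pointers backing its text, for mapping into each new process later on.
 */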
static int __init __vdso_init(enum vdso_abi abi)
{
	int i;
	struct page **vdso_pagelist;
	unsigned long pfn;

	if (memcmp(vdso_info[abi].vdso_code_start, "\177ELF", 4)) {
		pr_err("vDSO is not a valid ELF object!\n");
		return -EINVAL;
	}

	vdso_info[abi].vdso_pages = (
		vdso_info[abi].vdso_code_end -
		vdso_info[abi].vdso_code_start) >>
		PAGE_SHIFT;

	vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages,
				sizeof(struct page *),
				GFP_KERNEL);
	if (vdso_pagelist == NULL)
		return -ENOMEM;

	/* Grab the vDSO code pages. */
	pfn = sym_to_pfn(vdso_info[abi].vdso_code_start);

	for (i = 0; i < vdso_info[abi].vdso_pages; i++)
		vdso_pagelist[i] = pfn_to_page(pfn + i);

	vdso_info[abi].cm->pages = vdso_pagelist;

	return 0;
}

#ifdef CONFIG_TIME_NS
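/*
 * The vvar page holds the vdso_data directly, so the generic time
 * namespace code can obtain it with a simple cast.
 */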
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
	return (struct vdso_data *)(vvar_page);
}

/*
 * The vvar mapping contains data for a specific time namespace, so when a
 * task changes namespace we must unmap its vvar data for the old namespace.
 * Subsequent faults will map in data for the new namespace.
 *
 * For more details see timens_setup_vdso_data().
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
	struct mm_struct *mm = task->mm;
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	mmap_read_lock(mm);

	for_each_vma(vmi, vma) {
		unsigned long size = vma->vm_end - vma->vm_start;

		if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA64].dm))
			zap_page_range(vma, vma->vm_start, size);
#ifdef CONFIG_COMPAT_VDSO
		if (vma_is_special_mapping(vma, vdso_info[VDSO_ABI_AA32].dm))
			zap_page_range(vma, vma->vm_start, size);
#endif
	}

	mmap_read_unlock(mm);
	return 0;
}
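
/*
 * Return the time-namespace vvar page for a faulting task. Only the task
 * owning the mm may fault its own vvar mapping; remote access triggers
 * the warning below instead.
 */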
static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
	if (likely(vma->vm_mm == current->mm))
		return current->nsproxy->time_ns->vvar_page;

	/*
	 * VM_PFNMAP | VM_IO protect .fault() handler from being called
	 * through interfaces like /proc/$pid/mem or
	 * process_vm_{readv,writev}() as long as there's no .access()
	 * in special_mapping_vmops.
	 * For more details see check_vma_flags() and __access_remote_vm().
	 */
	WARN(1, "vvar_page accessed remotely");

	return NULL;
}
#else
static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
	return NULL;
}
#endif
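
/*
 * Fault handler for the vvar mapping: page 0 is the vdso_data page (or a
 * namespace-specific copy for tasks inside a time namespace), while with
 * CONFIG_TIME_NS the real vdso_data page is exposed at
 * VVAR_TIMENS_PAGE_OFFSET instead.
 */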
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
			     struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *timens_page = find_timens_vvar_page(vma);
	unsigned long pfn;

	switch (vmf->pgoff) {
	case VVAR_DATA_PAGE_OFFSET:
		if (timens_page)
			pfn = page_to_pfn(timens_page);
		else
			pfn = sym_to_pfn(vdso_data);
		break;
#ifdef CONFIG_TIME_NS
	case VVAR_TIMENS_PAGE_OFFSET:
		/*
		 * If a task belongs to a time namespace then a namespace
		 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
		 * the real VVAR page is mapped with the
		 * VVAR_TIMENS_PAGE_OFFSET offset.
		 * See also the comment near timens_setup_vdso_data().
		 */
		if (!timens_page)
			return VM_FAULT_SIGBUS;
		pfn = sym_to_pfn(vdso_data);
		break;
#endif /* CONFIG_TIME_NS */
	default:
		return VM_FAULT_SIGBUS;
	}

	return vmf_insert_pfn(vma, vmf->address, pfn);
}
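
/*
 * Map the vvar data pages followed immediately by the vDSO text into @mm,
 * caching the text base in mm->context.vdso. The text is mapped as a BTI
 * guarded page when the CPU supports BTI.
 */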
static int __setup_additional_pages(enum vdso_abi abi,
				    struct mm_struct *mm,
				    struct linux_binprm *bprm,
				    int uses_interp)
{
	unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
	unsigned long gp_flags = 0;
	void *ret;

	BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);

	vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT;
	/* Be sure to map the data page */
	vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;

	vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
	if (IS_ERR_VALUE(vdso_base)) {
		ret = ERR_PTR(vdso_base);
		goto up_fail;
	}

	ret = _install_special_mapping(mm, vdso_base, VVAR_NR_PAGES * PAGE_SIZE,
				       VM_READ|VM_MAYREAD|VM_PFNMAP,
				       vdso_info[abi].dm);
	if (IS_ERR(ret))
		goto up_fail;

	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti())
		gp_flags = VM_ARM64_BTI;

	vdso_base += VVAR_NR_PAGES * PAGE_SIZE;
	mm->context.vdso = (void *)vdso_base;
	ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
				       VM_READ|VM_EXEC|gp_flags|
				       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
				       vdso_info[abi].cm);
	if (IS_ERR(ret))
		goto up_fail;

	return 0;

up_fail:
	mm->context.vdso = NULL;
	return PTR_ERR(ret);
}

#ifdef CONFIG_COMPAT
/*
 * Create and map the vectors page for AArch32 tasks.
 */
enum aarch32_map {
	AA32_MAP_VECTORS, /* kuser helpers */
	AA32_MAP_SIGPAGE,
	AA32_MAP_VVAR,
	AA32_MAP_VDSO,
};

static struct page *aarch32_vectors_page __ro_after_init;
static struct page *aarch32_sig_page __ro_after_init;
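
/* Keep mm->context.sigpage in sync when the sigpage is moved by mremap(). */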
static int aarch32_sigpage_mremap(const struct vm_special_mapping *sm,
	struct vm_area_struct *new_vma)
{
	current->mm->context.sigpage = (void *)new_vma->vm_start;

	return 0;
}

static struct vm_special_mapping aarch32_vdso_maps[] = {
	[AA32_MAP_VECTORS] = {
		.name = "[vectors]", /* ABI */
		.pages = &aarch32_vectors_page,
	},
	[AA32_MAP_SIGPAGE] = {
		.name = "[sigpage]", /* ABI */
		.pages = &aarch32_sig_page,
		.mremap = aarch32_sigpage_mremap,
	},
	[AA32_MAP_VVAR] = {
		.name = "[vvar]",
		.fault = vvar_fault,
	},
	[AA32_MAP_VDSO] = {
		.name = "[vdso]",
		.mremap = vdso_mremap,
	},
};
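
/*
 * Copy the kuser helpers to the end of a zeroed page, so that they finish
 * at the fixed addresses the AArch32 ABI expects once the page is mapped
 * at AARCH32_VECTORS_BASE.
 */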
static int aarch32_alloc_kuser_vdso_page(void)
{
	extern char __kuser_helper_start[], __kuser_helper_end[];
	int kuser_sz = __kuser_helper_end - __kuser_helper_start;
	unsigned long vdso_page;

	if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
		return 0;

	vdso_page = get_zeroed_page(GFP_KERNEL);
	if (!vdso_page)
		return -ENOMEM;

	memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start,
	       kuser_sz);
	aarch32_vectors_page = virt_to_page((void *)vdso_page);
	return 0;
}

#define COMPAT_SIGPAGE_POISON_WORD	0xe7fddef1
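/*
 * Fill the sigpage with a poison pattern (a permanently undefined A32
 * encoding) and then place the sigreturn trampolines at its start, so a
 * stray branch into the page traps rather than executing leftover bytes.
 */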
static int aarch32_alloc_sigpage(void)
{
	extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
	int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
	__le32 poison = cpu_to_le32(COMPAT_SIGPAGE_POISON_WORD);
	void *sigpage;

	sigpage = (void *)__get_free_page(GFP_KERNEL);
	if (!sigpage)
		return -ENOMEM;

	memset32(sigpage, (__force u32)poison, PAGE_SIZE / sizeof(poison));
	memcpy(sigpage, __aarch32_sigret_code_start, sigret_sz);
	aarch32_sig_page = virt_to_page(sigpage);
	return 0;
}

static int __init __aarch32_alloc_vdso_pages(void)
{
	if (!IS_ENABLED(CONFIG_COMPAT_VDSO))
		return 0;

	vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR];
	vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO];

	return __vdso_init(VDSO_ABI_AA32);
}

static int __init aarch32_alloc_vdso_pages(void)
{
	int ret;

	ret = __aarch32_alloc_vdso_pages();
	if (ret)
		return ret;

	ret = aarch32_alloc_sigpage();
	if (ret)
		return ret;

	return aarch32_alloc_kuser_vdso_page();
}
arch_initcall(aarch32_alloc_vdso_pages);
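
/*
 * Install the kuser helper page at the fixed AArch32 vectors base; the
 * mapping is read/execute only (see the VM_MAYWRITE note below).
 */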
static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
{
	void *ret;

	if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
		return 0;

	/*
	 * Avoid VM_MAYWRITE for compatibility with arch/arm/, where it's
	 * not safe to CoW the page containing the CPU exception vectors.
	 */
	ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE,
				       VM_READ | VM_EXEC |
				       VM_MAYREAD | VM_MAYEXEC,
				       &aarch32_vdso_maps[AA32_MAP_VECTORS]);

	return PTR_ERR_OR_ZERO(ret);
}

static int aarch32_sigreturn_setup(struct mm_struct *mm)
{
	unsigned long addr;
	void *ret;

	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
	if (IS_ERR_VALUE(addr)) {
		ret = ERR_PTR(addr);
		goto out;
	}

	/*
	 * VM_MAYWRITE is required to allow gdb to Copy-on-Write and
	 * set breakpoints.
	 */
	ret = _install_special_mapping(mm, addr, PAGE_SIZE,
				       VM_READ | VM_EXEC | VM_MAYREAD |
				       VM_MAYWRITE | VM_MAYEXEC,
				       &aarch32_vdso_maps[AA32_MAP_SIGPAGE]);
	if (IS_ERR(ret))
		goto out;

	mm->context.sigpage = (void *)addr;

out:
	return PTR_ERR_OR_ZERO(ret);
}
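
/*
 * exec()-time setup for AArch32 tasks: map the kuser helpers, the compat
 * vDSO (when enabled) and the sigreturn page, all under the mmap lock.
 */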
int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	int ret;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	ret = aarch32_kuser_helpers_setup(mm);
	if (ret)
		goto out;

	if (IS_ENABLED(CONFIG_COMPAT_VDSO)) {
		ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm,
					       uses_interp);
		if (ret)
			goto out;
	}

	ret = aarch32_sigreturn_setup(mm);
out:
	mmap_write_unlock(mm);
	return ret;
}
#endif /* CONFIG_COMPAT */

enum aarch64_map {
	AA64_MAP_VVAR,
	AA64_MAP_VDSO,
};

static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = {
	[AA64_MAP_VVAR] = {
		.name = "[vvar]",
		.fault = vvar_fault,
	},
	[AA64_MAP_VDSO] = {
		.name = "[vdso]",
		.mremap = vdso_mremap,
	},
};
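
/*
 * Boot-time initialisation: wire up the native special mappings and
 * validate the embedded AArch64 vDSO image.
 */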
static int __init vdso_init(void)
{
	vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR];
	vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO];

	return __vdso_init(VDSO_ABI_AA64);
}
arch_initcall(vdso_init);
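
/*
 * Called during exec() to map the native vDSO into the new process's
 * address space.
 */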
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
	struct mm_struct *mm = current->mm;
	int ret;

	if (mmap_write_lock_killable(mm))
		return -EINTR;

	ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp);

	mmap_write_unlock(mm);

	return ret;
}