// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
 *                    <benh@kernel.crashing.org>
 * Copyright (C) 2012 ARM Limited
 * Copyright (C) 2015 Regents of the University of California
 */

#include <linux/elf.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/binfmts.h>
#include <linux/err.h>
#include <asm/page.h>
#include <asm/vdso.h>
#include <linux/time_namespace.h>

#ifdef CONFIG_GENERIC_TIME_VSYSCALL
#include <vdso/datapage.h>
#else
struct vdso_data {
};
#endif

extern char vdso_start[], vdso_end[];
#ifdef CONFIG_COMPAT
extern char compat_vdso_start[], compat_vdso_end[];
#endif

enum vvar_pages {
        VVAR_DATA_PAGE_OFFSET,
        VVAR_TIMENS_PAGE_OFFSET,
        VVAR_NR_PAGES,
};

enum rv_vdso_map {
        RV_VDSO_MAP_VVAR,
        RV_VDSO_MAP_VDSO,
};

#define VVAR_SIZE  (VVAR_NR_PAGES << PAGE_SHIFT)

/*
 * The vDSO data page.
 */
static union {
        struct vdso_data        data;
        u8                      page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = &vdso_data_store.data;

struct __vdso_info {
        const char *name;
        const char *vdso_code_start;
        const char *vdso_code_end;
        unsigned long vdso_pages;
        /* Data Mapping */
        struct vm_special_mapping *dm;
        /* Code Mapping */
        struct vm_special_mapping *cm;
};

static struct __vdso_info vdso_info;
#ifdef CONFIG_COMPAT
static struct __vdso_info compat_vdso_info;
#endif
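
/*
 * Update the cached vDSO base when user space moves the mapping with
 * mremap(), so vDSO symbol lookups (e.g. for the signal return
 * trampoline) keep pointing at the new location.
 */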
static int vdso_mremap(const struct vm_special_mapping *sm,
                       struct vm_area_struct *new_vma)
{
        current->mm->context.vdso = (void *)new_vma->vm_start;

        return 0;
}
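
/*
 * Sanity-check the vDSO image built into the kernel and populate the
 * page array backing its [vdso] code mapping.
 */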
static void __init __vdso_init(struct __vdso_info *vdso_info)
{
        unsigned int i;
        struct page **vdso_pagelist;
        unsigned long pfn;

        if (memcmp(vdso_info->vdso_code_start, "\177ELF", 4))
                panic("vDSO is not a valid ELF object!\n");

        vdso_info->vdso_pages = (
                vdso_info->vdso_code_end -
                vdso_info->vdso_code_start) >>
                PAGE_SHIFT;

        vdso_pagelist = kcalloc(vdso_info->vdso_pages,
                                sizeof(struct page *),
                                GFP_KERNEL);
        if (vdso_pagelist == NULL)
                panic("vDSO kcalloc failed!\n");

        /* Grab the vDSO code pages. */
        pfn = sym_to_pfn(vdso_info->vdso_code_start);

        for (i = 0; i < vdso_info->vdso_pages; i++)
                vdso_pagelist[i] = pfn_to_page(pfn + i);

        vdso_info->cm->pages = vdso_pagelist;
}

#ifdef CONFIG_TIME_NS
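/*
 * Used by the generic time-namespace code to locate struct vdso_data
 * within a vvar page when populating a namespace's copy.
 */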
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
        return (struct vdso_data *)(vvar_page);
}

/*
 * The vvar mapping contains data for a specific time namespace, so when a task
 * changes namespace we must unmap its vvar data for the old namespace.
 * Subsequent faults will map in data for the new namespace.
 *
 * For more details see timens_setup_vdso_data().
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
        struct mm_struct *mm = task->mm;
        struct vm_area_struct *vma;
        VMA_ITERATOR(vmi, mm, 0);

        mmap_read_lock(mm);

        for_each_vma(vmi, vma) {
                unsigned long size = vma->vm_end - vma->vm_start;

                if (vma_is_special_mapping(vma, vdso_info.dm))
                        zap_page_range(vma, vma->vm_start, size);
#ifdef CONFIG_COMPAT
                if (vma_is_special_mapping(vma, compat_vdso_info.dm))
                        zap_page_range(vma, vma->vm_start, size);
#endif
        }

        mmap_read_unlock(mm);
        return 0;
}

static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
        if (likely(vma->vm_mm == current->mm))
                return current->nsproxy->time_ns->vvar_page;

        /*
         * VM_PFNMAP | VM_IO protect .fault() handler from being called
         * through interfaces like /proc/$pid/mem or
         * process_vm_{readv,writev}() as long as there's no .access()
         * in special_mapping_vmops.
         * For more details see check_vma_flags() and __access_remote_vm().
         */
        WARN(1, "vvar_page accessed remotely");

        return NULL;
}
#else
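/* Without CONFIG_TIME_NS there is never a namespace-specific vvar page. */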
static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
        return NULL;
}
#endif
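
/*
 * Fault handler for the [vvar] mapping: page 0 is the vdso_data page
 * (or the task's time-namespace clone of it); page 1 exposes the real
 * vdso_data when time namespaces are in use.
 */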
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
                             struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct page *timens_page = find_timens_vvar_page(vma);
        unsigned long pfn;

        switch (vmf->pgoff) {
        case VVAR_DATA_PAGE_OFFSET:
                if (timens_page)
                        pfn = page_to_pfn(timens_page);
                else
                        pfn = sym_to_pfn(vdso_data);
                break;
#ifdef CONFIG_TIME_NS
        case VVAR_TIMENS_PAGE_OFFSET:
                /*
                 * If a task belongs to a time namespace then a namespace
                 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
                 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
                 * offset.
                 * See also the comment near timens_setup_vdso_data().
                 */
                if (!timens_page)
                        return VM_FAULT_SIGBUS;
                pfn = sym_to_pfn(vdso_data);
                break;
#endif /* CONFIG_TIME_NS */
        default:
                return VM_FAULT_SIGBUS;
        }

        return vmf_insert_pfn(vma, vmf->address, pfn);
}
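
/*
 * Descriptors for the special mappings that back the [vvar] and [vdso]
 * VMAs of the native vDSO (and, below, the compat vDSO).
 */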
static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = {
        [RV_VDSO_MAP_VVAR] = {
                .name = "[vvar]",
                .fault = vvar_fault,
        },
        [RV_VDSO_MAP_VDSO] = {
                .name = "[vdso]",
                .mremap = vdso_mremap,
        },
};

static struct __vdso_info vdso_info __ro_after_init = {
        .name = "vdso",
        .vdso_code_start = vdso_start,
        .vdso_code_end = vdso_end,
        .dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR],
        .cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO],
};

#ifdef CONFIG_COMPAT
static struct vm_special_mapping rv_compat_vdso_maps[] __ro_after_init = {
        [RV_VDSO_MAP_VVAR] = {
                .name = "[vvar]",
                .fault = vvar_fault,
        },
        [RV_VDSO_MAP_VDSO] = {
                .name = "[vdso]",
                .mremap = vdso_mremap,
        },
};

static struct __vdso_info compat_vdso_info __ro_after_init = {
        .name = "compat_vdso",
        .vdso_code_start = compat_vdso_start,
        .vdso_code_end = compat_vdso_end,
        .dm = &rv_compat_vdso_maps[RV_VDSO_MAP_VVAR],
        .cm = &rv_compat_vdso_maps[RV_VDSO_MAP_VDSO],
};
#endif

static int __init vdso_init(void)
{
        __vdso_init(&vdso_info);
#ifdef CONFIG_COMPAT
        __vdso_init(&compat_vdso_info);
#endif

        return 0;
}
arch_initcall(vdso_init);
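
/*
 * Lay out the vDSO in a new mm: the [vvar] data pages come first, with
 * the [vdso] code mapped directly above them at a base address chosen
 * by get_unmapped_area().
 */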
static int __setup_additional_pages(struct mm_struct *mm,
                                    struct linux_binprm *bprm,
                                    int uses_interp,
                                    struct __vdso_info *vdso_info)
{
        unsigned long vdso_base, vdso_text_len, vdso_mapping_len;
        void *ret;

        BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);

        vdso_text_len = vdso_info->vdso_pages << PAGE_SHIFT;
        /* Be sure to map the data page */
        vdso_mapping_len = vdso_text_len + VVAR_SIZE;

        vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
        if (IS_ERR_VALUE(vdso_base)) {
                ret = ERR_PTR(vdso_base);
                goto up_fail;
        }

        ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE,
                                       (VM_READ | VM_MAYREAD | VM_PFNMAP),
                                       vdso_info->dm);
        if (IS_ERR(ret))
                goto up_fail;

        vdso_base += VVAR_SIZE;
        mm->context.vdso = (void *)vdso_base;

        ret = _install_special_mapping(mm, vdso_base, vdso_text_len,
                                       (VM_READ | VM_EXEC | VM_MAYREAD |
                                        VM_MAYWRITE | VM_MAYEXEC),
                                       vdso_info->cm);
        if (IS_ERR(ret))
                goto up_fail;

        return 0;

up_fail:
        mm->context.vdso = NULL;
        return PTR_ERR(ret);
}

#ifdef CONFIG_COMPAT
int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
                                       int uses_interp)
{
        struct mm_struct *mm = current->mm;
        int ret;

        if (mmap_write_lock_killable(mm))
                return -EINTR;

        ret = __setup_additional_pages(mm, bprm, uses_interp,
                                       &compat_vdso_info);
        mmap_write_unlock(mm);

        return ret;
}
#endif

int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
        struct mm_struct *mm = current->mm;
        int ret;

        if (mmap_write_lock_killable(mm))
                return -EINTR;

        ret = __setup_additional_pages(mm, bprm, uses_interp, &vdso_info);
        mmap_write_unlock(mm);

        return ret;
}