vdso.c

// SPDX-License-Identifier: GPL-2.0
/*
 * vdso setup for s390
 *
 * Copyright IBM Corp. 2008
 * Author(s): Martin Schwidefsky ([email protected])
 */

#include <linux/binfmts.h>
#include <linux/compat.h>
#include <linux/elf.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/time_namespace.h>
#include <linux/random.h>
#include <vdso/datapage.h>
#include <asm/vdso.h>

extern char vdso64_start[], vdso64_end[];
extern char vdso32_start[], vdso32_end[];

static struct vm_special_mapping vvar_mapping;

static union {
        struct vdso_data data[CS_BASES];
        u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;

struct vdso_data *vdso_data = vdso_data_store.data;

enum vvar_pages {
        VVAR_DATA_PAGE_OFFSET,
        VVAR_TIMENS_PAGE_OFFSET,
        VVAR_NR_PAGES,
};
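
/*
 * Layout of the two vvar pages, by page offset within the [vvar] mapping
 * (resolved lazily in vvar_fault() below):
 *
 *   pgoff 0 (VVAR_DATA_PAGE_OFFSET):   the vdso_data page read by the vdso
 *                                      time functions; for a task inside a
 *                                      time namespace this slot holds the
 *                                      namespace-specific copy instead.
 *   pgoff 1 (VVAR_TIMENS_PAGE_OFFSET): mapped only for tasks inside a time
 *                                      namespace, where it provides the
 *                                      real vdso_data page.
 */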

#ifdef CONFIG_TIME_NS
struct vdso_data *arch_get_vdso_data(void *vvar_page)
{
        return (struct vdso_data *)(vvar_page);
}

static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
        if (likely(vma->vm_mm == current->mm))
                return current->nsproxy->time_ns->vvar_page;
        /*
         * VM_PFNMAP | VM_IO protect the .fault() handler from being called
         * through interfaces like /proc/$pid/mem or
         * process_vm_{readv,writev}() as long as there's no .access()
         * in special_mapping_vmops().
         * For more details see check_vma_flags() and __access_remote_vm().
         */
        WARN(1, "vvar_page accessed remotely");
        return NULL;
}

/*
 * The VVAR page layout depends on whether a task belongs to the root or
 * non-root time namespace. Whenever a task changes its namespace, the VVAR
 * page tables are cleared and then they will be re-faulted with a
 * corresponding layout.
 * See also the comment near timens_setup_vdso_data() for details.
 */
int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
        struct mm_struct *mm = task->mm;
        VMA_ITERATOR(vmi, mm, 0);
        struct vm_area_struct *vma;

        mmap_read_lock(mm);
        for_each_vma(vmi, vma) {
                unsigned long size = vma->vm_end - vma->vm_start;

                if (!vma_is_special_mapping(vma, &vvar_mapping))
                        continue;
                zap_page_range(vma, vma->vm_start, size);
                break;
        }
        mmap_read_unlock(mm);
        return 0;
}
#else
static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
        return NULL;
}
#endif

static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
                             struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct page *timens_page = find_timens_vvar_page(vma);
        unsigned long addr, pfn;
        vm_fault_t err;

        switch (vmf->pgoff) {
        case VVAR_DATA_PAGE_OFFSET:
                pfn = virt_to_pfn(vdso_data);
                if (timens_page) {
                        /*
                         * Fault in the VVAR page too, since it will be
                         * accessed to get clock data anyway.
                         */
                        addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE;
                        err = vmf_insert_pfn(vma, addr, pfn);
                        if (unlikely(err & VM_FAULT_ERROR))
                                return err;
                        pfn = page_to_pfn(timens_page);
                }
                break;
#ifdef CONFIG_TIME_NS
        case VVAR_TIMENS_PAGE_OFFSET:
                /*
                 * If a task belongs to a time namespace then a namespace
                 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
                 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
                 * offset.
                 * See also the comment near timens_setup_vdso_data().
                 */
                if (!timens_page)
                        return VM_FAULT_SIGBUS;
                pfn = virt_to_pfn(vdso_data);
                break;
#endif /* CONFIG_TIME_NS */
        default:
                return VM_FAULT_SIGBUS;
        }
        return vmf_insert_pfn(vma, vmf->address, pfn);
}
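
/*
 * Note: vmf_insert_pfn() installs a raw pfn mapping and takes no reference
 * on a struct page. That matches the VM_PFNMAP semantics of the [vvar]
 * mapping set up in map_vdso() below, and is safe here because vdso_data
 * lives in static kernel data and is never freed.
 */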

static int vdso_mremap(const struct vm_special_mapping *sm,
                       struct vm_area_struct *vma)
{
        current->mm->context.vdso_base = vma->vm_start;
        return 0;
}

static struct vm_special_mapping vvar_mapping = {
        .name = "[vvar]",
        .fault = vvar_fault,
};

static struct vm_special_mapping vdso64_mapping = {
        .name = "[vdso]",
        .mremap = vdso_mremap,
};

static struct vm_special_mapping vdso32_mapping = {
        .name = "[vdso]",
        .mremap = vdso_mremap,
};
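
/*
 * The TOD clock's 16-bit programmable field is stored by the STCKE
 * instruction together with the clock value. Seeding it with the CPU
 * number on each CPU is what lets the vdso implement getcpu() without a
 * system call: user space executes STCKE and extracts the field.
 */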

int vdso_getcpu_init(void)
{
        set_tod_programmable_field(smp_processor_id());
        return 0;
}
early_initcall(vdso_getcpu_init); /* Must be called before SMP init */

static int map_vdso(unsigned long addr, unsigned long vdso_mapping_len)
{
        unsigned long vvar_start, vdso_text_start, vdso_text_len;
        struct vm_special_mapping *vdso_mapping;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        int rc;

        BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
        if (mmap_write_lock_killable(mm))
                return -EINTR;

        if (is_compat_task()) {
                vdso_text_len = vdso32_end - vdso32_start;
                vdso_mapping = &vdso32_mapping;
        } else {
                vdso_text_len = vdso64_end - vdso64_start;
                vdso_mapping = &vdso64_mapping;
        }
        vvar_start = get_unmapped_area(NULL, addr, vdso_mapping_len, 0, 0);
        rc = vvar_start;
        if (IS_ERR_VALUE(vvar_start))
                goto out;
        vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES * PAGE_SIZE,
                                       VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
                                       VM_PFNMAP,
                                       &vvar_mapping);
        rc = PTR_ERR(vma);
        if (IS_ERR(vma))
                goto out;
        vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE;
        /* VM_MAYWRITE for COW so gdb can set breakpoints */
        vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
                                       VM_READ|VM_EXEC|
                                       VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
                                       vdso_mapping);
        if (IS_ERR(vma)) {
                /* unmap the whole vvar range, not just its first page */
                do_munmap(mm, vvar_start, VVAR_NR_PAGES * PAGE_SIZE, NULL);
                rc = PTR_ERR(vma);
        } else {
                current->mm->context.vdso_base = vdso_text_start;
                rc = 0;
        }
out:
        mmap_write_unlock(mm);
        return rc;
}
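
/*
 * Resulting layout of one vdso mapping, lowest address first:
 *
 *   vvar_start:                              [vvar] VVAR_NR_PAGES * PAGE_SIZE
 *   vvar_start + VVAR_NR_PAGES * PAGE_SIZE:  [vdso] vdso_text_len
 *
 * context.vdso_base records the text start, so that the ELF loader can
 * export it to user space through the AT_SYSINFO_EHDR auxiliary vector
 * entry.
 */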

static unsigned long vdso_addr(unsigned long start, unsigned long len)
{
        unsigned long addr, end, offset;

        /*
         * Round up the start address. It can start out unaligned as a result
         * of stack start randomization.
         */
        start = PAGE_ALIGN(start);

        /* Round the lowest possible end address up to a PMD boundary. */
        end = (start + len + PMD_SIZE - 1) & PMD_MASK;
        if (end >= VDSO_BASE)
                end = VDSO_BASE;
        end -= len;

        if (end > start) {
                offset = prandom_u32_max(((end - start) >> PAGE_SHIFT) + 1);
                addr = start + (offset << PAGE_SHIFT);
        } else {
                addr = start;
        }
        return addr;
}
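
/*
 * Worked example (4K pages, 2M PMD, values chosen for illustration only):
 * start = 0x1000000, len = 0x5000. The PMD rounding turns 0x1005000 into
 * end = 0x1200000, and end -= len gives 0x11fb000. That leaves
 * ((0x11fb000 - 0x1000000) >> 12) + 1 = 508 page-aligned candidates, of
 * which prandom_u32_max() picks one uniformly, so addr lands in
 * [0x1000000, 0x11fb000].
 */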

unsigned long vdso_size(void)
{
        unsigned long size = VVAR_NR_PAGES * PAGE_SIZE;

        if (is_compat_task())
                size += vdso32_end - vdso32_start;
        else
                size += vdso64_end - vdso64_start;
        return PAGE_ALIGN(size);
}

int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
        unsigned long addr = VDSO_BASE;
        unsigned long size = vdso_size();

        if (current->flags & PF_RANDOMIZE)
                addr = vdso_addr(current->mm->start_stack + PAGE_SIZE, size);
        return map_vdso(addr, size);
}
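
/*
 * Without PF_RANDOMIZE the hint handed to map_vdso() is simply VDSO_BASE;
 * with it, vdso_addr() picks a page-aligned address between one page above
 * the stack start and VDSO_BASE, so the vdso position varies per exec.
 */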

static struct page ** __init vdso_setup_pages(void *start, void *end)
{
        int pages = (end - start) >> PAGE_SHIFT;
        struct page **pagelist;
        int i;

        pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
        if (!pagelist)
                panic("%s: Cannot allocate page list for VDSO", __func__);
        for (i = 0; i < pages; i++)
                pagelist[i] = virt_to_page(start + i * PAGE_SIZE);
        return pagelist;
}
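
/*
 * Allocating pages + 1 entries leaves a trailing NULL: the .pages array of
 * a struct vm_special_mapping is NULL-terminated, which is how the generic
 * special-mapping fault handler finds its end.
 */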

static int __init vdso_init(void)
{
        vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end);
        if (IS_ENABLED(CONFIG_COMPAT))
                vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end);
        return 0;
}
arch_initcall(vdso_init);