// SPDX-License-Identifier: GPL-2.0
/*
 * Initialize MMU support.
 *
 * Copyright (C) 1998-2003 Hewlett-Packard Co
 *	David Mosberger-Tang <[email protected]>
 */
#include <linux/kernel.h>
#include <linux/init.h>

#include <linux/dma-map-ops.h>
#include <linux/dmar.h>
#include <linux/efi.h>
#include <linux/elf.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/sched/signal.h>
#include <linux/mmzone.h>
#include <linux/module.h>
#include <linux/personality.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/proc_fs.h>
#include <linux/bitops.h>
#include <linux/kexec.h>
#include <linux/swiotlb.h>

#include <asm/dma.h>
#include <asm/efi.h>
#include <asm/io.h>
#include <asm/numa.h>
#include <asm/patch.h>
#include <asm/pgalloc.h>
#include <asm/sal.h>
#include <asm/sections.h>
#include <asm/tlb.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <asm/mca.h>

extern void ia64_tlb_init (void);

unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;

struct page *zero_page_memmap_ptr;	/* map entry for zero page */
EXPORT_SYMBOL(zero_page_memmap_ptr);
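
/*
 * Lazily sync the i-cache with the d-cache for the page behind @pte: flush
 * the page's i-cache range once and record that in PG_arch_1, so later
 * mappings of an already-clean page can skip the flush.
 */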
void
__ia64_sync_icache_dcache (pte_t pte)
{
	unsigned long addr;
	struct page *page;

	page = pte_page(pte);
	addr = (unsigned long) page_address(page);

	if (test_bit(PG_arch_1, &page->flags))
		return;				/* i-cache is already coherent with d-cache */

	flush_icache_range(addr, addr + page_size(page));
	set_bit(PG_arch_1, &page->flags);	/* mark page as clean */
}

/*
 * Since DMA is i-cache coherent, any (complete) pages that were written via
 * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
 * flush them when they get mapped into an executable vm-area.
 */
void arch_dma_mark_clean(phys_addr_t paddr, size_t size)
{
	unsigned long pfn = PHYS_PFN(paddr);

	do {
		set_bit(PG_arch_1, &pfn_to_page(pfn)->flags);
	} while (++pfn <= PHYS_PFN(paddr + size - 1));
}
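
/*
 * Place the bottom of the register backing store just below the stack:
 * take the hard stack rlimit (rounded down to a 16-byte multiple), cap it
 * at MAX_USER_STACK_SIZE, and subtract that from the stack start.
 */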
inline void
ia64_set_rbs_bot (void)
{
	unsigned long stack_size = rlimit_max(RLIMIT_STACK) & -16;

	if (stack_size > MAX_USER_STACK_SIZE)
		stack_size = MAX_USER_STACK_SIZE;
	current->thread.rbs_bot = PAGE_ALIGN(current->mm->start_stack - stack_size);
}

/*
 * This performs some platform-dependent address space initialization.
 * On IA-64, we want to setup the VM area for the register backing
 * store (which grows upwards) and install the gateway page which is
 * used for signal trampolines, etc.
 */
void
ia64_init_addr_space (void)
{
	struct vm_area_struct *vma;

	ia64_set_rbs_bot();

	/*
	 * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
	 * the problem.  When the process attempts to write to the register backing store
	 * for the first time, it will get a SEGFAULT in this case.
	 */
	vma = vm_area_alloc(current->mm);
	if (vma) {
		vma_set_anonymous(vma);
		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
		vma->vm_end = vma->vm_start + PAGE_SIZE;
		vm_flags_init(vma, VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT);
		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
		mmap_write_lock(current->mm);
		if (insert_vm_struct(current->mm, vma)) {
			mmap_write_unlock(current->mm);
			vm_area_free(vma);
			return;
		}
		mmap_write_unlock(current->mm);
	}

	/* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
	if (!(current->personality & MMAP_PAGE_ZERO)) {
		vma = vm_area_alloc(current->mm);
		if (vma) {
			vma_set_anonymous(vma);
			vma->vm_end = PAGE_SIZE;
			vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
			vm_flags_init(vma, VM_READ | VM_MAYREAD | VM_IO |
				      VM_DONTEXPAND | VM_DONTDUMP);
			mmap_write_lock(current->mm);
			if (insert_vm_struct(current->mm, vma)) {
				mmap_write_unlock(current->mm);
				vm_area_free(vma);
				return;
			}
			mmap_write_unlock(current->mm);
		}
	}
}
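
/*
 * Return the pages holding __init code and data (the [__init_begin,
 * __init_end) section) to the page allocator once boot is complete.
 */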
void
free_initmem (void)
{
	free_reserved_area(ia64_imva(__init_begin), ia64_imva(__init_end),
			   -1, "unused kernel");
}

void __init
free_initrd_mem (unsigned long start, unsigned long end)
{
	/*
	 * EFI uses 4KB pages while the kernel can use 4KB or bigger.
	 * Thus EFI and the kernel may have different page sizes. It is
	 * therefore possible to have the initrd share the same page as
	 * the end of the kernel (given current setup).
	 *
	 * To avoid freeing/using the wrong page (kernel sized) we:
	 *	- align up the beginning of initrd
	 *	- align down the end of initrd
	 *
	 *  |             |
	 *  |=============| a000
	 *  |             |
	 *  |             |
	 *  |             | 9000
	 *  |/////////////|
	 *  |/////////////|
	 *  |=============| 8000
	 *  |///INITRD////|
	 *  |/////////////|
	 *  |/////////////| 7000
	 *  |             |
	 *  |KKKKKKKKKKKKK|
	 *  |=============| 6000
	 *  |KKKKKKKKKKKKK|
	 *  |KKKKKKKKKKKKK|
	 *  K=kernel using 8KB pages
	 *
	 * In this example, we must free page 8000 ONLY. So we must align up
	 * initrd_start and keep initrd_end as is.
	 */
	start = PAGE_ALIGN(start);
	end = end & PAGE_MASK;

	if (start < end)
		printk(KERN_INFO "Freeing initrd memory: %ldkB freed\n", (end - start) >> 10);

	for (; start < end; start += PAGE_SIZE) {
		if (!virt_addr_valid(start))
			continue;
		free_reserved_page(virt_to_page(start));
	}
}

/*
 * This installs a clean page in the kernel's page table.
 */
static struct page * __init
put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
{
	pgd_t *pgd;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = pgd_offset_k(address);		/* note: this is NOT pgd_offset()! */

	{
		p4d = p4d_alloc(&init_mm, pgd, address);
		if (!p4d)
			goto out;
		pud = pud_alloc(&init_mm, p4d, address);
		if (!pud)
			goto out;
		pmd = pmd_alloc(&init_mm, pud, address);
		if (!pmd)
			goto out;
		pte = pte_alloc_kernel(pmd, address);
		if (!pte)
			goto out;
		if (!pte_none(*pte))
			goto out;
		set_pte(pte, mk_pte(page, pgprot));
	}
  out:
	/* no need for flush_tlb */
	return page;
}
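
/*
 * Install the gate page(s) into the kernel page table at GATE_ADDR via
 * put_kernel_page() above, then let ia64_patch_gate() apply the runtime
 * patches the gate code needs.
 */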
static void __init
setup_gate (void)
{
	struct page *page;

	/*
	 * Map the gate page twice: once read-only to export the ELF
	 * headers etc. and once execute-only page to enable
	 * privilege-promotion via "epc":
	 */
	page = virt_to_page(ia64_imva(__start_gate_section));
	put_kernel_page(page, GATE_ADDR, PAGE_READONLY);
#ifdef HAVE_BUGGY_SEGREL
	page = virt_to_page(ia64_imva(__start_gate_section + PAGE_SIZE));
	put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE);
#else
	put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE);
	/* Fill in the holes (if any) with read-only zero pages: */
	{
		unsigned long addr;

		for (addr = GATE_ADDR + PAGE_SIZE;
		     addr < GATE_ADDR + PERCPU_PAGE_SIZE;
		     addr += PAGE_SIZE)
		{
			put_kernel_page(ZERO_PAGE(0), addr,
					PAGE_READONLY);
			put_kernel_page(ZERO_PAGE(0), addr + PERCPU_PAGE_SIZE,
					PAGE_READONLY);
		}
	}
#endif
	ia64_patch_gate();
}
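
/*
 * A single, statically allocated VMA describes the user-visible gate area
 * [FIXADDR_USER_START, FIXADDR_USER_END); get_gate_vma() and the
 * in_gate_area*() helpers below expose it to generic mm code.
 */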
static struct vm_area_struct gate_vma;

static int __init gate_vma_init(void)
{
	vma_init(&gate_vma, NULL);
	gate_vma.vm_start = FIXADDR_USER_START;
	gate_vma.vm_end = FIXADDR_USER_END;
	vm_flags_init(&gate_vma, VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC);
	gate_vma.vm_page_prot = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX);

	return 0;
}
__initcall(gate_vma_init);

struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{
	return &gate_vma;
}

int in_gate_area_no_mm(unsigned long addr)
{
	if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
		return 1;
	return 0;
}

int in_gate_area(struct mm_struct *mm, unsigned long addr)
{
	return in_gate_area_no_mm(addr);
}
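
/*
 * Per-CPU MMU setup: size and place the virtually mapped linear page table
 * (VMLPT), program the PTA register accordingly, initialize the TLB and,
 * when hugetlb is configured, set the page size of the hugetlb region.
 */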
void ia64_mmu_init(void *my_cpu_data)
{
	unsigned long pta, impl_va_bits;
	extern void tlb_init(void);

#ifdef CONFIG_DISABLE_VHPT
#	define VHPT_ENABLE_BIT	0
#else
#	define VHPT_ENABLE_BIT	1
#endif

	/*
	 * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
	 * address space.  The IA-64 architecture guarantees that at least 50 bits of
	 * virtual address space are implemented but if we pick a large enough page size
	 * (e.g., 64KB), the mapped address space is big enough that it will overlap with
	 * VMLPT.  I assume that once we run on machines big enough to warrant 64KB pages,
	 * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a
	 * problem in practice.  Alternatively, we could truncate the top of the mapped
	 * address space to not permit mappings that would overlap with the VMLPT.
	 * --davidm 00/12/06
	 */
#	define pte_bits			3
#	define mapped_space_bits	(3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
	/*
	 * The virtual page table has to cover the entire implemented address space within
	 * a region even though not all of this space may be mappable.  The reason for
	 * this is that the Access bit and Dirty bit fault handlers perform
	 * non-speculative accesses to the virtual page table, so the address range of the
	 * virtual page table itself needs to be covered by virtual page table.
	 */
#	define vmlpt_bits		(impl_va_bits - PAGE_SHIFT + pte_bits)
#	define POW2(n)			(1ULL << (n))

	impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));

	if (impl_va_bits < 51 || impl_va_bits > 61)
		panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
	/*
	 * mapped_space_bits - PAGE_SHIFT is the total number of ptes we need,
	 * which must fit into "vmlpt_bits - pte_bits" slots. Second half of
	 * the test makes sure that our mapped space doesn't overlap the
	 * unimplemented hole in the middle of the region.
	 */
	if ((mapped_space_bits - PAGE_SHIFT > vmlpt_bits - pte_bits) ||
	    (mapped_space_bits > impl_va_bits - 1))
		panic("Cannot build a big enough virtual-linear page table"
		      " to cover mapped address space.\n"
		      " Try using a smaller page size.\n");

	/* place the VMLPT at the end of each page-table mapped region: */
	pta = POW2(61) - POW2(vmlpt_bits);

	/*
	 * Set the (virtually mapped linear) page table address.  Bit
	 * 8 selects between the short and long format, bits 2-7 the
	 * size of the table, and bit 0 whether the VHPT walker is
	 * enabled.
	 */
	ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);

	ia64_tlb_init();

#ifdef	CONFIG_HUGETLB_PAGE
	ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
	ia64_srlz_d();
#endif
}
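
/*
 * Add a usable [start, start+len) range to memblock for node @nid; with
 * CONFIG_KEXEC, the range is clipped so it does not intrude on the
 * crashkernel reservation (crashk_res).
 */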
int __init register_active_ranges(u64 start, u64 len, int nid)
{
	u64 end = start + len;

#ifdef CONFIG_KEXEC
	if (start > crashk_res.start && start < crashk_res.end)
		start = crashk_res.end;
	if (end > crashk_res.start && end < crashk_res.end)
		end = crashk_res.start;
#endif

	if (start < end)
		memblock_add_node(__pa(start), end - start, nid, MEMBLOCK_NONE);
	return 0;
}
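
/*
 * Update the global min_low_pfn/max_low_pfn bounds from a memory range;
 * with FLATMEM the range is rounded to page granularity, otherwise to
 * granule granularity.
 */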
int
find_max_min_low_pfn (u64 start, u64 end, void *arg)
{
	unsigned long pfn_start, pfn_end;
#ifdef CONFIG_FLATMEM
	pfn_start = (PAGE_ALIGN(__pa(start))) >> PAGE_SHIFT;
	pfn_end = (PAGE_ALIGN(__pa(end - 1))) >> PAGE_SHIFT;
#else
	pfn_start = GRANULEROUNDDOWN(__pa(start)) >> PAGE_SHIFT;
	pfn_end = GRANULEROUNDUP(__pa(end - 1)) >> PAGE_SHIFT;
#endif
	min_low_pfn = min(min_low_pfn, pfn_start);
	max_low_pfn = max(max_low_pfn, pfn_end);
	return 0;
}

/*
 * Boot command-line option "nolwsys" can be used to disable the use of any light-weight
 * system call handler.  When this option is in effect, all fsyscalls will end up bubbling
 * down into the kernel and calling the normal (heavy-weight) syscall handler.  This is
 * useful for performance testing, but conceivably could also come in handy for debugging
 * purposes.
 */
static int nolwsys __initdata;

static int __init
nolwsys_setup (char *s)
{
	nolwsys = 1;
	return 1;
}

__setup("nolwsys", nolwsys_setup);
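
/*
 * Late memory setup: pick a DMA backend (hardware IOMMU if one is detected,
 * swiotlb otherwise), release all memblock memory to the page allocator,
 * patch up the fsyscall table, and install the gate page.
 */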
void __init
mem_init (void)
{
	int i;

	BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE);
	BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE);
	BUG_ON(PTRS_PER_PTE * sizeof(pte_t) != PAGE_SIZE);

	/*
	 * This needs to be called _after_ the command line has been parsed but
	 * _before_ any drivers that may need the PCI DMA interface are
	 * initialized or bootmem has been freed.
	 */
	do {
#ifdef CONFIG_INTEL_IOMMU
		detect_intel_iommu();
		if (iommu_detected)
			break;
#endif
		swiotlb_init(true, SWIOTLB_VERBOSE);
	} while (0);

#ifdef CONFIG_FLATMEM
	BUG_ON(!mem_map);
#endif

	set_max_mapnr(max_low_pfn);
	high_memory = __va(max_low_pfn * PAGE_SIZE);
	memblock_free_all();

	/*
	 * For fsyscall entrypoints with no light-weight handler, use the ordinary
	 * (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry
	 * code can tell them apart.
	 */
	for (i = 0; i < NR_syscalls; ++i) {
		extern unsigned long fsyscall_table[NR_syscalls];
		extern unsigned long sys_call_table[NR_syscalls];

		if (!fsyscall_table[i] || nolwsys)
			fsyscall_table[i] = sys_call_table[i] | 1;
	}
	setup_gate();
}

#ifdef CONFIG_MEMORY_HOTPLUG
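/*
 * Memory hotplug: arch_add_memory() feeds new ranges to the generic
 * __add_pages() path (only PAGE_KERNEL protections are accepted), and
 * arch_remove_memory() undoes that via __remove_pages().
 */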
int arch_add_memory(int nid, u64 start, u64 size,
		    struct mhp_params *params)
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;
	int ret;

	if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
		return -EINVAL;

	ret = __add_pages(nid, start_pfn, nr_pages, params);
	if (ret)
		printk("%s: Problem encountered in __add_pages() as ret=%d\n",
		       __func__, ret);

	return ret;
}

void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
	unsigned long start_pfn = start >> PAGE_SHIFT;
	unsigned long nr_pages = size >> PAGE_SHIFT;

	__remove_pages(start_pfn, nr_pages, altmap);
}
#endif
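
/*
 * Translate each combination of VM_READ/VM_WRITE/VM_EXEC/VM_SHARED into the
 * corresponding ia64 page protection bits; DECLARE_VM_GET_PAGE_PROT then
 * generates vm_get_page_prot() on top of this table.
 */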
static const pgprot_t protection_map[16] = {
	[VM_NONE]					= PAGE_NONE,
	[VM_READ]					= PAGE_READONLY,
	[VM_WRITE]					= PAGE_READONLY,
	[VM_WRITE | VM_READ]				= PAGE_READONLY,
	[VM_EXEC]					= __pgprot(__ACCESS_BITS | _PAGE_PL_3 |
								   _PAGE_AR_X_RX),
	[VM_EXEC | VM_READ]				= __pgprot(__ACCESS_BITS | _PAGE_PL_3 |
								   _PAGE_AR_RX),
	[VM_EXEC | VM_WRITE]				= PAGE_COPY_EXEC,
	[VM_EXEC | VM_WRITE | VM_READ]			= PAGE_COPY_EXEC,
	[VM_SHARED]					= PAGE_NONE,
	[VM_SHARED | VM_READ]				= PAGE_READONLY,
	[VM_SHARED | VM_WRITE]				= PAGE_SHARED,
	[VM_SHARED | VM_WRITE | VM_READ]		= PAGE_SHARED,
	[VM_SHARED | VM_EXEC]				= __pgprot(__ACCESS_BITS | _PAGE_PL_3 |
								   _PAGE_AR_X_RX),
	[VM_SHARED | VM_EXEC | VM_READ]			= __pgprot(__ACCESS_BITS | _PAGE_PL_3 |
								   _PAGE_AR_RX),
	[VM_SHARED | VM_EXEC | VM_WRITE]		= __pgprot(__ACCESS_BITS | _PAGE_PL_3 |
								   _PAGE_AR_RWX),
	[VM_SHARED | VM_EXEC | VM_WRITE | VM_READ]	= __pgprot(__ACCESS_BITS | _PAGE_PL_3 |
								   _PAGE_AR_RWX)
};
DECLARE_VM_GET_PAGE_PROT