/* machine_kexec_32.c — 6.4 KB */
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * handle transition of Linux booting another kernel
  4. * Copyright (C) 2002-2005 Eric Biederman <[email protected]>
  5. */
  6. #include <linux/mm.h>
  7. #include <linux/kexec.h>
  8. #include <linux/delay.h>
  9. #include <linux/numa.h>
  10. #include <linux/ftrace.h>
  11. #include <linux/suspend.h>
  12. #include <linux/gfp.h>
  13. #include <linux/io.h>
  14. #include <asm/pgalloc.h>
  15. #include <asm/tlbflush.h>
  16. #include <asm/mmu_context.h>
  17. #include <asm/apic.h>
  18. #include <asm/io_apic.h>
  19. #include <asm/cpufeature.h>
  20. #include <asm/desc.h>
  21. #include <asm/set_memory.h>
  22. #include <asm/debugreg.h>
/*
 * Reload the segment registers with the flat kernel selectors.
 *
 * Called from machine_kexec() immediately before the GDT/IDT are
 * invalidated: segment registers have a hidden descriptor cache that is
 * only refreshed when the visible selector is written, so forcing a
 * reload here guarantees the cached descriptors stay valid even after
 * the descriptor tables are gone.
 *
 * The ljmp reloads %cs with __KERNEL_CS (jumping to the local label 1:),
 * then %ds, %es and %ss are set to __KERNEL_DS via %eax.  Note %fs/%gs
 * are not touched here.  Clobbers: eax, memory.
 */
static void load_segments(void)
{
/* Stringify the selector constants so they can be pasted into the asm. */
#define __STR(X) #X
#define STR(X) __STR(X)

	__asm__ __volatile__ (
		"\tljmp $"STR(__KERNEL_CS)",$1f\n"
		"\t1:\n"
		"\tmovl $"STR(__KERNEL_DS)",%%eax\n"
		"\tmovl %%eax,%%ds\n"
		"\tmovl %%eax,%%es\n"
		"\tmovl %%eax,%%ss\n"
		: : : "eax", "memory");
#undef STR
#undef __STR
}
  38. static void machine_kexec_free_page_tables(struct kimage *image)
  39. {
  40. free_pages((unsigned long)image->arch.pgd, PGD_ALLOCATION_ORDER);
  41. image->arch.pgd = NULL;
  42. #ifdef CONFIG_X86_PAE
  43. free_page((unsigned long)image->arch.pmd0);
  44. image->arch.pmd0 = NULL;
  45. free_page((unsigned long)image->arch.pmd1);
  46. image->arch.pmd1 = NULL;
  47. #endif
  48. free_page((unsigned long)image->arch.pte0);
  49. image->arch.pte0 = NULL;
  50. free_page((unsigned long)image->arch.pte1);
  51. image->arch.pte1 = NULL;
  52. }
  53. static int machine_kexec_alloc_page_tables(struct kimage *image)
  54. {
  55. image->arch.pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
  56. PGD_ALLOCATION_ORDER);
  57. #ifdef CONFIG_X86_PAE
  58. image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
  59. image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
  60. #endif
  61. image->arch.pte0 = (pte_t *)get_zeroed_page(GFP_KERNEL);
  62. image->arch.pte1 = (pte_t *)get_zeroed_page(GFP_KERNEL);
  63. if (!image->arch.pgd ||
  64. #ifdef CONFIG_X86_PAE
  65. !image->arch.pmd0 || !image->arch.pmd1 ||
  66. #endif
  67. !image->arch.pte0 || !image->arch.pte1) {
  68. return -ENOMEM;
  69. }
  70. return 0;
  71. }
/*
 * Install a single 4 KiB mapping of paddr at vaddr into the given page
 *-table hierarchy, wiring in the caller-supplied pmd/pte pages as the
 * intermediate levels where the hierarchy is not yet populated.
 *
 * @pgd:   top-level table to map into
 * @pmd:   pre-allocated pmd page (used on PAE when the pgd slot is empty;
 *         ignored/overwritten by pmd_offset() lookup below)
 * @pte:   pre-allocated pte page (used when the pmd slot is empty)
 * @vaddr: virtual address to map
 * @paddr: physical address to map it to (mapped PAGE_KERNEL_EXEC)
 */
static void machine_kexec_page_table_set_one(
	pgd_t *pgd, pmd_t *pmd, pte_t *pte,
	unsigned long vaddr, unsigned long paddr)
{
	p4d_t *p4d;
	pud_t *pud;

	pgd += pgd_index(vaddr);
#ifdef CONFIG_X86_PAE
	/* Hook the spare pmd page into an empty top-level slot. */
	if (!(pgd_val(*pgd) & _PAGE_PRESENT))
		set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT));
#endif
	/* p4d/pud levels are folded on 32-bit; these just re-derive pmd. */
	p4d = p4d_offset(pgd, vaddr);
	pud = pud_offset(p4d, vaddr);
	pmd = pmd_offset(pud, vaddr);
	/* Hook the spare pte page into an empty pmd slot. */
	if (!(pmd_val(*pmd) & _PAGE_PRESENT))
		set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE));
	pte = pte_offset_kernel(pmd, vaddr);
	set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
}
  91. static void machine_kexec_prepare_page_tables(struct kimage *image)
  92. {
  93. void *control_page;
  94. pmd_t *pmd = NULL;
  95. control_page = page_address(image->control_code_page);
  96. #ifdef CONFIG_X86_PAE
  97. pmd = image->arch.pmd0;
  98. #endif
  99. machine_kexec_page_table_set_one(
  100. image->arch.pgd, pmd, image->arch.pte0,
  101. (unsigned long)control_page, __pa(control_page));
  102. #ifdef CONFIG_X86_PAE
  103. pmd = image->arch.pmd1;
  104. #endif
  105. machine_kexec_page_table_set_one(
  106. image->arch.pgd, pmd, image->arch.pte1,
  107. __pa(control_page), __pa(control_page));
  108. }
/*
 * An architecture hook called to validate the
 * proposed image and prepare the control pages
 * as needed.  The pages for KEXEC_CONTROL_PAGE_SIZE
 * have been allocated, but the segments have not yet
 * been copied into the kernel.
 *
 * Do whatever setup is needed on the image and the
 * reboot code buffer to allow us to avoid allocations
 * later.
 *
 * - Make control page executable.
 * - Allocate page tables
 * - Setup page tables
 */
  124. int machine_kexec_prepare(struct kimage *image)
  125. {
  126. int error;
  127. set_memory_x((unsigned long)page_address(image->control_code_page), 1);
  128. error = machine_kexec_alloc_page_tables(image);
  129. if (error)
  130. return error;
  131. machine_kexec_prepare_page_tables(image);
  132. return 0;
  133. }
  134. /*
  135. * Undo anything leftover by machine_kexec_prepare
  136. * when an image is freed.
  137. */
  138. void machine_kexec_cleanup(struct kimage *image)
  139. {
  140. set_memory_nx((unsigned long)page_address(image->control_code_page), 1);
  141. machine_kexec_free_page_tables(image);
  142. }
  143. /*
  144. * Do not allocate memory (or fail in any way) in machine_kexec().
  145. * We are past the point of no return, committed to rebooting now.
  146. */
/*
 * Hand control to the new kernel.  Everything needed was set up in
 * machine_kexec_prepare(); nothing here may allocate or fail — we are
 * past the point of no return.  Only returns if image->preserve_context
 * is set (CONFIG_KEXEC_JUMP) and the jumped-to code comes back.
 */
void machine_kexec(struct kimage *image)
{
	unsigned long page_list[PAGES_NR];
	void *control_page;
	int save_ftrace_enabled;
	/* Signature of the relocation stub copied onto the control page. */
	asmlinkage unsigned long
		(*relocate_kernel_ptr)(unsigned long indirection_page,
				       unsigned long control_page,
				       unsigned long start_address,
				       unsigned int has_pae,
				       unsigned int preserve_context);

#ifdef CONFIG_KEXEC_JUMP
	/* Save CPU state so a returning kexec jump can restore it below. */
	if (image->preserve_context)
		save_processor_state();
#endif

	save_ftrace_enabled = __ftrace_enabled_save();

	/* Interrupts aren't acceptable while we reboot */
	local_irq_disable();
	hw_breakpoint_disable();

	if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
		/*
		 * We need to put APICs in legacy mode so that we can
		 * get timer interrupts in second kernel. kexec/kdump
		 * paths already have calls to restore_boot_irq_mode()
		 * in one form or other. kexec jump path also need one.
		 */
		clear_IO_APIC();
		restore_boot_irq_mode();
#endif
	}

	/* Copy the relocation stub so it runs from the control page. */
	control_page = page_address(image->control_code_page);
	memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);

	relocate_kernel_ptr = control_page;
	/* Addresses the stub needs once the kernel mappings are gone. */
	page_list[PA_CONTROL_PAGE] = __pa(control_page);
	page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
	page_list[PA_PGD] = __pa(image->arch.pgd);

	if (image->type == KEXEC_TYPE_DEFAULT)
		page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
						<< PAGE_SHIFT);

	/*
	 * The segment registers are funny things, they have both a
	 * visible and an invisible part. Whenever the visible part is
	 * set to a specific selector, the invisible part is loaded
	 * with from a table in memory. At no other time is the
	 * descriptor table in memory accessed.
	 *
	 * I take advantage of this here by force loading the
	 * segments, before I zap the gdt with an invalid value.
	 */
	load_segments();
	/*
	 * The gdt & idt are now invalid.
	 * If you want to load them you must set up your own idt & gdt.
	 */
	native_idt_invalidate();
	native_gdt_invalidate();

	/* now call it */
	image->start = relocate_kernel_ptr((unsigned long)image->head,
					   (unsigned long)page_list,
					   image->start,
					   boot_cpu_has(X86_FEATURE_PAE),
					   image->preserve_context);

#ifdef CONFIG_KEXEC_JUMP
	/* Only reached on a returning kexec jump: restore the saved state. */
	if (image->preserve_context)
		restore_processor_state();
#endif

	__ftrace_enabled_restore(save_ftrace_enabled);
}
  215. }