// SPDX-License-Identifier: GPL-2.0-only
/*
 * kexec for arm64
 *
 * Copyright (C) Linaro.
 * Copyright (C) Huawei Futurewei Technologies.
 */

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
#include <linux/page-flags.h>
#include <linux/set_memory.h>
#include <linux/smp.h>

#include <asm/cacheflush.h>
#include <asm/cpu_ops.h>
#include <asm/daifflags.h>
#include <asm/memory.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/sections.h>
#include <asm/trans_pgd.h>

/**
 * kexec_image_info - For debugging output.
 */
#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
static void _kexec_image_info(const char *func, int line,
			      const struct kimage *kimage)
{
	unsigned long i;

	pr_debug("%s:%d:\n", func, line);
	pr_debug("  kexec kimage info:\n");
	pr_debug("    type:        %d\n", kimage->type);
	pr_debug("    start:       %lx\n", kimage->start);
	pr_debug("    head:        %lx\n", kimage->head);
	pr_debug("    nr_segments: %lu\n", kimage->nr_segments);
	pr_debug("    dtb_mem: %pa\n", &kimage->arch.dtb_mem);
	pr_debug("    kern_reloc: %pa\n", &kimage->arch.kern_reloc);
	pr_debug("    el2_vectors: %pa\n", &kimage->arch.el2_vectors);

	for (i = 0; i < kimage->nr_segments; i++) {
		pr_debug("      segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
			i,
			kimage->segment[i].mem,
			kimage->segment[i].mem + kimage->segment[i].memsz,
			kimage->segment[i].memsz,
			kimage->segment[i].memsz / PAGE_SIZE);
	}
}

void machine_kexec_cleanup(struct kimage *kimage)
{
	/* Empty routine needed to avoid build errors. */
}

/**
 * machine_kexec_prepare - Prepare for a kexec reboot.
 *
 * Called from the core kexec code when a kernel image is loaded.
 * Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
 * are stuck in the kernel. This avoids a panic once we hit machine_kexec().
 */
int machine_kexec_prepare(struct kimage *kimage)
{
	if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
		pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
		return -EBUSY;
	}

	return 0;
}

/**
 * kexec_segment_flush - Helper to flush the kimage segments to PoC.
 */
static void kexec_segment_flush(const struct kimage *kimage)
{
	unsigned long i;

	pr_debug("%s:\n", __func__);

	for (i = 0; i < kimage->nr_segments; i++) {
		pr_debug("  segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
			i,
			kimage->segment[i].mem,
			kimage->segment[i].mem + kimage->segment[i].memsz,
			kimage->segment[i].memsz,
			kimage->segment[i].memsz / PAGE_SIZE);

		dcache_clean_inval_poc(
			(unsigned long)phys_to_virt(kimage->segment[i].mem),
			(unsigned long)phys_to_virt(kimage->segment[i].mem) +
				kimage->segment[i].memsz);
	}
}

/*
 * Allocates pages for the kexec page tables. Control pages are used because
 * the core kexec code guarantees they do not overlap the destination of any
 * loaded segment, so the tables stay intact until relocation has run.
 */
static void *kexec_page_alloc(void *arg)
{
	struct kimage *kimage = (struct kimage *)arg;
	struct page *page = kimage_alloc_control_pages(kimage, 0);
	void *vaddr = NULL;

	if (!page)
		return NULL;

	vaddr = page_address(page);
	memset(vaddr, 0, PAGE_SIZE);

	return vaddr;
}
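
/*
 * machine_kexec_post_load - Prepare the loaded image for relocation.
 *
 * Called by the core kexec code once all segments are in place. Builds the
 * trans_pgd page tables (an idmap for the relocation code and a copy of the
 * linear map), copies the relocation code into a control page, and performs
 * the cache maintenance needed before that code is executed.
 */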
int machine_kexec_post_load(struct kimage *kimage)
{
	int rc;
	pgd_t *trans_pgd;
	void *reloc_code = page_to_virt(kimage->control_code_page);
	long reloc_size;
	struct trans_pgd_info info = {
		.trans_alloc_page	= kexec_page_alloc,
		.trans_alloc_arg	= kimage,
	};

	/* If in place, relocation is not used, only flush next kernel */
	if (kimage->head & IND_DONE) {
		kexec_segment_flush(kimage);
		kexec_image_info(kimage);
		return 0;
	}

	kimage->arch.el2_vectors = 0;
	if (is_hyp_nvhe()) {
		rc = trans_pgd_copy_el2_vectors(&info,
						&kimage->arch.el2_vectors);
		if (rc)
			return rc;
	}

	/* Create a copy of the linear map */
	trans_pgd = kexec_page_alloc(kimage);
	if (!trans_pgd)
		return -ENOMEM;
	rc = trans_pgd_create_copy(&info, &trans_pgd, PAGE_OFFSET, PAGE_END);
	if (rc)
		return rc;
	kimage->arch.ttbr1 = __pa(trans_pgd);
	kimage->arch.zero_page = __pa_symbol(empty_zero_page);

	reloc_size = __relocate_new_kernel_end - __relocate_new_kernel_start;
	memcpy(reloc_code, __relocate_new_kernel_start, reloc_size);
	kimage->arch.kern_reloc = __pa(reloc_code);
	rc = trans_pgd_idmap_page(&info, &kimage->arch.ttbr0,
				  &kimage->arch.t0sz, reloc_code);
	if (rc)
		return rc;
	kimage->arch.phys_offset = virt_to_phys(kimage) - (long)kimage;

	/* Flush the reloc_code in preparation for its execution. */
	dcache_clean_inval_poc((unsigned long)reloc_code,
			       (unsigned long)reloc_code + reloc_size);
	icache_inval_pou((uintptr_t)reloc_code,
			 (uintptr_t)reloc_code + reloc_size);
	kexec_image_info(kimage);

	return 0;
}

/**
 * machine_kexec - Do the kexec reboot.
 *
 * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
 */
void machine_kexec(struct kimage *kimage)
{
	bool in_kexec_crash = (kimage == kexec_crash_image);
	bool stuck_cpus = cpus_are_stuck_in_kernel();

	/*
	 * New cpus may have become stuck_in_kernel after we loaded the image.
	 */
	BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
	WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
		"Some CPUs may be stale, kdump will be unreliable.\n");

	pr_info("Bye!\n");

	local_daif_mask();

	/*
	 * Both restart and kernel_reloc will shut down the MMU and disable the
	 * data caches. However, restart starts the new kernel or purgatory
	 * directly, while kernel_reloc contains the body of
	 * arm64_relocate_new_kernel.
	 * In the kexec case, kimage->start points to purgatory, assuming that
	 * the kernel entry and dtb address are embedded in purgatory by
	 * userspace (kexec-tools).
	 * In the kexec_file case, the kernel starts directly without purgatory.
	 */
	if (kimage->head & IND_DONE) {
		typeof(cpu_soft_restart) *restart;

		cpu_install_idmap();
		restart = (void *)__pa_symbol(cpu_soft_restart);
		restart(is_hyp_nvhe(), kimage->start, kimage->arch.dtb_mem,
			0, 0);
	} else {
		void (*kernel_reloc)(struct kimage *kimage);

		if (is_hyp_nvhe())
			__hyp_set_vectors(kimage->arch.el2_vectors);
		cpu_install_ttbr0(kimage->arch.ttbr0, kimage->arch.t0sz);
		kernel_reloc = (void *)kimage->arch.kern_reloc;
		kernel_reloc(kimage);
	}

	BUG(); /* Should never get here. */
}
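
/*
 * Quiesce every interrupt line at the irqchip level before jumping into the
 * crash kernel, so the new kernel does not boot with interrupts left active
 * or pending by the old kernel's devices.
 */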
static void machine_kexec_mask_interrupts(void)
{
	unsigned int i;
	struct irq_desc *desc;

	for_each_irq_desc(i, desc) {
		struct irq_chip *chip;
		int ret;

		chip = irq_desc_get_chip(desc);
		if (!chip)
			continue;

		/*
		 * First try to remove the active state. If this
		 * fails, try to EOI the interrupt.
		 */
		ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);

		if (ret && irqd_irq_inprogress(&desc->irq_data) &&
		    chip->irq_eoi)
			chip->irq_eoi(&desc->irq_data);

		if (chip->irq_mask)
			chip->irq_mask(&desc->irq_data);

		if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
			chip->irq_disable(&desc->irq_data);
	}
}

/**
 * machine_crash_shutdown - shutdown non-crashing cpus and save registers
 */
void machine_crash_shutdown(struct pt_regs *regs)
{
	local_irq_disable();

	/* shutdown non-crashing cpus */
	crash_smp_send_stop();

	/* for crashing cpu */
	crash_save_cpu(regs, smp_processor_id());
	machine_kexec_mask_interrupts();

	pr_info("Starting crashdump kernel...\n");
}
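
/*
 * The loaded crash dump kernel is protected by invalidating its linear-map
 * alias while the first kernel runs, so stray writes cannot corrupt it. The
 * mapping is made valid again only around paths that legitimately need to
 * touch the region (e.g. hibernation, see below).
 */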
void arch_kexec_protect_crashkres(void)
{
	int i;

	for (i = 0; i < kexec_crash_image->nr_segments; i++)
		set_memory_valid(
			__phys_to_virt(kexec_crash_image->segment[i].mem),
			kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
}

void arch_kexec_unprotect_crashkres(void)
{
	int i;

	for (i = 0; i < kexec_crash_image->nr_segments; i++)
		set_memory_valid(
			__phys_to_virt(kexec_crash_image->segment[i].mem),
			kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
}

#ifdef CONFIG_HIBERNATION
/*
 * To preserve the crash dump kernel image, the relevant memory segments
 * should be mapped again around the hibernation.
 */
void crash_prepare_suspend(void)
{
	if (kexec_crash_image)
		arch_kexec_unprotect_crashkres();
}

void crash_post_resume(void)
{
	if (kexec_crash_image)
		arch_kexec_protect_crashkres();
}

/*
 * crash_is_nosave
 *
 * Return true only if a page is part of reserved memory for crash dump kernel,
 * but does not hold any data of loaded kernel image.
 *
 * Note that all the pages in crash dump kernel memory have been initially
 * marked as Reserved as memory was allocated via memblock_reserve().
 *
 * In hibernation, the pages which are Reserved and yet "nosave" are excluded
 * from the hibernation image. crash_is_nosave() does this check for crash
 * dump kernel and will reduce the total size of hibernation image.
 */
bool crash_is_nosave(unsigned long pfn)
{
	int i;
	phys_addr_t addr;

	if (!crashk_res.end)
		return false;

	/* in reserved memory? */
	addr = __pfn_to_phys(pfn);
	if ((addr < crashk_res.start) || (crashk_res.end < addr)) {
		if (!crashk_low_res.end)
			return false;

		if ((addr < crashk_low_res.start) || (crashk_low_res.end < addr))
			return false;
	}

	if (!kexec_crash_image)
		return true;

	/* not part of loaded kernel image? */
	for (i = 0; i < kexec_crash_image->nr_segments; i++)
		if (addr >= kexec_crash_image->segment[i].mem &&
		    addr < (kexec_crash_image->segment[i].mem +
			    kexec_crash_image->segment[i].memsz))
			return false;

	return true;
}
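
/*
 * Called by the core kexec code when userspace shrinks the crashkernel
 * reservation via /sys/kernel/kexec_crash_size; the freed range is handed
 * back to the page allocator one page at a time.
 */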
void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
{
	unsigned long addr;
	struct page *page;

	for (addr = begin; addr < end; addr += PAGE_SIZE) {
		page = phys_to_page(addr);
		free_reserved_page(page);
	}
}
#endif /* CONFIG_HIBERNATION */