kexec.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * kexec.c - kexec_load system call
 * Copyright (C) 2002-2004 Eric Biederman <[email protected]>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/security.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/syscalls.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>

#include "kexec_internal.h"

static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
                             unsigned long nr_segments,
                             struct kexec_segment *segments,
                             unsigned long flags)
{
        int ret;
        struct kimage *image;
        bool kexec_on_panic = flags & KEXEC_ON_CRASH;

        if (kexec_on_panic) {
                /* Verify we have a valid entry point */
                if ((entry < phys_to_boot_phys(crashk_res.start)) ||
                    (entry > phys_to_boot_phys(crashk_res.end)))
                        return -EADDRNOTAVAIL;
        }

        /* Allocate and initialize a controlling structure */
        image = do_kimage_alloc_init();
        if (!image)
                return -ENOMEM;

        image->start = entry;
        image->nr_segments = nr_segments;
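        /*
         * nr_segments was already capped at KEXEC_SEGMENT_MAX by
         * kexec_load_check(), so this copy cannot overflow the fixed
         * segment[] array in struct kimage.
         */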
        memcpy(image->segment, segments, nr_segments * sizeof(*segments));

        if (kexec_on_panic) {
                /* Enable special crash kernel control page alloc policy. */
                image->control_page = crashk_res.start;
                image->type = KEXEC_TYPE_CRASH;
        }

        ret = sanity_check_segment_list(image);
        if (ret)
                goto out_free_image;

        /*
         * Find a location for the control code buffer, and add it to the
         * vector of segments so that its pages will also be counted as
         * destination pages.
         */
        ret = -ENOMEM;
        image->control_code_page = kimage_alloc_control_pages(image,
                                        get_order(KEXEC_CONTROL_PAGE_SIZE));
        if (!image->control_code_page) {
                pr_err("Could not allocate control_code_buffer\n");
                goto out_free_image;
        }
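
        /*
         * The swap page is scratch space used by the relocation code when
         * KEXEC_PRESERVE_CONTEXT asks for the current kernel's pages to be
         * preserved; crash images never return and do not need one.
         */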
        if (!kexec_on_panic) {
                image->swap_page = kimage_alloc_control_pages(image, 0);
                if (!image->swap_page) {
                        pr_err("Could not allocate swap buffer\n");
                        goto out_free_control_pages;
                }
        }

        *rimage = image;
        return 0;

out_free_control_pages:
        kimage_free_page_list(&image->control_pages);
out_free_image:
        kfree(image);
        return ret;
}

static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
                         struct kexec_segment *segments, unsigned long flags)
{
        struct kimage **dest_image, *image;
        unsigned long i;
        int ret;

        /*
         * Because we write directly to the reserved memory region when loading
         * crash kernels we need a serialization here to prevent multiple crash
         * kernels from attempting to load simultaneously.
         */
        if (!kexec_trylock())
                return -EBUSY;
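
        /*
         * Architectures may keep the reserved crash region protected
         * (e.g. mapped read-only or not at all) while an image is loaded;
         * lift that protection before the region is modified.
         */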
        if (flags & KEXEC_ON_CRASH) {
                dest_image = &kexec_crash_image;
                if (kexec_crash_image)
                        arch_kexec_unprotect_crashkres();
        } else {
                dest_image = &kexec_image;
        }
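
        /*
         * Loading zero segments requests an unload: xchg() detaches the
         * installed image atomically before it is freed.
         */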
        if (nr_segments == 0) {
                /* Uninstall image */
                kimage_free(xchg(dest_image, NULL));
                ret = 0;
                goto out_unlock;
        }
        if (flags & KEXEC_ON_CRASH) {
                /*
                 * Loading another kernel to switch to if this one
                 * crashes.  Free any current crash dump kernel before
                 * we corrupt it.
                 */
                kimage_free(xchg(&kexec_crash_image, NULL));
        }

        ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
        if (ret)
                goto out_unlock;
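
        /*
         * KEXEC_PRESERVE_CONTEXT marks the image for a "kexec jump":
         * the current kernel's state is preserved so that it can be
         * resumed after the new image returns.
         */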
        if (flags & KEXEC_PRESERVE_CONTEXT)
                image->preserve_context = 1;

        ret = machine_kexec_prepare(image);
        if (ret)
                goto out;

        /*
         * Some architectures (like s390) may touch the crash memory before
         * machine_kexec_prepare(), so we must copy the vmcoreinfo data
         * after it.
         */
        ret = kimage_crash_copy_vmcoreinfo(image);
        if (ret)
                goto out;
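
        /*
         * Copy the user-supplied segment data into kernel pages.  For a
         * crash load this writes straight into the reserved region; for a
         * normal load it fills intermediate pages that the relocation code
         * moves into place at kexec time.
         */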
        for (i = 0; i < nr_segments; i++) {
                ret = kimage_load_segment(image, &image->segment[i]);
                if (ret)
                        goto out;
        }
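
        /* Seal the image's page list with a terminating indirection entry. */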
        kimage_terminate(image);

        ret = machine_kexec_post_load(image);
        if (ret)
                goto out;

        /* Install the new kernel and uninstall the old */
        image = xchg(dest_image, image);
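        /* image now refers to the old image (or NULL); it is freed below. */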
out:
        if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
                arch_kexec_protect_crashkres();

        kimage_free(image);
out_unlock:
        kexec_unlock();
        return ret;
}

/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down, preventing ongoing DMAs and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that includes the magic number
 *   and then copies the image to its final destination, and
 *   jumps into the image at entry.
 *
 * kexec does not sync or unmount filesystems, so if you need that
 * to happen you must do it yourself.
 */
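
/*
 * Illustrative userspace invocation (a sketch only; image_buf, image_len
 * and the 0x100000 destination are hypothetical and architecture
 * dependent; memsz must be page aligned and at least bufsz):
 *
 *	struct kexec_segment seg = {
 *		.buf   = image_buf,
 *		.bufsz = image_len,
 *		.mem   = (void *)0x100000,
 *		.memsz = segment_len,
 *	};
 *	syscall(__NR_kexec_load, 0x100000, 1, &seg, KEXEC_ARCH_DEFAULT);
 */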
static inline int kexec_load_check(unsigned long nr_segments,
                                   unsigned long flags)
{
        int result;

        /* We only trust the superuser with rebooting the system. */
        if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
                return -EPERM;

        /* Permit LSMs and IMA to fail the kexec */
        result = security_kernel_load_data(LOADING_KEXEC_IMAGE, false);
        if (result < 0)
                return result;

        /*
         * kexec can be used to circumvent module loading restrictions, so
         * prevent loading in that case
         */
        result = security_locked_down(LOCKDOWN_KEXEC);
        if (result)
                return result;

        /*
         * Verify we have a legal set of flags: every bit outside the
         * architecture field must be a recognized KEXEC_* flag.  This
         * leaves us room for future extensions.
         */
        if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
                return -EINVAL;

        /*
         * Put an artificial cap on the number
         * of segments passed to kexec_load.
         */
        if (nr_segments > KEXEC_SEGMENT_MAX)
                return -EINVAL;

        return 0;
}

SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
                struct kexec_segment __user *, segments, unsigned long, flags)
{
        struct kexec_segment *ksegments;
        unsigned long result;

        result = kexec_load_check(nr_segments, flags);
        if (result)
                return result;

        /* Verify we are on the appropriate architecture */
        if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
            ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
                return -EINVAL;
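
        /*
         * memdup_array_user() checks nr_segments * sizeof() for overflow
         * and copies the whole array from userspace in one pass.
         */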
        ksegments = memdup_array_user(segments, nr_segments, sizeof(ksegments[0]));
        if (IS_ERR(ksegments))
                return PTR_ERR(ksegments);

        result = do_kexec_load(entry, nr_segments, ksegments, flags);
        kfree(ksegments);

        return result;
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
                       compat_ulong_t, nr_segments,
                       struct compat_kexec_segment __user *, segments,
                       compat_ulong_t, flags)
{
        struct compat_kexec_segment in;
        struct kexec_segment *ksegments;
        unsigned long i, result;

        result = kexec_load_check(nr_segments, flags);
        if (result)
                return result;

        /*
         * Don't allow clients that don't understand the native
         * architecture to do anything.
         */
        if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
                return -EINVAL;

        ksegments = kmalloc_array(nr_segments, sizeof(ksegments[0]),
                                  GFP_KERNEL);
        if (!ksegments)
                return -ENOMEM;
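
        /*
         * struct compat_kexec_segment carries 32-bit pointers and sizes,
         * so each entry is widened field by field into the native layout.
         */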
        for (i = 0; i < nr_segments; i++) {
                result = copy_from_user(&in, &segments[i], sizeof(in));
                if (result)
                        goto fail;

                ksegments[i].buf = compat_ptr(in.buf);
                ksegments[i].bufsz = in.bufsz;
                ksegments[i].mem = in.mem;
                ksegments[i].memsz = in.memsz;
        }

        result = do_kexec_load(entry, nr_segments, ksegments, flags);

fail:
        kfree(ksegments);
        return result;
}
#endif