vmcore.c

  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * fs/proc/vmcore.c Interface for accessing the crash
  4. * dump from the system's previous life.
  5. * Heavily borrowed from fs/proc/kcore.c
  6. * Created by: Hariprasad Nellitheertha ([email protected])
  7. * Copyright (C) IBM Corporation, 2004. All rights reserved
  8. *
  9. */
  10. #include <linux/mm.h>
  11. #include <linux/kcore.h>
  12. #include <linux/user.h>
  13. #include <linux/elf.h>
  14. #include <linux/elfcore.h>
  15. #include <linux/export.h>
  16. #include <linux/slab.h>
  17. #include <linux/highmem.h>
  18. #include <linux/printk.h>
  19. #include <linux/memblock.h>
  20. #include <linux/init.h>
  21. #include <linux/crash_dump.h>
  22. #include <linux/list.h>
  23. #include <linux/moduleparam.h>
  24. #include <linux/mutex.h>
  25. #include <linux/vmalloc.h>
  26. #include <linux/pagemap.h>
  27. #include <linux/uio.h>
  28. #include <linux/cc_platform.h>
  29. #include <asm/io.h>
  30. #include "internal.h"
  31. /* List representing chunks of contiguous memory areas and their offsets in
  32. * vmcore file.
  33. */
  34. static LIST_HEAD(vmcore_list);
  35. /* Stores the pointer to the buffer containing kernel elf core headers. */
  36. static char *elfcorebuf;
  37. static size_t elfcorebuf_sz;
  38. static size_t elfcorebuf_sz_orig;
  39. static char *elfnotes_buf;
  40. static size_t elfnotes_sz;
  41. /* Size of all notes minus the device dump notes */
  42. static size_t elfnotes_orig_sz;
  43. /* Total size of vmcore file. */
  44. static u64 vmcore_size;
  45. static struct proc_dir_entry *proc_vmcore;
  46. #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
  47. /* Device Dump list and mutex to synchronize access to list */
  48. static LIST_HEAD(vmcoredd_list);
  49. static DEFINE_MUTEX(vmcoredd_mutex);
  50. static bool vmcoredd_disabled;
  51. core_param(novmcoredd, vmcoredd_disabled, bool, 0);
  52. #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
  53. /* Device Dump Size */
  54. static size_t vmcoredd_orig_sz;
  55. static DEFINE_SPINLOCK(vmcore_cb_lock);
  56. DEFINE_STATIC_SRCU(vmcore_cb_srcu);
  57. /* List of registered vmcore callbacks. */
  58. static LIST_HEAD(vmcore_cb_list);
  59. /* Whether the vmcore has been opened once. */
  60. static bool vmcore_opened;
  61. void register_vmcore_cb(struct vmcore_cb *cb)
  62. {
  63. INIT_LIST_HEAD(&cb->next);
  64. spin_lock(&vmcore_cb_lock);
  65. list_add_tail(&cb->next, &vmcore_cb_list);
  66. /*
  67. * Registering a vmcore callback after the vmcore was opened is
  68. * very unusual (e.g., manual driver loading).
  69. */
  70. if (vmcore_opened)
  71. pr_warn_once("Unexpected vmcore callback registration\n");
  72. spin_unlock(&vmcore_cb_lock);
  73. }
  74. EXPORT_SYMBOL_GPL(register_vmcore_cb);
  75. void unregister_vmcore_cb(struct vmcore_cb *cb)
  76. {
  77. spin_lock(&vmcore_cb_lock);
  78. list_del_rcu(&cb->next);
  79. /*
  80. * Unregistering a vmcore callback after the vmcore was opened is
  81. * very unusual (e.g., forced driver removal), but we cannot stop
  82. * unregistering.
  83. */
  84. if (vmcore_opened)
  85. pr_warn_once("Unexpected vmcore callback unregistration\n");
  86. spin_unlock(&vmcore_cb_lock);
  87. synchronize_srcu(&vmcore_cb_srcu);
  88. }
  89. EXPORT_SYMBOL_GPL(unregister_vmcore_cb);
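/*
 * Illustrative sketch, not part of the original file: a minimal example of how
 * a driver (e.g., virtio-mem) might hook into the callback interface above so
 * that pfn_is_ram() below can skip ranges it knows contain no dumpable RAM.
 * The "example_" identifiers are hypothetical; only struct vmcore_cb,
 * register_vmcore_cb() and unregister_vmcore_cb() come from this interface.
 */
#if 0
static bool example_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
{
	/* A real driver would return false for PFNs backed by no RAM. */
	return true;
}

static struct vmcore_cb example_vmcore_cb = {
	.pfn_is_ram = example_pfn_is_ram,
};

static int __init example_init(void)
{
	register_vmcore_cb(&example_vmcore_cb);
	return 0;
}

static void __exit example_exit(void)
{
	unregister_vmcore_cb(&example_vmcore_cb);
}
#endif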
  90. static bool pfn_is_ram(unsigned long pfn)
  91. {
  92. struct vmcore_cb *cb;
  93. bool ret = true;
  94. list_for_each_entry_srcu(cb, &vmcore_cb_list, next,
  95. srcu_read_lock_held(&vmcore_cb_srcu)) {
  96. if (unlikely(!cb->pfn_is_ram))
  97. continue;
  98. ret = cb->pfn_is_ram(cb, pfn);
  99. if (!ret)
  100. break;
  101. }
  102. return ret;
  103. }
  104. static int open_vmcore(struct inode *inode, struct file *file)
  105. {
  106. spin_lock(&vmcore_cb_lock);
  107. vmcore_opened = true;
  108. spin_unlock(&vmcore_cb_lock);
  109. return 0;
  110. }
  111. /* Read from the oldmem device starting at the given offset. */
  112. ssize_t read_from_oldmem(struct iov_iter *iter, size_t count,
  113. u64 *ppos, bool encrypted)
  114. {
  115. unsigned long pfn, offset;
  116. ssize_t nr_bytes;
  117. ssize_t read = 0, tmp;
  118. int idx;
  119. if (!count)
  120. return 0;
  121. offset = (unsigned long)(*ppos % PAGE_SIZE);
  122. pfn = (unsigned long)(*ppos / PAGE_SIZE);
  123. idx = srcu_read_lock(&vmcore_cb_srcu);
  124. do {
  125. if (count > (PAGE_SIZE - offset))
  126. nr_bytes = PAGE_SIZE - offset;
  127. else
  128. nr_bytes = count;
  129. /* If pfn is not ram, return zeros for sparse dump files */
  130. if (!pfn_is_ram(pfn)) {
  131. tmp = iov_iter_zero(nr_bytes, iter);
  132. } else {
  133. if (encrypted)
  134. tmp = copy_oldmem_page_encrypted(iter, pfn,
  135. nr_bytes,
  136. offset);
  137. else
  138. tmp = copy_oldmem_page(iter, pfn, nr_bytes,
  139. offset);
  140. }
  141. if (tmp < nr_bytes) {
  142. srcu_read_unlock(&vmcore_cb_srcu, idx);
  143. return -EFAULT;
  144. }
  145. *ppos += nr_bytes;
  146. count -= nr_bytes;
  147. read += nr_bytes;
  148. ++pfn;
  149. offset = 0;
  150. } while (count);
  151. srcu_read_unlock(&vmcore_cb_srcu, idx);
  152. return read;
  153. }
  154. /*
  155. * Architectures may override this function to allocate ELF header in 2nd kernel
  156. */
  157. int __weak elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
  158. {
  159. return 0;
  160. }
  161. /*
  162. * Architectures may override this function to free header
  163. */
  164. void __weak elfcorehdr_free(unsigned long long addr)
  165. {}
  166. /*
  167. * Architectures may override this function to read from ELF header
  168. */
  169. ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
  170. {
  171. struct kvec kvec = { .iov_base = buf, .iov_len = count };
  172. struct iov_iter iter;
  173. iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count);
  174. return read_from_oldmem(&iter, count, ppos, false);
  175. }
  176. /*
  177. * Architectures may override this function to read from notes sections
  178. */
  179. ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
  180. {
  181. struct kvec kvec = { .iov_base = buf, .iov_len = count };
  182. struct iov_iter iter;
  183. iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count);
  184. return read_from_oldmem(&iter, count, ppos,
  185. cc_platform_has(CC_ATTR_MEM_ENCRYPT));
  186. }
  187. /*
  188. * Architectures may override this function to map oldmem
  189. */
  190. int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
  191. unsigned long from, unsigned long pfn,
  192. unsigned long size, pgprot_t prot)
  193. {
  194. prot = pgprot_encrypted(prot);
  195. return remap_pfn_range(vma, from, pfn, size, prot);
  196. }
  197. /*
  198. * Architectures which support memory encryption override this.
  199. */
  200. ssize_t __weak copy_oldmem_page_encrypted(struct iov_iter *iter,
  201. unsigned long pfn, size_t csize, unsigned long offset)
  202. {
  203. return copy_oldmem_page(iter, pfn, csize, offset);
  204. }
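/*
 * Illustrative sketch, not part of the original file: the hooks above are
 * declared __weak, so an architecture can provide strong definitions. For
 * example, s390 builds the ELF core header in the 2nd kernel and reads it
 * from there instead of from oldmem. The buffer below is hypothetical.
 */
#if 0
static char *arch_elfcorehdr;	/* hypothetical header copy built by the arch */

ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
{
	memcpy(buf, arch_elfcorehdr + *ppos, count);
	*ppos += count;
	return count;
}
#endif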
  205. #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
  206. static int vmcoredd_copy_dumps(struct iov_iter *iter, u64 start, size_t size)
  207. {
  208. struct vmcoredd_node *dump;
  209. u64 offset = 0;
  210. int ret = 0;
  211. size_t tsz;
  212. char *buf;
  213. mutex_lock(&vmcoredd_mutex);
  214. list_for_each_entry(dump, &vmcoredd_list, list) {
  215. if (start < offset + dump->size) {
  216. tsz = min(offset + (u64)dump->size - start, (u64)size);
  217. buf = dump->buf + start - offset;
  218. if (copy_to_iter(buf, tsz, iter) < tsz) {
  219. ret = -EFAULT;
  220. goto out_unlock;
  221. }
  222. size -= tsz;
  223. start += tsz;
  224. /* Leave now if the buffer is already filled */
  225. if (!size)
  226. goto out_unlock;
  227. }
  228. offset += dump->size;
  229. }
  230. out_unlock:
  231. mutex_unlock(&vmcoredd_mutex);
  232. return ret;
  233. }
  234. #ifdef CONFIG_MMU
  235. static int vmcoredd_mmap_dumps(struct vm_area_struct *vma, unsigned long dst,
  236. u64 start, size_t size)
  237. {
  238. struct vmcoredd_node *dump;
  239. u64 offset = 0;
  240. int ret = 0;
  241. size_t tsz;
  242. char *buf;
  243. mutex_lock(&vmcoredd_mutex);
  244. list_for_each_entry(dump, &vmcoredd_list, list) {
  245. if (start < offset + dump->size) {
  246. tsz = min(offset + (u64)dump->size - start, (u64)size);
  247. buf = dump->buf + start - offset;
  248. if (remap_vmalloc_range_partial(vma, dst, buf, 0,
  249. tsz)) {
  250. ret = -EFAULT;
  251. goto out_unlock;
  252. }
  253. size -= tsz;
  254. start += tsz;
  255. dst += tsz;
  256. /* Leave now if the buffer is already filled */
  257. if (!size)
  258. goto out_unlock;
  259. }
  260. offset += dump->size;
  261. }
  262. out_unlock:
  263. mutex_unlock(&vmcoredd_mutex);
  264. return ret;
  265. }
  266. #endif /* CONFIG_MMU */
  267. #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
  268. /* Read from the ELF header and then the crash dump. On error, a negative value is
  269. * returned; otherwise, the number of bytes read is returned.
  270. */
  271. static ssize_t __read_vmcore(struct iov_iter *iter, loff_t *fpos)
  272. {
  273. ssize_t acc = 0, tmp;
  274. size_t tsz;
  275. u64 start;
  276. struct vmcore *m = NULL;
  277. if (!iov_iter_count(iter) || *fpos >= vmcore_size)
  278. return 0;
  279. iov_iter_truncate(iter, vmcore_size - *fpos);
  280. /* Read ELF core header */
  281. if (*fpos < elfcorebuf_sz) {
  282. tsz = min(elfcorebuf_sz - (size_t)*fpos, iov_iter_count(iter));
  283. if (copy_to_iter(elfcorebuf + *fpos, tsz, iter) < tsz)
  284. return -EFAULT;
  285. *fpos += tsz;
  286. acc += tsz;
  287. /* leave now if the buffer is already filled */
  288. if (!iov_iter_count(iter))
  289. return acc;
  290. }
  291. /* Read Elf note segment */
  292. if (*fpos < elfcorebuf_sz + elfnotes_sz) {
  293. void *kaddr;
  294. /* We add device dumps before other elf notes because the
  295. * other elf notes may not fill the elf notes buffer
  296. * completely and we will end up with zero-filled data
  297. * between the elf notes and the device dumps. Tools will
  298. * then try to decode this zero-filled data as valid notes
  299. * and we don't want that. Hence, adding device dumps before
  300. * the other elf notes ensures that zero-filled data can be
  301. * avoided.
  302. */
  303. #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
  304. /* Read device dumps */
  305. if (*fpos < elfcorebuf_sz + vmcoredd_orig_sz) {
  306. tsz = min(elfcorebuf_sz + vmcoredd_orig_sz -
  307. (size_t)*fpos, iov_iter_count(iter));
  308. start = *fpos - elfcorebuf_sz;
  309. if (vmcoredd_copy_dumps(iter, start, tsz))
  310. return -EFAULT;
  311. *fpos += tsz;
  312. acc += tsz;
  313. /* leave now if the buffer is already filled */
  314. if (!iov_iter_count(iter))
  315. return acc;
  316. }
  317. #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
  318. /* Read remaining elf notes */
  319. tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos,
  320. iov_iter_count(iter));
  321. kaddr = elfnotes_buf + *fpos - elfcorebuf_sz - vmcoredd_orig_sz;
  322. if (copy_to_iter(kaddr, tsz, iter) < tsz)
  323. return -EFAULT;
  324. *fpos += tsz;
  325. acc += tsz;
  326. /* leave now if the buffer is already filled */
  327. if (!iov_iter_count(iter))
  328. return acc;
  329. }
  330. list_for_each_entry(m, &vmcore_list, list) {
  331. if (*fpos < m->offset + m->size) {
  332. tsz = (size_t)min_t(unsigned long long,
  333. m->offset + m->size - *fpos,
  334. iov_iter_count(iter));
  335. start = m->paddr + *fpos - m->offset;
  336. tmp = read_from_oldmem(iter, tsz, &start,
  337. cc_platform_has(CC_ATTR_MEM_ENCRYPT));
  338. if (tmp < 0)
  339. return tmp;
  340. *fpos += tsz;
  341. acc += tsz;
  342. /* leave now if the buffer is already filled */
  343. if (!iov_iter_count(iter))
  344. return acc;
  345. }
  346. }
  347. return acc;
  348. }
  349. static ssize_t read_vmcore(struct kiocb *iocb, struct iov_iter *iter)
  350. {
  351. return __read_vmcore(iter, &iocb->ki_pos);
  352. }
  353. /*
  354. * The vmcore fault handler uses the page cache and fills data using the
  355. * standard __read_vmcore() function.
  356. *
  357. * On s390 the fault handler is used for memory regions that can't be mapped
  358. * directly with remap_pfn_range().
  359. */
  360. static vm_fault_t mmap_vmcore_fault(struct vm_fault *vmf)
  361. {
  362. #ifdef CONFIG_S390
  363. struct address_space *mapping = vmf->vma->vm_file->f_mapping;
  364. pgoff_t index = vmf->pgoff;
  365. struct iov_iter iter;
  366. struct kvec kvec;
  367. struct page *page;
  368. loff_t offset;
  369. int rc;
  370. page = find_or_create_page(mapping, index, GFP_KERNEL);
  371. if (!page)
  372. return VM_FAULT_OOM;
  373. if (!PageUptodate(page)) {
  374. offset = (loff_t) index << PAGE_SHIFT;
  375. kvec.iov_base = page_address(page);
  376. kvec.iov_len = PAGE_SIZE;
  377. iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, PAGE_SIZE);
  378. rc = __read_vmcore(&iter, &offset);
  379. if (rc < 0) {
  380. unlock_page(page);
  381. put_page(page);
  382. return vmf_error(rc);
  383. }
  384. SetPageUptodate(page);
  385. }
  386. unlock_page(page);
  387. vmf->page = page;
  388. return 0;
  389. #else
  390. return VM_FAULT_SIGBUS;
  391. #endif
  392. }
  393. static const struct vm_operations_struct vmcore_mmap_ops = {
  394. .fault = mmap_vmcore_fault,
  395. };
  396. /**
  397. * vmcore_alloc_buf - allocate buffer in vmalloc memory
  398. * @size: size of buffer
  399. *
  400. * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap
  401. * the buffer to user-space by means of remap_vmalloc_range().
  402. *
  403. * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is
  404. * disabled and there's no need to allow users to mmap the buffer.
  405. */
  406. static inline char *vmcore_alloc_buf(size_t size)
  407. {
  408. #ifdef CONFIG_MMU
  409. return vmalloc_user(size);
  410. #else
  411. return vzalloc(size);
  412. #endif
  413. }
  414. /*
  415. * Disable mmap_vmcore() if CONFIG_MMU is not defined. MMU is
  416. * essential for mmap_vmcore() in order to map physically
  417. * non-contiguous objects (ELF header, ELF note segment and memory
  418. * regions in the 1st kernel pointed to by PT_LOAD entries) into
  419. * virtually contiguous user-space in ELF layout.
  420. */
  421. #ifdef CONFIG_MMU
  422. /*
  423. * remap_oldmem_pfn_checked - do remap_oldmem_pfn_range replacing all pages
  424. * reported as not being ram with the zero page.
  425. *
  426. * @vma: vm_area_struct describing requested mapping
  427. * @from: start remapping from
  428. * @pfn: page frame number to start remapping to
  429. * @size: remapping size
  430. * @prot: protection bits
  431. *
  432. * Returns zero on success, -EAGAIN on failure.
  433. */
  434. static int remap_oldmem_pfn_checked(struct vm_area_struct *vma,
  435. unsigned long from, unsigned long pfn,
  436. unsigned long size, pgprot_t prot)
  437. {
  438. unsigned long map_size;
  439. unsigned long pos_start, pos_end, pos;
  440. unsigned long zeropage_pfn = my_zero_pfn(0);
  441. size_t len = 0;
  442. pos_start = pfn;
  443. pos_end = pfn + (size >> PAGE_SHIFT);
  444. for (pos = pos_start; pos < pos_end; ++pos) {
  445. if (!pfn_is_ram(pos)) {
  446. /*
  447. * We hit a page which is not ram. Remap the continuous
  448. * region between pos_start and pos-1 and replace
  449. * the non-ram page at pos with the zero page.
  450. */
  451. if (pos > pos_start) {
  452. /* Remap continuous region */
  453. map_size = (pos - pos_start) << PAGE_SHIFT;
  454. if (remap_oldmem_pfn_range(vma, from + len,
  455. pos_start, map_size,
  456. prot))
  457. goto fail;
  458. len += map_size;
  459. }
  460. /* Remap the zero page */
  461. if (remap_oldmem_pfn_range(vma, from + len,
  462. zeropage_pfn,
  463. PAGE_SIZE, prot))
  464. goto fail;
  465. len += PAGE_SIZE;
  466. pos_start = pos + 1;
  467. }
  468. }
  469. if (pos > pos_start) {
  470. /* Remap the rest */
  471. map_size = (pos - pos_start) << PAGE_SHIFT;
  472. if (remap_oldmem_pfn_range(vma, from + len, pos_start,
  473. map_size, prot))
  474. goto fail;
  475. }
  476. return 0;
  477. fail:
  478. do_munmap(vma->vm_mm, from, len, NULL);
  479. return -EAGAIN;
  480. }
  481. static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
  482. unsigned long from, unsigned long pfn,
  483. unsigned long size, pgprot_t prot)
  484. {
  485. int ret, idx;
  486. /*
  487. * Check if a callback was registered to avoid looping over all
  488. * pages without a reason.
  489. */
  490. idx = srcu_read_lock(&vmcore_cb_srcu);
  491. if (!list_empty(&vmcore_cb_list))
  492. ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
  493. else
  494. ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot);
  495. srcu_read_unlock(&vmcore_cb_srcu, idx);
  496. return ret;
  497. }
  498. static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
  499. {
  500. size_t size = vma->vm_end - vma->vm_start;
  501. u64 start, end, len, tsz;
  502. struct vmcore *m;
  503. start = (u64)vma->vm_pgoff << PAGE_SHIFT;
  504. end = start + size;
  505. if (size > vmcore_size || end > vmcore_size)
  506. return -EINVAL;
  507. if (vma->vm_flags & (VM_WRITE | VM_EXEC))
  508. return -EPERM;
  509. vm_flags_mod(vma, VM_MIXEDMAP, VM_MAYWRITE | VM_MAYEXEC);
  510. vma->vm_ops = &vmcore_mmap_ops;
  511. len = 0;
  512. if (start < elfcorebuf_sz) {
  513. u64 pfn;
  514. tsz = min(elfcorebuf_sz - (size_t)start, size);
  515. pfn = __pa(elfcorebuf + start) >> PAGE_SHIFT;
  516. if (remap_pfn_range(vma, vma->vm_start, pfn, tsz,
  517. vma->vm_page_prot))
  518. return -EAGAIN;
  519. size -= tsz;
  520. start += tsz;
  521. len += tsz;
  522. if (size == 0)
  523. return 0;
  524. }
  525. if (start < elfcorebuf_sz + elfnotes_sz) {
  526. void *kaddr;
  527. /* We add device dumps before other elf notes because the
  528. * other elf notes may not fill the elf notes buffer
  529. * completely and we will end up with zero-filled data
  530. * between the elf notes and the device dumps. Tools will
  531. * then try to decode this zero-filled data as valid notes
  532. * and we don't want that. Hence, adding device dumps before
  533. * the other elf notes ensures that zero-filled data can be
  534. * avoided. This also ensures that the device dumps and
  535. * other elf notes can be properly mmaped at page aligned
  536. * address.
  537. */
  538. #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
  539. /* Read device dumps */
  540. if (start < elfcorebuf_sz + vmcoredd_orig_sz) {
  541. u64 start_off;
  542. tsz = min(elfcorebuf_sz + vmcoredd_orig_sz -
  543. (size_t)start, size);
  544. start_off = start - elfcorebuf_sz;
  545. if (vmcoredd_mmap_dumps(vma, vma->vm_start + len,
  546. start_off, tsz))
  547. goto fail;
  548. size -= tsz;
  549. start += tsz;
  550. len += tsz;
  551. /* leave now if the buffer is already filled */
  552. if (!size)
  553. return 0;
  554. }
  555. #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
  556. /* Read remaining elf notes */
  557. tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size);
  558. kaddr = elfnotes_buf + start - elfcorebuf_sz - vmcoredd_orig_sz;
  559. if (remap_vmalloc_range_partial(vma, vma->vm_start + len,
  560. kaddr, 0, tsz))
  561. goto fail;
  562. size -= tsz;
  563. start += tsz;
  564. len += tsz;
  565. if (size == 0)
  566. return 0;
  567. }
  568. list_for_each_entry(m, &vmcore_list, list) {
  569. if (start < m->offset + m->size) {
  570. u64 paddr = 0;
  571. tsz = (size_t)min_t(unsigned long long,
  572. m->offset + m->size - start, size);
  573. paddr = m->paddr + start - m->offset;
  574. if (vmcore_remap_oldmem_pfn(vma, vma->vm_start + len,
  575. paddr >> PAGE_SHIFT, tsz,
  576. vma->vm_page_prot))
  577. goto fail;
  578. size -= tsz;
  579. start += tsz;
  580. len += tsz;
  581. if (size == 0)
  582. return 0;
  583. }
  584. }
  585. return 0;
  586. fail:
  587. do_munmap(vma->vm_mm, vma->vm_start, len, NULL);
  588. return -EAGAIN;
  589. }
  590. #else
  591. static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
  592. {
  593. return -ENOSYS;
  594. }
  595. #endif
  596. static const struct proc_ops vmcore_proc_ops = {
  597. .proc_open = open_vmcore,
  598. .proc_read_iter = read_vmcore,
  599. .proc_lseek = default_llseek,
  600. .proc_mmap = mmap_vmcore,
  601. };
  602. static struct vmcore* __init get_new_element(void)
  603. {
  604. return kzalloc(sizeof(struct vmcore), GFP_KERNEL);
  605. }
  606. static u64 get_vmcore_size(size_t elfsz, size_t elfnotesegsz,
  607. struct list_head *vc_list)
  608. {
  609. u64 size;
  610. struct vmcore *m;
  611. size = elfsz + elfnotesegsz;
  612. list_for_each_entry(m, vc_list, list) {
  613. size += m->size;
  614. }
  615. return size;
  616. }
  617. /**
  618. * update_note_header_size_elf64 - update p_memsz member of each PT_NOTE entry
  619. *
  620. * @ehdr_ptr: ELF header
  621. *
  622. * This function updates the p_memsz member of each PT_NOTE entry in the
  623. * program header table pointed to by @ehdr_ptr to the real size of the
  624. * ELF note segment.
  625. */
  626. static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr)
  627. {
  628. int i, rc=0;
  629. Elf64_Phdr *phdr_ptr;
  630. Elf64_Nhdr *nhdr_ptr;
  631. phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1);
  632. for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
  633. void *notes_section;
  634. u64 offset, max_sz, sz, real_sz = 0;
  635. if (phdr_ptr->p_type != PT_NOTE)
  636. continue;
  637. max_sz = phdr_ptr->p_memsz;
  638. offset = phdr_ptr->p_offset;
  639. notes_section = kmalloc(max_sz, GFP_KERNEL);
  640. if (!notes_section)
  641. return -ENOMEM;
  642. rc = elfcorehdr_read_notes(notes_section, max_sz, &offset);
  643. if (rc < 0) {
  644. kfree(notes_section);
  645. return rc;
  646. }
  647. nhdr_ptr = notes_section;
  648. while (nhdr_ptr->n_namesz != 0) {
  649. sz = sizeof(Elf64_Nhdr) +
  650. (((u64)nhdr_ptr->n_namesz + 3) & ~3) +
  651. (((u64)nhdr_ptr->n_descsz + 3) & ~3);
  652. if ((real_sz + sz) > max_sz) {
  653. pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n",
  654. nhdr_ptr->n_namesz, nhdr_ptr->n_descsz);
  655. break;
  656. }
  657. real_sz += sz;
  658. nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz);
  659. }
  660. kfree(notes_section);
  661. phdr_ptr->p_memsz = real_sz;
  662. if (real_sz == 0) {
  663. pr_warn("Warning: Zero PT_NOTE entries found\n");
  664. }
  665. }
  666. return 0;
  667. }
  668. /**
  669. * get_note_number_and_size_elf64 - get the number of PT_NOTE program
  670. * headers and sum of real size of their ELF note segment headers and
  671. * data.
  672. *
  673. * @ehdr_ptr: ELF header
  674. * @nr_ptnote: buffer for the number of PT_NOTE program headers
  675. * @sz_ptnote: buffer for size of unique PT_NOTE program header
  676. *
  677. * This function is used to merge multiple PT_NOTE program headers
  678. * into a single unique one. The resulting unique entry will have
  679. * @sz_ptnote in its phdr->p_memsz.
  680. *
  681. * It is assumed that the PT_NOTE program headers pointed to by
  682. * @ehdr_ptr have already been updated by update_note_header_size_elf64
  683. * and that each PT_NOTE program header has the actual ELF note segment
  684. * size in its p_memsz member.
  685. */
  686. static int __init get_note_number_and_size_elf64(const Elf64_Ehdr *ehdr_ptr,
  687. int *nr_ptnote, u64 *sz_ptnote)
  688. {
  689. int i;
  690. Elf64_Phdr *phdr_ptr;
  691. *nr_ptnote = *sz_ptnote = 0;
  692. phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1);
  693. for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
  694. if (phdr_ptr->p_type != PT_NOTE)
  695. continue;
  696. *nr_ptnote += 1;
  697. *sz_ptnote += phdr_ptr->p_memsz;
  698. }
  699. return 0;
  700. }
  701. /**
  702. * copy_notes_elf64 - copy ELF note segments in a given buffer
  703. *
  704. * @ehdr_ptr: ELF header
  705. * @notes_buf: buffer into which ELF note segments are copied
  706. *
  707. * This function is used to copy the ELF note segments from the 1st kernel
  708. * into the buffer @notes_buf in the 2nd kernel. It is assumed that the
  709. * size of the buffer @notes_buf is equal to or larger than the sum of the
  710. * real ELF note segment headers and data.
  711. *
  712. * It is assumed that the PT_NOTE program headers pointed to by
  713. * @ehdr_ptr have already been updated by update_note_header_size_elf64
  714. * and that each PT_NOTE program header has the actual ELF note segment
  715. * size in its p_memsz member.
  716. */
  717. static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf)
  718. {
  719. int i, rc=0;
  720. Elf64_Phdr *phdr_ptr;
  721. phdr_ptr = (Elf64_Phdr*)(ehdr_ptr + 1);
  722. for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
  723. u64 offset;
  724. if (phdr_ptr->p_type != PT_NOTE)
  725. continue;
  726. offset = phdr_ptr->p_offset;
  727. rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz,
  728. &offset);
  729. if (rc < 0)
  730. return rc;
  731. notes_buf += phdr_ptr->p_memsz;
  732. }
  733. return 0;
  734. }
  735. /* Merges all the PT_NOTE headers into one. */
  736. static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
  737. char **notes_buf, size_t *notes_sz)
  738. {
  739. int i, nr_ptnote=0, rc=0;
  740. char *tmp;
  741. Elf64_Ehdr *ehdr_ptr;
  742. Elf64_Phdr phdr;
  743. u64 phdr_sz = 0, note_off;
  744. ehdr_ptr = (Elf64_Ehdr *)elfptr;
  745. rc = update_note_header_size_elf64(ehdr_ptr);
  746. if (rc < 0)
  747. return rc;
  748. rc = get_note_number_and_size_elf64(ehdr_ptr, &nr_ptnote, &phdr_sz);
  749. if (rc < 0)
  750. return rc;
  751. *notes_sz = roundup(phdr_sz, PAGE_SIZE);
  752. *notes_buf = vmcore_alloc_buf(*notes_sz);
  753. if (!*notes_buf)
  754. return -ENOMEM;
  755. rc = copy_notes_elf64(ehdr_ptr, *notes_buf);
  756. if (rc < 0)
  757. return rc;
  758. /* Prepare merged PT_NOTE program header. */
  759. phdr.p_type = PT_NOTE;
  760. phdr.p_flags = 0;
  761. note_off = sizeof(Elf64_Ehdr) +
  762. (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr);
  763. phdr.p_offset = roundup(note_off, PAGE_SIZE);
  764. phdr.p_vaddr = phdr.p_paddr = 0;
  765. phdr.p_filesz = phdr.p_memsz = phdr_sz;
  766. phdr.p_align = 0;
  767. /* Add merged PT_NOTE program header*/
  768. tmp = elfptr + sizeof(Elf64_Ehdr);
  769. memcpy(tmp, &phdr, sizeof(phdr));
  770. tmp += sizeof(phdr);
  771. /* Remove unwanted PT_NOTE program headers. */
  772. i = (nr_ptnote - 1) * sizeof(Elf64_Phdr);
  773. *elfsz = *elfsz - i;
  774. memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr)));
  775. memset(elfptr + *elfsz, 0, i);
  776. *elfsz = roundup(*elfsz, PAGE_SIZE);
  777. /* Modify e_phnum to reflect merged headers. */
  778. ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
  779. /* Store the size of all notes. We need this to update the note
  780. * header when device dumps are added.
  781. */
  782. elfnotes_orig_sz = phdr.p_memsz;
  783. return 0;
  784. }
  785. /**
  786. * update_note_header_size_elf32 - update p_memsz member of each PT_NOTE entry
  787. *
  788. * @ehdr_ptr: ELF header
  789. *
  790. * This function updates the p_memsz member of each PT_NOTE entry in the
  791. * program header table pointed to by @ehdr_ptr to the real size of the
  792. * ELF note segment.
  793. */
  794. static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr)
  795. {
  796. int i, rc=0;
  797. Elf32_Phdr *phdr_ptr;
  798. Elf32_Nhdr *nhdr_ptr;
  799. phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1);
  800. for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
  801. void *notes_section;
  802. u64 offset, max_sz, sz, real_sz = 0;
  803. if (phdr_ptr->p_type != PT_NOTE)
  804. continue;
  805. max_sz = phdr_ptr->p_memsz;
  806. offset = phdr_ptr->p_offset;
  807. notes_section = kmalloc(max_sz, GFP_KERNEL);
  808. if (!notes_section)
  809. return -ENOMEM;
  810. rc = elfcorehdr_read_notes(notes_section, max_sz, &offset);
  811. if (rc < 0) {
  812. kfree(notes_section);
  813. return rc;
  814. }
  815. nhdr_ptr = notes_section;
  816. while (nhdr_ptr->n_namesz != 0) {
  817. sz = sizeof(Elf32_Nhdr) +
  818. (((u64)nhdr_ptr->n_namesz + 3) & ~3) +
  819. (((u64)nhdr_ptr->n_descsz + 3) & ~3);
  820. if ((real_sz + sz) > max_sz) {
  821. pr_warn("Warning: Exceeded p_memsz, dropping PT_NOTE entry n_namesz=0x%x, n_descsz=0x%x\n",
  822. nhdr_ptr->n_namesz, nhdr_ptr->n_descsz);
  823. break;
  824. }
  825. real_sz += sz;
  826. nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz);
  827. }
  828. kfree(notes_section);
  829. phdr_ptr->p_memsz = real_sz;
  830. if (real_sz == 0) {
  831. pr_warn("Warning: Zero PT_NOTE entries found\n");
  832. }
  833. }
  834. return 0;
  835. }
  836. /**
  837. * get_note_number_and_size_elf32 - get the number of PT_NOTE program
  838. * headers and sum of real size of their ELF note segment headers and
  839. * data.
  840. *
  841. * @ehdr_ptr: ELF header
  842. * @nr_ptnote: buffer for the number of PT_NOTE program headers
  843. * @sz_ptnote: buffer for size of unique PT_NOTE program header
  844. *
  845. * This function is used to merge multiple PT_NOTE program headers
  846. * into a single unique one. The resulting unique entry will have
  847. * @sz_ptnote in its phdr->p_memsz.
  848. *
  849. * It is assumed that the PT_NOTE program headers pointed to by
  850. * @ehdr_ptr have already been updated by update_note_header_size_elf32
  851. * and that each PT_NOTE program header has the actual ELF note segment
  852. * size in its p_memsz member.
  853. */
  854. static int __init get_note_number_and_size_elf32(const Elf32_Ehdr *ehdr_ptr,
  855. int *nr_ptnote, u64 *sz_ptnote)
  856. {
  857. int i;
  858. Elf32_Phdr *phdr_ptr;
  859. *nr_ptnote = *sz_ptnote = 0;
  860. phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1);
  861. for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
  862. if (phdr_ptr->p_type != PT_NOTE)
  863. continue;
  864. *nr_ptnote += 1;
  865. *sz_ptnote += phdr_ptr->p_memsz;
  866. }
  867. return 0;
  868. }
  869. /**
  870. * copy_notes_elf32 - copy ELF note segments in a given buffer
  871. *
  872. * @ehdr_ptr: ELF header
  873. * @notes_buf: buffer into which ELF note segments are copied
  874. *
  875. * This function is used to copy the ELF note segments from the 1st kernel
  876. * into the buffer @notes_buf in the 2nd kernel. It is assumed that the
  877. * size of the buffer @notes_buf is equal to or larger than the sum of the
  878. * real ELF note segment headers and data.
  879. *
  880. * It is assumed that the PT_NOTE program headers pointed to by
  881. * @ehdr_ptr have already been updated by update_note_header_size_elf32
  882. * and that each PT_NOTE program header has the actual ELF note segment
  883. * size in its p_memsz member.
  884. */
  885. static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf)
  886. {
  887. int i, rc=0;
  888. Elf32_Phdr *phdr_ptr;
  889. phdr_ptr = (Elf32_Phdr*)(ehdr_ptr + 1);
  890. for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
  891. u64 offset;
  892. if (phdr_ptr->p_type != PT_NOTE)
  893. continue;
  894. offset = phdr_ptr->p_offset;
  895. rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz,
  896. &offset);
  897. if (rc < 0)
  898. return rc;
  899. notes_buf += phdr_ptr->p_memsz;
  900. }
  901. return 0;
  902. }
  903. /* Merges all the PT_NOTE headers into one. */
  904. static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
  905. char **notes_buf, size_t *notes_sz)
  906. {
  907. int i, nr_ptnote=0, rc=0;
  908. char *tmp;
  909. Elf32_Ehdr *ehdr_ptr;
  910. Elf32_Phdr phdr;
  911. u64 phdr_sz = 0, note_off;
  912. ehdr_ptr = (Elf32_Ehdr *)elfptr;
  913. rc = update_note_header_size_elf32(ehdr_ptr);
  914. if (rc < 0)
  915. return rc;
  916. rc = get_note_number_and_size_elf32(ehdr_ptr, &nr_ptnote, &phdr_sz);
  917. if (rc < 0)
  918. return rc;
  919. *notes_sz = roundup(phdr_sz, PAGE_SIZE);
  920. *notes_buf = vmcore_alloc_buf(*notes_sz);
  921. if (!*notes_buf)
  922. return -ENOMEM;
  923. rc = copy_notes_elf32(ehdr_ptr, *notes_buf);
  924. if (rc < 0)
  925. return rc;
  926. /* Prepare merged PT_NOTE program header. */
  927. phdr.p_type = PT_NOTE;
  928. phdr.p_flags = 0;
  929. note_off = sizeof(Elf32_Ehdr) +
  930. (ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr);
  931. phdr.p_offset = roundup(note_off, PAGE_SIZE);
  932. phdr.p_vaddr = phdr.p_paddr = 0;
  933. phdr.p_filesz = phdr.p_memsz = phdr_sz;
  934. phdr.p_align = 0;
  935. /* Add merged PT_NOTE program header*/
  936. tmp = elfptr + sizeof(Elf32_Ehdr);
  937. memcpy(tmp, &phdr, sizeof(phdr));
  938. tmp += sizeof(phdr);
  939. /* Remove unwanted PT_NOTE program headers. */
  940. i = (nr_ptnote - 1) * sizeof(Elf32_Phdr);
  941. *elfsz = *elfsz - i;
  942. memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr)));
  943. memset(elfptr + *elfsz, 0, i);
  944. *elfsz = roundup(*elfsz, PAGE_SIZE);
  945. /* Modify e_phnum to reflect merged headers. */
  946. ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
  947. /* Store the size of all notes. We need this to update the note
  948. * header when device dumps are added.
  949. */
  950. elfnotes_orig_sz = phdr.p_memsz;
  951. return 0;
  952. }
  953. /* Add memory chunks represented by program headers to vmcore list. Also update
  954. * the new offset fields of exported program headers. */
  955. static int __init process_ptload_program_headers_elf64(char *elfptr,
  956. size_t elfsz,
  957. size_t elfnotes_sz,
  958. struct list_head *vc_list)
  959. {
  960. int i;
  961. Elf64_Ehdr *ehdr_ptr;
  962. Elf64_Phdr *phdr_ptr;
  963. loff_t vmcore_off;
  964. struct vmcore *new;
  965. ehdr_ptr = (Elf64_Ehdr *)elfptr;
  966. phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */
  967. /* Skip Elf header, program headers and Elf note segment. */
  968. vmcore_off = elfsz + elfnotes_sz;
  969. for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
  970. u64 paddr, start, end, size;
  971. if (phdr_ptr->p_type != PT_LOAD)
  972. continue;
  973. paddr = phdr_ptr->p_offset;
  974. start = rounddown(paddr, PAGE_SIZE);
  975. end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE);
  976. size = end - start;
  977. /* Add this contiguous chunk of memory to vmcore list.*/
  978. new = get_new_element();
  979. if (!new)
  980. return -ENOMEM;
  981. new->paddr = start;
  982. new->size = size;
  983. list_add_tail(&new->list, vc_list);
  984. /* Update the program header offset. */
  985. phdr_ptr->p_offset = vmcore_off + (paddr - start);
  986. vmcore_off = vmcore_off + size;
  987. }
  988. return 0;
  989. }
  990. static int __init process_ptload_program_headers_elf32(char *elfptr,
  991. size_t elfsz,
  992. size_t elfnotes_sz,
  993. struct list_head *vc_list)
  994. {
  995. int i;
  996. Elf32_Ehdr *ehdr_ptr;
  997. Elf32_Phdr *phdr_ptr;
  998. loff_t vmcore_off;
  999. struct vmcore *new;
  1000. ehdr_ptr = (Elf32_Ehdr *)elfptr;
  1001. phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */
  1002. /* Skip Elf header, program headers and Elf note segment. */
  1003. vmcore_off = elfsz + elfnotes_sz;
  1004. for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
  1005. u64 paddr, start, end, size;
  1006. if (phdr_ptr->p_type != PT_LOAD)
  1007. continue;
  1008. paddr = phdr_ptr->p_offset;
  1009. start = rounddown(paddr, PAGE_SIZE);
  1010. end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE);
  1011. size = end - start;
  1012. /* Add this contiguous chunk of memory to vmcore list.*/
  1013. new = get_new_element();
  1014. if (!new)
  1015. return -ENOMEM;
  1016. new->paddr = start;
  1017. new->size = size;
  1018. list_add_tail(&new->list, vc_list);
  1019. /* Update the program header offset */
  1020. phdr_ptr->p_offset = vmcore_off + (paddr - start);
  1021. vmcore_off = vmcore_off + size;
  1022. }
  1023. return 0;
  1024. }
  1025. /* Sets offset fields of vmcore elements. */
  1026. static void set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz,
  1027. struct list_head *vc_list)
  1028. {
  1029. loff_t vmcore_off;
  1030. struct vmcore *m;
  1031. /* Skip Elf header, program headers and Elf note segment. */
  1032. vmcore_off = elfsz + elfnotes_sz;
  1033. list_for_each_entry(m, vc_list, list) {
  1034. m->offset = vmcore_off;
  1035. vmcore_off += m->size;
  1036. }
  1037. }
  1038. static void free_elfcorebuf(void)
  1039. {
  1040. free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig));
  1041. elfcorebuf = NULL;
  1042. vfree(elfnotes_buf);
  1043. elfnotes_buf = NULL;
  1044. }
  1045. static int __init parse_crash_elf64_headers(void)
  1046. {
  1047. int rc=0;
  1048. Elf64_Ehdr ehdr;
  1049. u64 addr;
  1050. addr = elfcorehdr_addr;
  1051. /* Read Elf header */
  1052. rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf64_Ehdr), &addr);
  1053. if (rc < 0)
  1054. return rc;
  1055. /* Do some basic verification. */
  1056. if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
  1057. (ehdr.e_type != ET_CORE) ||
  1058. !vmcore_elf64_check_arch(&ehdr) ||
  1059. ehdr.e_ident[EI_CLASS] != ELFCLASS64 ||
  1060. ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
  1061. ehdr.e_version != EV_CURRENT ||
  1062. ehdr.e_ehsize != sizeof(Elf64_Ehdr) ||
  1063. ehdr.e_phentsize != sizeof(Elf64_Phdr) ||
  1064. ehdr.e_phnum == 0) {
  1065. pr_warn("Warning: Core image elf header is not sane\n");
  1066. return -EINVAL;
  1067. }
  1068. /* Read in all elf headers. */
  1069. elfcorebuf_sz_orig = sizeof(Elf64_Ehdr) +
  1070. ehdr.e_phnum * sizeof(Elf64_Phdr);
  1071. elfcorebuf_sz = elfcorebuf_sz_orig;
  1072. elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
  1073. get_order(elfcorebuf_sz_orig));
  1074. if (!elfcorebuf)
  1075. return -ENOMEM;
  1076. addr = elfcorehdr_addr;
  1077. rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr);
  1078. if (rc < 0)
  1079. goto fail;
  1080. /* Merge all PT_NOTE headers into one. */
  1081. rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz,
  1082. &elfnotes_buf, &elfnotes_sz);
  1083. if (rc)
  1084. goto fail;
  1085. rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz,
  1086. elfnotes_sz, &vmcore_list);
  1087. if (rc)
  1088. goto fail;
  1089. set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
  1090. return 0;
  1091. fail:
  1092. free_elfcorebuf();
  1093. return rc;
  1094. }
  1095. static int __init parse_crash_elf32_headers(void)
  1096. {
  1097. int rc=0;
  1098. Elf32_Ehdr ehdr;
  1099. u64 addr;
  1100. addr = elfcorehdr_addr;
  1101. /* Read Elf header */
  1102. rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf32_Ehdr), &addr);
  1103. if (rc < 0)
  1104. return rc;
  1105. /* Do some basic verification. */
  1106. if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
  1107. (ehdr.e_type != ET_CORE) ||
  1108. !vmcore_elf32_check_arch(&ehdr) ||
  1109. ehdr.e_ident[EI_CLASS] != ELFCLASS32||
  1110. ehdr.e_ident[EI_VERSION] != EV_CURRENT ||
  1111. ehdr.e_version != EV_CURRENT ||
  1112. ehdr.e_ehsize != sizeof(Elf32_Ehdr) ||
  1113. ehdr.e_phentsize != sizeof(Elf32_Phdr) ||
  1114. ehdr.e_phnum == 0) {
  1115. pr_warn("Warning: Core image elf header is not sane\n");
  1116. return -EINVAL;
  1117. }
  1118. /* Read in all elf headers. */
  1119. elfcorebuf_sz_orig = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr);
  1120. elfcorebuf_sz = elfcorebuf_sz_orig;
  1121. elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
  1122. get_order(elfcorebuf_sz_orig));
  1123. if (!elfcorebuf)
  1124. return -ENOMEM;
  1125. addr = elfcorehdr_addr;
  1126. rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr);
  1127. if (rc < 0)
  1128. goto fail;
  1129. /* Merge all PT_NOTE headers into one. */
  1130. rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz,
  1131. &elfnotes_buf, &elfnotes_sz);
  1132. if (rc)
  1133. goto fail;
  1134. rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz,
  1135. elfnotes_sz, &vmcore_list);
  1136. if (rc)
  1137. goto fail;
  1138. set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
  1139. return 0;
  1140. fail:
  1141. free_elfcorebuf();
  1142. return rc;
  1143. }
  1144. static int __init parse_crash_elf_headers(void)
  1145. {
  1146. unsigned char e_ident[EI_NIDENT];
  1147. u64 addr;
  1148. int rc=0;
  1149. addr = elfcorehdr_addr;
  1150. rc = elfcorehdr_read(e_ident, EI_NIDENT, &addr);
  1151. if (rc < 0)
  1152. return rc;
  1153. if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) {
  1154. pr_warn("Warning: Core image elf header not found\n");
  1155. return -EINVAL;
  1156. }
  1157. if (e_ident[EI_CLASS] == ELFCLASS64) {
  1158. rc = parse_crash_elf64_headers();
  1159. if (rc)
  1160. return rc;
  1161. } else if (e_ident[EI_CLASS] == ELFCLASS32) {
  1162. rc = parse_crash_elf32_headers();
  1163. if (rc)
  1164. return rc;
  1165. } else {
  1166. pr_warn("Warning: Core image elf header is not sane\n");
  1167. return -EINVAL;
  1168. }
  1169. /* Determine vmcore size. */
  1170. vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz,
  1171. &vmcore_list);
  1172. return 0;
  1173. }
  1174. #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
  1175. /**
  1176. * vmcoredd_write_header - Write vmcore device dump header at the
  1177. * beginning of the dump's buffer.
  1178. * @buf: Output buffer where the note is written
  1179. * @data: Dump info
  1180. * @size: Size of the dump
  1181. *
  1182. * Fills beginning of the dump's buffer with vmcore device dump header.
  1183. */
  1184. static void vmcoredd_write_header(void *buf, struct vmcoredd_data *data,
  1185. u32 size)
  1186. {
  1187. struct vmcoredd_header *vdd_hdr = (struct vmcoredd_header *)buf;
  1188. vdd_hdr->n_namesz = sizeof(vdd_hdr->name);
  1189. vdd_hdr->n_descsz = size + sizeof(vdd_hdr->dump_name);
  1190. vdd_hdr->n_type = NT_VMCOREDD;
  1191. strncpy((char *)vdd_hdr->name, VMCOREDD_NOTE_NAME,
  1192. sizeof(vdd_hdr->name));
  1193. memcpy(vdd_hdr->dump_name, data->dump_name, sizeof(vdd_hdr->dump_name));
  1194. }
  1195. /**
  1196. * vmcoredd_update_program_headers - Update all Elf program headers
  1197. * @elfptr: Pointer to elf header
  1198. * @elfnotesz: Size of elf notes aligned to page size
  1199. * @vmcoreddsz: Size of device dumps to be added to elf note header
  1200. *
  1201. * Determine type of Elf header (Elf64 or Elf32) and update the elf note size.
  1202. * Also update the offsets of all the program headers after the elf note header.
  1203. */
  1204. static void vmcoredd_update_program_headers(char *elfptr, size_t elfnotesz,
  1205. size_t vmcoreddsz)
  1206. {
  1207. unsigned char *e_ident = (unsigned char *)elfptr;
  1208. u64 start, end, size;
  1209. loff_t vmcore_off;
  1210. u32 i;
  1211. vmcore_off = elfcorebuf_sz + elfnotesz;
  1212. if (e_ident[EI_CLASS] == ELFCLASS64) {
  1213. Elf64_Ehdr *ehdr = (Elf64_Ehdr *)elfptr;
  1214. Elf64_Phdr *phdr = (Elf64_Phdr *)(elfptr + sizeof(Elf64_Ehdr));
  1215. /* Update all program headers */
  1216. for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
  1217. if (phdr->p_type == PT_NOTE) {
  1218. /* Update note size */
  1219. phdr->p_memsz = elfnotes_orig_sz + vmcoreddsz;
  1220. phdr->p_filesz = phdr->p_memsz;
  1221. continue;
  1222. }
  1223. start = rounddown(phdr->p_offset, PAGE_SIZE);
  1224. end = roundup(phdr->p_offset + phdr->p_memsz,
  1225. PAGE_SIZE);
  1226. size = end - start;
  1227. phdr->p_offset = vmcore_off + (phdr->p_offset - start);
  1228. vmcore_off += size;
  1229. }
  1230. } else {
  1231. Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfptr;
  1232. Elf32_Phdr *phdr = (Elf32_Phdr *)(elfptr + sizeof(Elf32_Ehdr));
  1233. /* Update all program headers */
  1234. for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
  1235. if (phdr->p_type == PT_NOTE) {
  1236. /* Update note size */
  1237. phdr->p_memsz = elfnotes_orig_sz + vmcoreddsz;
  1238. phdr->p_filesz = phdr->p_memsz;
  1239. continue;
  1240. }
  1241. start = rounddown(phdr->p_offset, PAGE_SIZE);
  1242. end = roundup(phdr->p_offset + phdr->p_memsz,
  1243. PAGE_SIZE);
  1244. size = end - start;
  1245. phdr->p_offset = vmcore_off + (phdr->p_offset - start);
  1246. vmcore_off += size;
  1247. }
  1248. }
  1249. }
  1250. /**
  1251. * vmcoredd_update_size - Update the total size of the device dumps and update
  1252. * Elf header
  1253. * @dump_size: Size of the current device dump to be added to total size
  1254. *
  1255. * Update the total size of all the device dumps and update the Elf program
  1256. * headers. Calculate the new offsets for the vmcore list and update the
  1257. * total vmcore size.
  1258. */
  1259. static void vmcoredd_update_size(size_t dump_size)
  1260. {
  1261. vmcoredd_orig_sz += dump_size;
  1262. elfnotes_sz = roundup(elfnotes_orig_sz, PAGE_SIZE) + vmcoredd_orig_sz;
  1263. vmcoredd_update_program_headers(elfcorebuf, elfnotes_sz,
  1264. vmcoredd_orig_sz);
  1265. /* Update vmcore list offsets */
  1266. set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
  1267. vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz,
  1268. &vmcore_list);
  1269. proc_vmcore->size = vmcore_size;
  1270. }
  1271. /**
  1272. * vmcore_add_device_dump - Add a buffer containing device dump to vmcore
  1273. * @data: dump info.
  1274. *
  1275. * Allocate a buffer and invoke the calling driver's dump collect routine.
  1276. * Write Elf note at the beginning of the buffer to indicate vmcore device
  1277. * dump and add the dump to global list.
  1278. */
  1279. int vmcore_add_device_dump(struct vmcoredd_data *data)
  1280. {
  1281. struct vmcoredd_node *dump;
  1282. void *buf = NULL;
  1283. size_t data_size;
  1284. int ret;
  1285. if (vmcoredd_disabled) {
  1286. pr_err_once("Device dump is disabled\n");
  1287. return -EINVAL;
  1288. }
  1289. if (!data || !strlen(data->dump_name) ||
  1290. !data->vmcoredd_callback || !data->size)
  1291. return -EINVAL;
  1292. dump = vzalloc(sizeof(*dump));
  1293. if (!dump) {
  1294. ret = -ENOMEM;
  1295. goto out_err;
  1296. }
  1297. /* Keep size of the buffer page aligned so that it can be mmaped */
  1298. data_size = roundup(sizeof(struct vmcoredd_header) + data->size,
  1299. PAGE_SIZE);
  1300. /* Allocate buffer for drivers to write their dumps */
  1301. buf = vmcore_alloc_buf(data_size);
  1302. if (!buf) {
  1303. ret = -ENOMEM;
  1304. goto out_err;
  1305. }
  1306. vmcoredd_write_header(buf, data, data_size -
  1307. sizeof(struct vmcoredd_header));
  1308. /* Invoke the driver's dump collection routine */
  1309. ret = data->vmcoredd_callback(data, buf +
  1310. sizeof(struct vmcoredd_header));
  1311. if (ret)
  1312. goto out_err;
  1313. dump->buf = buf;
  1314. dump->size = data_size;
  1315. /* Add the dump to driver sysfs list */
  1316. mutex_lock(&vmcoredd_mutex);
  1317. list_add_tail(&dump->list, &vmcoredd_list);
  1318. mutex_unlock(&vmcoredd_mutex);
  1319. vmcoredd_update_size(data_size);
  1320. return 0;
  1321. out_err:
  1322. vfree(buf);
  1323. vfree(dump);
  1324. return ret;
  1325. }
  1326. EXPORT_SYMBOL(vmcore_add_device_dump);
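/*
 * Illustrative sketch, not part of the original file: how a driver's crash
 * handler might add a device dump using the interface above (cxgb4 is an
 * in-tree user). The "example_" identifiers and the 64 KiB size are
 * hypothetical; only struct vmcoredd_data and vmcore_add_device_dump() come
 * from this interface.
 */
#if 0
static int example_collect_dump(struct vmcoredd_data *data, void *buf)
{
	/* A real driver would copy up to data->size bytes of device state. */
	memset(buf, 0, data->size);
	return 0;
}

static void example_add_device_dump(void)
{
	static struct vmcoredd_data data = {
		.size = 64 * 1024,	/* arbitrary example size */
		.vmcoredd_callback = example_collect_dump,
	};

	strscpy(data.dump_name, "example_device", sizeof(data.dump_name));
	if (vmcore_add_device_dump(&data))
		pr_warn("example: failed to add device dump to vmcore\n");
}
#endif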
  1327. #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
  1328. /* Free all dumps in vmcore device dump list */
  1329. static void vmcore_free_device_dumps(void)
  1330. {
  1331. #ifdef CONFIG_PROC_VMCORE_DEVICE_DUMP
  1332. mutex_lock(&vmcoredd_mutex);
  1333. while (!list_empty(&vmcoredd_list)) {
  1334. struct vmcoredd_node *dump;
  1335. dump = list_first_entry(&vmcoredd_list, struct vmcoredd_node,
  1336. list);
  1337. list_del(&dump->list);
  1338. vfree(dump->buf);
  1339. vfree(dump);
  1340. }
  1341. mutex_unlock(&vmcoredd_mutex);
  1342. #endif /* CONFIG_PROC_VMCORE_DEVICE_DUMP */
  1343. }
  1344. /* Init function for vmcore module. */
  1345. static int __init vmcore_init(void)
  1346. {
  1347. int rc = 0;
  1348. /* Allow architectures to allocate ELF header in 2nd kernel */
  1349. rc = elfcorehdr_alloc(&elfcorehdr_addr, &elfcorehdr_size);
  1350. if (rc)
  1351. return rc;
  1352. /*
  1353. * If elfcorehdr= has been passed in cmdline or created in 2nd kernel,
  1354. * then capture the dump.
  1355. */
  1356. if (!(is_vmcore_usable()))
  1357. return rc;
  1358. rc = parse_crash_elf_headers();
  1359. if (rc) {
  1360. pr_warn("Kdump: vmcore not initialized\n");
  1361. return rc;
  1362. }
  1363. elfcorehdr_free(elfcorehdr_addr);
  1364. elfcorehdr_addr = ELFCORE_ADDR_ERR;
  1365. proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &vmcore_proc_ops);
  1366. if (proc_vmcore)
  1367. proc_vmcore->size = vmcore_size;
  1368. return 0;
  1369. }
  1370. fs_initcall(vmcore_init);
  1371. /* Cleanup function for vmcore module. */
  1372. void vmcore_cleanup(void)
  1373. {
  1374. if (proc_vmcore) {
  1375. proc_remove(proc_vmcore);
  1376. proc_vmcore = NULL;
  1377. }
  1378. /* clear the vmcore list. */
  1379. while (!list_empty(&vmcore_list)) {
  1380. struct vmcore *m;
  1381. m = list_first_entry(&vmcore_list, struct vmcore, list);
  1382. list_del(&m->list);
  1383. kfree(m);
  1384. }
  1385. free_elfcorebuf();
  1386. /* clear vmcore device dump list */
  1387. vmcore_free_device_dumps();
  1388. }