tlb.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  4. */
  5. #include <linux/mm.h>
  6. #include <linux/module.h>
  7. #include <linux/sched/signal.h>
  8. #include <asm/tlbflush.h>
  9. #include <as-layout.h>
  10. #include <mem_user.h>
  11. #include <os.h>
  12. #include <skas.h>
  13. #include <kern_util.h>
  14. struct host_vm_change {
  15. struct host_vm_op {
  16. enum { NONE, MMAP, MUNMAP, MPROTECT } type;
  17. union {
  18. struct {
  19. unsigned long addr;
  20. unsigned long len;
  21. unsigned int prot;
  22. int fd;
  23. __u64 offset;
  24. } mmap;
  25. struct {
  26. unsigned long addr;
  27. unsigned long len;
  28. } munmap;
  29. struct {
  30. unsigned long addr;
  31. unsigned long len;
  32. unsigned int prot;
  33. } mprotect;
  34. } u;
  35. } ops[1];
  36. int userspace;
  37. int index;
  38. struct mm_struct *mm;
  39. void *data;
  40. int force;
  41. };
  42. #define INIT_HVC(mm, force, userspace) \
  43. ((struct host_vm_change) \
  44. { .ops = { { .type = NONE } }, \
  45. .mm = mm, \
  46. .data = NULL, \
  47. .userspace = userspace, \
  48. .index = 0, \
  49. .force = force })
  50. static void report_enomem(void)
  51. {
  52. printk(KERN_ERR "UML ran out of memory on the host side! "
  53. "This can happen due to a memory limitation or "
  54. "vm.max_map_count has been reached.\n");
  55. }
  56. static int do_ops(struct host_vm_change *hvc, int end,
  57. int finished)
  58. {
  59. struct host_vm_op *op;
  60. int i, ret = 0;
  61. for (i = 0; i < end && !ret; i++) {
  62. op = &hvc->ops[i];
  63. switch (op->type) {
  64. case MMAP:
  65. if (hvc->userspace)
  66. ret = map(&hvc->mm->context.id, op->u.mmap.addr,
  67. op->u.mmap.len, op->u.mmap.prot,
  68. op->u.mmap.fd,
  69. op->u.mmap.offset, finished,
  70. &hvc->data);
  71. else
  72. map_memory(op->u.mmap.addr, op->u.mmap.offset,
  73. op->u.mmap.len, 1, 1, 1);
  74. break;
  75. case MUNMAP:
  76. if (hvc->userspace)
  77. ret = unmap(&hvc->mm->context.id,
  78. op->u.munmap.addr,
  79. op->u.munmap.len, finished,
  80. &hvc->data);
  81. else
  82. ret = os_unmap_memory(
  83. (void *) op->u.munmap.addr,
  84. op->u.munmap.len);
  85. break;
  86. case MPROTECT:
  87. if (hvc->userspace)
  88. ret = protect(&hvc->mm->context.id,
  89. op->u.mprotect.addr,
  90. op->u.mprotect.len,
  91. op->u.mprotect.prot,
  92. finished, &hvc->data);
  93. else
  94. ret = os_protect_memory(
  95. (void *) op->u.mprotect.addr,
  96. op->u.mprotect.len,
  97. 1, 1, 1);
  98. break;
  99. default:
  100. printk(KERN_ERR "Unknown op type %d in do_ops\n",
  101. op->type);
  102. BUG();
  103. break;
  104. }
  105. }
  106. if (ret == -ENOMEM)
  107. report_enomem();
  108. return ret;
  109. }
  110. static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len,
  111. unsigned int prot, struct host_vm_change *hvc)
  112. {
  113. __u64 offset;
  114. struct host_vm_op *last;
  115. int fd = -1, ret = 0;
  116. if (hvc->userspace)
  117. fd = phys_mapping(phys, &offset);
  118. else
  119. offset = phys;
  120. if (hvc->index != 0) {
  121. last = &hvc->ops[hvc->index - 1];
  122. if ((last->type == MMAP) &&
  123. (last->u.mmap.addr + last->u.mmap.len == virt) &&
  124. (last->u.mmap.prot == prot) && (last->u.mmap.fd == fd) &&
  125. (last->u.mmap.offset + last->u.mmap.len == offset)) {
  126. last->u.mmap.len += len;
  127. return 0;
  128. }
  129. }
  130. if (hvc->index == ARRAY_SIZE(hvc->ops)) {
  131. ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
  132. hvc->index = 0;
  133. }
  134. hvc->ops[hvc->index++] = ((struct host_vm_op)
  135. { .type = MMAP,
  136. .u = { .mmap = { .addr = virt,
  137. .len = len,
  138. .prot = prot,
  139. .fd = fd,
  140. .offset = offset }
  141. } });
  142. return ret;
  143. }
  144. static int add_munmap(unsigned long addr, unsigned long len,
  145. struct host_vm_change *hvc)
  146. {
  147. struct host_vm_op *last;
  148. int ret = 0;
  149. if (hvc->index != 0) {
  150. last = &hvc->ops[hvc->index - 1];
  151. if ((last->type == MUNMAP) &&
  152. (last->u.munmap.addr + last->u.mmap.len == addr)) {
  153. last->u.munmap.len += len;
  154. return 0;
  155. }
  156. }
  157. if (hvc->index == ARRAY_SIZE(hvc->ops)) {
  158. ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
  159. hvc->index = 0;
  160. }
  161. hvc->ops[hvc->index++] = ((struct host_vm_op)
  162. { .type = MUNMAP,
  163. .u = { .munmap = { .addr = addr,
  164. .len = len } } });
  165. return ret;
  166. }
  167. static int add_mprotect(unsigned long addr, unsigned long len,
  168. unsigned int prot, struct host_vm_change *hvc)
  169. {
  170. struct host_vm_op *last;
  171. int ret = 0;
  172. if (hvc->index != 0) {
  173. last = &hvc->ops[hvc->index - 1];
  174. if ((last->type == MPROTECT) &&
  175. (last->u.mprotect.addr + last->u.mprotect.len == addr) &&
  176. (last->u.mprotect.prot == prot)) {
  177. last->u.mprotect.len += len;
  178. return 0;
  179. }
  180. }
  181. if (hvc->index == ARRAY_SIZE(hvc->ops)) {
  182. ret = do_ops(hvc, ARRAY_SIZE(hvc->ops), 0);
  183. hvc->index = 0;
  184. }
  185. hvc->ops[hvc->index++] = ((struct host_vm_op)
  186. { .type = MPROTECT,
  187. .u = { .mprotect = { .addr = addr,
  188. .len = len,
  189. .prot = prot } } });
  190. return ret;
  191. }
  192. #define ADD_ROUND(n, inc) (((n) + (inc)) & ~((inc) - 1))
  193. static inline int update_pte_range(pmd_t *pmd, unsigned long addr,
  194. unsigned long end,
  195. struct host_vm_change *hvc)
  196. {
  197. pte_t *pte;
  198. int r, w, x, prot, ret = 0;
  199. pte = pte_offset_kernel(pmd, addr);
  200. do {
  201. r = pte_read(*pte);
  202. w = pte_write(*pte);
  203. x = pte_exec(*pte);
  204. if (!pte_young(*pte)) {
  205. r = 0;
  206. w = 0;
  207. } else if (!pte_dirty(*pte))
  208. w = 0;
  209. prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
  210. (x ? UM_PROT_EXEC : 0));
  211. if (hvc->force || pte_newpage(*pte)) {
  212. if (pte_present(*pte)) {
  213. if (pte_newpage(*pte))
  214. ret = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
  215. PAGE_SIZE, prot, hvc);
  216. } else
  217. ret = add_munmap(addr, PAGE_SIZE, hvc);
  218. } else if (pte_newprot(*pte))
  219. ret = add_mprotect(addr, PAGE_SIZE, prot, hvc);
  220. *pte = pte_mkuptodate(*pte);
  221. } while (pte++, addr += PAGE_SIZE, ((addr < end) && !ret));
  222. return ret;
  223. }
  224. static inline int update_pmd_range(pud_t *pud, unsigned long addr,
  225. unsigned long end,
  226. struct host_vm_change *hvc)
  227. {
  228. pmd_t *pmd;
  229. unsigned long next;
  230. int ret = 0;
  231. pmd = pmd_offset(pud, addr);
  232. do {
  233. next = pmd_addr_end(addr, end);
  234. if (!pmd_present(*pmd)) {
  235. if (hvc->force || pmd_newpage(*pmd)) {
  236. ret = add_munmap(addr, next - addr, hvc);
  237. pmd_mkuptodate(*pmd);
  238. }
  239. }
  240. else ret = update_pte_range(pmd, addr, next, hvc);
  241. } while (pmd++, addr = next, ((addr < end) && !ret));
  242. return ret;
  243. }
  244. static inline int update_pud_range(p4d_t *p4d, unsigned long addr,
  245. unsigned long end,
  246. struct host_vm_change *hvc)
  247. {
  248. pud_t *pud;
  249. unsigned long next;
  250. int ret = 0;
  251. pud = pud_offset(p4d, addr);
  252. do {
  253. next = pud_addr_end(addr, end);
  254. if (!pud_present(*pud)) {
  255. if (hvc->force || pud_newpage(*pud)) {
  256. ret = add_munmap(addr, next - addr, hvc);
  257. pud_mkuptodate(*pud);
  258. }
  259. }
  260. else ret = update_pmd_range(pud, addr, next, hvc);
  261. } while (pud++, addr = next, ((addr < end) && !ret));
  262. return ret;
  263. }
  264. static inline int update_p4d_range(pgd_t *pgd, unsigned long addr,
  265. unsigned long end,
  266. struct host_vm_change *hvc)
  267. {
  268. p4d_t *p4d;
  269. unsigned long next;
  270. int ret = 0;
  271. p4d = p4d_offset(pgd, addr);
  272. do {
  273. next = p4d_addr_end(addr, end);
  274. if (!p4d_present(*p4d)) {
  275. if (hvc->force || p4d_newpage(*p4d)) {
  276. ret = add_munmap(addr, next - addr, hvc);
  277. p4d_mkuptodate(*p4d);
  278. }
  279. } else
  280. ret = update_pud_range(p4d, addr, next, hvc);
  281. } while (p4d++, addr = next, ((addr < end) && !ret));
  282. return ret;
  283. }
  284. void fix_range_common(struct mm_struct *mm, unsigned long start_addr,
  285. unsigned long end_addr, int force)
  286. {
  287. pgd_t *pgd;
  288. struct host_vm_change hvc;
  289. unsigned long addr = start_addr, next;
  290. int ret = 0, userspace = 1;
  291. hvc = INIT_HVC(mm, force, userspace);
  292. pgd = pgd_offset(mm, addr);
  293. do {
  294. next = pgd_addr_end(addr, end_addr);
  295. if (!pgd_present(*pgd)) {
  296. if (force || pgd_newpage(*pgd)) {
  297. ret = add_munmap(addr, next - addr, &hvc);
  298. pgd_mkuptodate(*pgd);
  299. }
  300. } else
  301. ret = update_p4d_range(pgd, addr, next, &hvc);
  302. } while (pgd++, addr = next, ((addr < end_addr) && !ret));
  303. if (!ret)
  304. ret = do_ops(&hvc, hvc.index, 1);
  305. /* This is not an else because ret is modified above */
  306. if (ret) {
  307. struct mm_id *mm_idp = &current->mm->context.id;
  308. printk(KERN_ERR "fix_range_common: failed, killing current "
  309. "process: %d\n", task_tgid_vnr(current));
  310. mm_idp->kill = 1;
  311. }
  312. }
  313. static int flush_tlb_kernel_range_common(unsigned long start, unsigned long end)
  314. {
  315. struct mm_struct *mm;
  316. pgd_t *pgd;
  317. p4d_t *p4d;
  318. pud_t *pud;
  319. pmd_t *pmd;
  320. pte_t *pte;
  321. unsigned long addr, last;
  322. int updated = 0, err = 0, force = 0, userspace = 0;
  323. struct host_vm_change hvc;
  324. mm = &init_mm;
  325. hvc = INIT_HVC(mm, force, userspace);
  326. for (addr = start; addr < end;) {
  327. pgd = pgd_offset(mm, addr);
  328. if (!pgd_present(*pgd)) {
  329. last = ADD_ROUND(addr, PGDIR_SIZE);
  330. if (last > end)
  331. last = end;
  332. if (pgd_newpage(*pgd)) {
  333. updated = 1;
  334. err = add_munmap(addr, last - addr, &hvc);
  335. if (err < 0)
  336. panic("munmap failed, errno = %d\n",
  337. -err);
  338. }
  339. addr = last;
  340. continue;
  341. }
  342. p4d = p4d_offset(pgd, addr);
  343. if (!p4d_present(*p4d)) {
  344. last = ADD_ROUND(addr, P4D_SIZE);
  345. if (last > end)
  346. last = end;
  347. if (p4d_newpage(*p4d)) {
  348. updated = 1;
  349. err = add_munmap(addr, last - addr, &hvc);
  350. if (err < 0)
  351. panic("munmap failed, errno = %d\n",
  352. -err);
  353. }
  354. addr = last;
  355. continue;
  356. }
  357. pud = pud_offset(p4d, addr);
  358. if (!pud_present(*pud)) {
  359. last = ADD_ROUND(addr, PUD_SIZE);
  360. if (last > end)
  361. last = end;
  362. if (pud_newpage(*pud)) {
  363. updated = 1;
  364. err = add_munmap(addr, last - addr, &hvc);
  365. if (err < 0)
  366. panic("munmap failed, errno = %d\n",
  367. -err);
  368. }
  369. addr = last;
  370. continue;
  371. }
  372. pmd = pmd_offset(pud, addr);
  373. if (!pmd_present(*pmd)) {
  374. last = ADD_ROUND(addr, PMD_SIZE);
  375. if (last > end)
  376. last = end;
  377. if (pmd_newpage(*pmd)) {
  378. updated = 1;
  379. err = add_munmap(addr, last - addr, &hvc);
  380. if (err < 0)
  381. panic("munmap failed, errno = %d\n",
  382. -err);
  383. }
  384. addr = last;
  385. continue;
  386. }
  387. pte = pte_offset_kernel(pmd, addr);
  388. if (!pte_present(*pte) || pte_newpage(*pte)) {
  389. updated = 1;
  390. err = add_munmap(addr, PAGE_SIZE, &hvc);
  391. if (err < 0)
  392. panic("munmap failed, errno = %d\n",
  393. -err);
  394. if (pte_present(*pte))
  395. err = add_mmap(addr, pte_val(*pte) & PAGE_MASK,
  396. PAGE_SIZE, 0, &hvc);
  397. }
  398. else if (pte_newprot(*pte)) {
  399. updated = 1;
  400. err = add_mprotect(addr, PAGE_SIZE, 0, &hvc);
  401. }
  402. addr += PAGE_SIZE;
  403. }
  404. if (!err)
  405. err = do_ops(&hvc, hvc.index, 1);
  406. if (err < 0)
  407. panic("flush_tlb_kernel failed, errno = %d\n", err);
  408. return updated;
  409. }
  410. void flush_tlb_page(struct vm_area_struct *vma, unsigned long address)
  411. {
  412. pgd_t *pgd;
  413. p4d_t *p4d;
  414. pud_t *pud;
  415. pmd_t *pmd;
  416. pte_t *pte;
  417. struct mm_struct *mm = vma->vm_mm;
  418. void *flush = NULL;
  419. int r, w, x, prot, err = 0;
  420. struct mm_id *mm_id;
  421. address &= PAGE_MASK;
  422. pgd = pgd_offset(mm, address);
  423. if (!pgd_present(*pgd))
  424. goto kill;
  425. p4d = p4d_offset(pgd, address);
  426. if (!p4d_present(*p4d))
  427. goto kill;
  428. pud = pud_offset(p4d, address);
  429. if (!pud_present(*pud))
  430. goto kill;
  431. pmd = pmd_offset(pud, address);
  432. if (!pmd_present(*pmd))
  433. goto kill;
  434. pte = pte_offset_kernel(pmd, address);
  435. r = pte_read(*pte);
  436. w = pte_write(*pte);
  437. x = pte_exec(*pte);
  438. if (!pte_young(*pte)) {
  439. r = 0;
  440. w = 0;
  441. } else if (!pte_dirty(*pte)) {
  442. w = 0;
  443. }
  444. mm_id = &mm->context.id;
  445. prot = ((r ? UM_PROT_READ : 0) | (w ? UM_PROT_WRITE : 0) |
  446. (x ? UM_PROT_EXEC : 0));
  447. if (pte_newpage(*pte)) {
  448. if (pte_present(*pte)) {
  449. unsigned long long offset;
  450. int fd;
  451. fd = phys_mapping(pte_val(*pte) & PAGE_MASK, &offset);
  452. err = map(mm_id, address, PAGE_SIZE, prot, fd, offset,
  453. 1, &flush);
  454. }
  455. else err = unmap(mm_id, address, PAGE_SIZE, 1, &flush);
  456. }
  457. else if (pte_newprot(*pte))
  458. err = protect(mm_id, address, PAGE_SIZE, prot, 1, &flush);
  459. if (err) {
  460. if (err == -ENOMEM)
  461. report_enomem();
  462. goto kill;
  463. }
  464. *pte = pte_mkuptodate(*pte);
  465. return;
  466. kill:
  467. printk(KERN_ERR "Failed to flush page for address 0x%lx\n", address);
  468. force_sig(SIGKILL);
  469. }
  470. void flush_tlb_all(void)
  471. {
  472. /*
  473. * Don't bother flushing if this address space is about to be
  474. * destroyed.
  475. */
  476. if (atomic_read(&current->mm->mm_users) == 0)
  477. return;
  478. flush_tlb_mm(current->mm);
  479. }
  480. void flush_tlb_kernel_range(unsigned long start, unsigned long end)
  481. {
  482. flush_tlb_kernel_range_common(start, end);
  483. }
  484. void flush_tlb_kernel_vm(void)
  485. {
  486. flush_tlb_kernel_range_common(start_vm, end_vm);
  487. }
  488. void __flush_tlb_one(unsigned long addr)
  489. {
  490. flush_tlb_kernel_range_common(addr, addr + PAGE_SIZE);
  491. }
  492. static void fix_range(struct mm_struct *mm, unsigned long start_addr,
  493. unsigned long end_addr, int force)
  494. {
  495. /*
  496. * Don't bother flushing if this address space is about to be
  497. * destroyed.
  498. */
  499. if (atomic_read(&mm->mm_users) == 0)
  500. return;
  501. fix_range_common(mm, start_addr, end_addr, force);
  502. }
  503. void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
  504. unsigned long end)
  505. {
  506. if (vma->vm_mm == NULL)
  507. flush_tlb_kernel_range_common(start, end);
  508. else fix_range(vma->vm_mm, start, end, 0);
  509. }
  510. EXPORT_SYMBOL(flush_tlb_range);
  511. void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
  512. unsigned long end)
  513. {
  514. fix_range(mm, start, end, 0);
  515. }
  516. void flush_tlb_mm(struct mm_struct *mm)
  517. {
  518. struct vm_area_struct *vma;
  519. VMA_ITERATOR(vmi, mm, 0);
  520. for_each_vma(vmi, vma)
  521. fix_range(mm, vma->vm_start, vma->vm_end, 0);
  522. }
  523. void force_flush_all(void)
  524. {
  525. struct mm_struct *mm = current->mm;
  526. struct vm_area_struct *vma;
  527. VMA_ITERATOR(vmi, mm, 0);
  528. for_each_vma(vmi, vma)
  529. fix_range(mm, vma->vm_start, vma->vm_end, 1);
  530. }