mte.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/bitops.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/prctl.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/string.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/thread_info.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/uio.h>

#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/mte.h>
#include <asm/ptrace.h>
#include <asm/sysreg.h>

static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred);

#ifdef CONFIG_KASAN_HW_TAGS
/*
 * The asynchronous and asymmetric MTE modes have the same behavior for
 * store operations. This flag is set when either of these modes is enabled.
 */
DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode);
EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode);
#endif

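/*
 * Initialise the tags for the pages mapped by a tagged PTE: clear the tag
 * storage and mark each page PG_mte_tagged, skipping pages whose tags have
 * already been initialised.
 */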
void mte_sync_tags(pte_t pte)
{
	struct page *page = pte_page(pte);
	long i, nr_pages = compound_nr(page);

	/* if PG_mte_tagged is set, tags have already been initialised */
	for (i = 0; i < nr_pages; i++, page++) {
		if (!page_mte_tagged(page)) {
			mte_clear_page_tags(page_address(page));
			set_page_mte_tagged(page);
		}
	}

	/* ensure the tags are visible before the PTE is set */
	smp_wmb();
}

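/*
 * Compare two pages for KSM merging. Besides the data comparison, report the
 * pages as different when MTE is supported and either page is tagged, so that
 * pages with potentially different tags are never merged.
 */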
int memcmp_pages(struct page *page1, struct page *page2)
{
	char *addr1, *addr2;
	int ret;

	addr1 = page_address(page1);
	addr2 = page_address(page2);
	ret = memcmp(addr1, addr2, PAGE_SIZE);

	if (!system_supports_mte() || ret)
		return ret;

	/*
	 * If the page content is identical but at least one of the pages is
	 * tagged, return non-zero to avoid KSM merging. If only one of the
	 * pages is tagged, set_pte_at() may zero or change the tags of the
	 * other page via mte_sync_tags().
	 */
	if (page_mte_tagged(page1) || page_mte_tagged(page2))
		return addr1 != addr2;

	return ret;
}

static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
{
	/* Enable MTE Sync Mode for EL1. */
	sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF_MASK,
			 SYS_FIELD_PREP(SCTLR_EL1, TCF, tcf));
	isb();

	pr_info_once("MTE: enabled in %s mode at EL1\n", mode);
}

#ifdef CONFIG_KASAN_HW_TAGS
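/*
 * Enable EL1 tag check faults in synchronous mode. This is the default
 * in-kernel MTE mode for KASAN_HW_TAGS and the fallback when asymmetric
 * mode is not supported.
 */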
void mte_enable_kernel_sync(void)
{
	/*
	 * Make sure we enter this function when no PE has set
	 * async mode previously.
	 */
	WARN_ONCE(system_uses_mte_async_or_asymm_mode(),
			"MTE async mode enabled system wide!");

	__mte_enable_kernel("synchronous", SCTLR_EL1_TCF_SYNC);
}

void mte_enable_kernel_async(void)
{
	__mte_enable_kernel("asynchronous", SCTLR_EL1_TCF_ASYNC);

	/*
	 * MTE async mode is set system wide by the first PE that
	 * executes this function.
	 *
	 * Note: If in future KASAN acquires a runtime switching
	 *       mode in between sync and async, this strategy needs
	 *       to be reviewed.
	 */
	if (!system_uses_mte_async_or_asymm_mode())
		static_branch_enable(&mte_async_or_asymm_mode);
}

void mte_enable_kernel_asymm(void)
{
	if (cpus_have_cap(ARM64_MTE_ASYMM)) {
		__mte_enable_kernel("asymmetric", SCTLR_EL1_TCF_ASYMM);

		/*
		 * MTE asymm mode behaves as async mode for store
		 * operations. The mode is set system wide by the
		 * first PE that executes this function.
		 *
		 * Note: If in future KASAN acquires a runtime switching
		 *       mode in between sync and async, this strategy needs
		 *       to be reviewed.
		 */
		if (!system_uses_mte_async_or_asymm_mode())
			static_branch_enable(&mte_async_or_asymm_mode);
	} else {
		/*
		 * If the CPU does not support MTE asymmetric mode the
		 * kernel falls back on synchronous mode which is the
		 * default for kasan=on.
		 */
		mte_enable_kernel_sync();
	}
}
#endif

#ifdef CONFIG_KASAN_HW_TAGS
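/*
 * Check TFSR_EL1 for an asynchronous tag check fault taken at EL1 and, if one
 * is pending, clear it and report it to KASAN.
 */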
void mte_check_tfsr_el1(void)
{
	u64 tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1);

	if (unlikely(tfsr_el1 & SYS_TFSR_EL1_TF1)) {
		/*
		 * Note: isb() is not required after this direct write
		 * because there is no indirect read subsequent to it
		 * (per ARM DDI 0487F.c table D13-1).
		 */
		write_sysreg_s(0, SYS_TFSR_EL1);

		kasan_report_async();
	}
}
#endif

/*
 * This is where we actually resolve the system and process MTE mode
 * configuration into an actual value in SCTLR_EL1 that affects
 * userspace.
 */
static void mte_update_sctlr_user(struct task_struct *task)
{
	/*
	 * This must be called with preemption disabled and can only be called
	 * on the current or next task since the CPU must match where the thread
	 * is going to run. The caller is responsible for calling
	 * update_sctlr_el1() later in the same preemption disabled block.
	 */
	unsigned long sctlr = task->thread.sctlr_user;
	unsigned long mte_ctrl = task->thread.mte_ctrl;
	unsigned long pref, resolved_mte_tcf;

	pref = __this_cpu_read(mte_tcf_preferred);
	/*
	 * If there is no overlap between the system preferred and
	 * program requested values go with what was requested.
	 */
	resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
	sctlr &= ~SCTLR_EL1_TCF0_MASK;
	/*
	 * Pick an actual setting. The order in which we check for
	 * set bits and map into register values determines our
	 * default order.
	 */
	if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM)
		sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYMM);
	else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
		sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, ASYNC);
	else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
		sctlr |= SYS_FIELD_PREP_ENUM(SCTLR_EL1, TCF0, SYNC);
	task->thread.sctlr_user = sctlr;
}

static void mte_update_gcr_excl(struct task_struct *task)
{
	/*
	 * SYS_GCR_EL1 will be set to current->thread.mte_ctrl value by
	 * mte_set_user_gcr() in kernel_exit, but only if KASAN is enabled.
	 */
	if (kasan_hw_tags_enabled())
		return;

	write_sysreg_s(
		((task->thread.mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
		 SYS_GCR_EL1_EXCL_MASK) | SYS_GCR_EL1_RRND,
		SYS_GCR_EL1);
}

#ifdef CONFIG_KASAN_HW_TAGS
/* Only called from assembly, silence sparse */
void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr,
				 __le32 *updptr, int nr_inst);

void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr,
				 __le32 *updptr, int nr_inst)
{
	BUG_ON(nr_inst != 1); /* Branch -> NOP */

	if (kasan_hw_tags_enabled())
		*updptr = cpu_to_le32(aarch64_insn_gen_nop());
}
#endif

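/*
 * Reset the MTE state for a new user thread: discard any pending asynchronous
 * tag fault and disable tag checking via set_mte_ctrl().
 */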
void mte_thread_init_user(void)
{
	if (!system_supports_mte())
		return;

	/* clear any pending asynchronous tag fault */
	dsb(ish);
	write_sysreg_s(0, SYS_TFSRE0_EL1);
	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
	/* disable tag checking and reset tag generation mask */
	set_mte_ctrl(current, 0);
}

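/*
 * Update the MTE state (SCTLR_EL1 user bits, GCR_EL1 exclude mask, TCO) for
 * the task being switched in and check whether an asynchronous tag check
 * fault was raised at EL1 while the outgoing task was running.
 */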
void mte_thread_switch(struct task_struct *next)
{
	if (!system_supports_mte())
		return;

	mte_update_sctlr_user(next);
	mte_update_gcr_excl(next);

	/* TCO may not have been disabled on exception entry for the current task. */
	mte_disable_tco_entry(next);

	/*
	 * Check if an async tag exception occurred at EL1.
	 *
	 * Note: On the context switch path we rely on the dsb() present
	 * in __switch_to() to guarantee that the indirect writes to TFSR_EL1
	 * are synchronized before this point.
	 */
	isb();
	mte_check_tfsr_el1();
}

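/*
 * Per-CPU MTE initialisation: set up the Normal Tagged memory type in
 * MAIR_EL1, the kernel GCR_EL1 exclude mask, a non-zero RGSR_EL1 seed and
 * clear any stale tag check faults.
 */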
void mte_cpu_setup(void)
{
	u64 rgsr;

	/*
	 * CnP must be enabled only after the MAIR_EL1 register has been set
	 * up. Inconsistent MAIR_EL1 between CPUs sharing the same TLB may
	 * lead to the wrong memory type being used for a brief window during
	 * CPU power-up.
	 *
	 * CnP is not a boot feature so MTE gets enabled before CnP, but let's
	 * make sure that is the case.
	 */
	BUG_ON(read_sysreg(ttbr0_el1) & TTBR_CNP_BIT);
	BUG_ON(read_sysreg(ttbr1_el1) & TTBR_CNP_BIT);

	/* Normal Tagged memory type at the corresponding MAIR index */
	sysreg_clear_set(mair_el1,
			 MAIR_ATTRIDX(MAIR_ATTR_MASK, MT_NORMAL_TAGGED),
			 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_TAGGED,
				      MT_NORMAL_TAGGED));

	write_sysreg_s(KERNEL_GCR_EL1, SYS_GCR_EL1);

	/*
	 * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
	 * RGSR_EL1.SEED must be non-zero for IRG to produce
	 * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
	 * must initialize it.
	 */
	rgsr = (read_sysreg(CNTVCT_EL0) & SYS_RGSR_EL1_SEED_MASK) <<
	       SYS_RGSR_EL1_SEED_SHIFT;
	if (rgsr == 0)
		rgsr = 1 << SYS_RGSR_EL1_SEED_SHIFT;
	write_sysreg_s(rgsr, SYS_RGSR_EL1);

	/* clear any pending tag check faults in TFSR*_EL1 */
	write_sysreg_s(0, SYS_TFSR_EL1);
	write_sysreg_s(0, SYS_TFSRE0_EL1);

	local_flush_tlb_all();
}

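/*
 * Report any pending asynchronous EL1 tag check fault before entering
 * suspend; mte_suspend_exit() re-initialises the MTE registers via
 * mte_cpu_setup() on resume.
 */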
void mte_suspend_enter(void)
{
	if (!system_supports_mte())
		return;

	/*
	 * The barriers are required to guarantee that the indirect writes
	 * to TFSR_EL1 are synchronized before we report the state.
	 */
	dsb(nsh);
	isb();

	/* Report SYS_TFSR_EL1 before suspend entry */
	mte_check_tfsr_el1();
}

void mte_suspend_exit(void)
{
	if (!system_supports_mte())
		return;

	mte_cpu_setup();
}

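/*
 * Set @task's MTE control state from a PR_SET_TAGGED_ADDR_CTRL prctl()
 * argument: the GCR_EL1 exclude mask derived from PR_MTE_TAG_MASK and the
 * requested tag check fault mode(s).
 */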
long set_mte_ctrl(struct task_struct *task, unsigned long arg)
{
	u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
			SYS_GCR_EL1_EXCL_MASK) << MTE_CTRL_GCR_USER_EXCL_SHIFT;

	if (!system_supports_mte())
		return 0;

	if (arg & PR_MTE_TCF_ASYNC)
		mte_ctrl |= MTE_CTRL_TCF_ASYNC;
	if (arg & PR_MTE_TCF_SYNC)
		mte_ctrl |= MTE_CTRL_TCF_SYNC;

	/*
	 * If the system supports it and both sync and async modes are
	 * specified then implicitly enable asymmetric mode.
	 * Userspace could see a mix of both sync and async anyway due
	 * to differing or changing defaults on CPUs.
	 */
	if (cpus_have_cap(ARM64_MTE_ASYMM) &&
	    (arg & PR_MTE_TCF_ASYNC) &&
	    (arg & PR_MTE_TCF_SYNC))
		mte_ctrl |= MTE_CTRL_TCF_ASYMM;

	task->thread.mte_ctrl = mte_ctrl;
	if (task == current) {
		preempt_disable();
		mte_update_sctlr_user(task);
		mte_update_gcr_excl(task);
		update_sctlr_el1(task->thread.sctlr_user);
		preempt_enable();
	}

	return 0;
}

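/*
 * Return @task's MTE control state encoded as PR_MTE_* prctl() flags: the
 * included tag mask and the selected tag check fault mode(s).
 */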
long get_mte_ctrl(struct task_struct *task)
{
	unsigned long ret;
	u64 mte_ctrl = task->thread.mte_ctrl;
	u64 incl = (~mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
		   SYS_GCR_EL1_EXCL_MASK;

	if (!system_supports_mte())
		return 0;

	ret = incl << PR_MTE_TAG_SHIFT;
	if (mte_ctrl & MTE_CTRL_TCF_ASYNC)
		ret |= PR_MTE_TCF_ASYNC;
	if (mte_ctrl & MTE_CTRL_TCF_SYNC)
		ret |= PR_MTE_TCF_SYNC;

	return ret;
}

/*
 * Access MTE tags in another process' address space as given in mm. Update
 * the number of tags copied. Return 0 if any tags copied, error otherwise.
 * Inspired by __access_remote_vm().
 */
static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
				struct iovec *kiov, unsigned int gup_flags)
{
	struct vm_area_struct *vma;
	void __user *buf = kiov->iov_base;
	size_t len = kiov->iov_len;
	int ret;
	int write = gup_flags & FOLL_WRITE;

	if (!access_ok(buf, len))
		return -EFAULT;

	if (mmap_read_lock_killable(mm))
		return -EIO;

	while (len) {
		unsigned long tags, offset;
		void *maddr;
		struct page *page = NULL;

		ret = get_user_pages_remote(mm, addr, 1, gup_flags, &page,
					    &vma, NULL);
		if (ret <= 0)
			break;

		/*
		 * Only copy tags if the page has been mapped as PROT_MTE
		 * (PG_mte_tagged set). Otherwise the tags are not valid and
		 * not accessible to user. Moreover, an mprotect(PROT_MTE)
		 * would cause the existing tags to be cleared if the page
		 * was never mapped with PROT_MTE.
		 */
		if (!(vma->vm_flags & VM_MTE)) {
			ret = -EOPNOTSUPP;
			put_page(page);
			break;
		}
		WARN_ON_ONCE(!page_mte_tagged(page));

		/* limit access to the end of the page */
		offset = offset_in_page(addr);
		tags = min(len, (PAGE_SIZE - offset) / MTE_GRANULE_SIZE);

		maddr = page_address(page);
		if (write) {
			tags = mte_copy_tags_from_user(maddr + offset, buf, tags);
			set_page_dirty_lock(page);
		} else {
			tags = mte_copy_tags_to_user(buf, maddr + offset, tags);
		}
		put_page(page);

		/* error accessing the tracer's buffer */
		if (!tags)
			break;

		len -= tags;
		buf += tags;
		addr += tags * MTE_GRANULE_SIZE;
	}
	mmap_read_unlock(mm);

	/* return an error if no tags copied */
	kiov->iov_len = buf - kiov->iov_base;
	if (!kiov->iov_len) {
		/* check for error accessing the tracee's address space */
		if (ret <= 0)
			return -EIO;
		else
			return -EFAULT;
	}

	return 0;
}

/*
 * Copy MTE tags in another process' address space at 'addr' to/from tracer's
 * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm().
 */
static int access_remote_tags(struct task_struct *tsk, unsigned long addr,
			      struct iovec *kiov, unsigned int gup_flags)
{
	struct mm_struct *mm;
	int ret;

	mm = get_task_mm(tsk);
	if (!mm)
		return -EPERM;

	if (!tsk->ptrace || (current != tsk->parent) ||
	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
	     !ptracer_capable(tsk, mm->user_ns))) {
		mmput(mm);
		return -EPERM;
	}

	ret = __access_remote_tags(mm, addr, kiov, gup_flags);
	mmput(mm);

	return ret;
}

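/*
 * Handler for PTRACE_PEEKMTETAGS/PTRACE_POKEMTETAGS: copy MTE tags between
 * the tracee's address space at @addr and the tracer's iovec at @data,
 * writing back the number of tags copied to the user iov_len.
 */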
int mte_ptrace_copy_tags(struct task_struct *child, long request,
			 unsigned long addr, unsigned long data)
{
	int ret;
	struct iovec kiov;
	struct iovec __user *uiov = (void __user *)data;
	unsigned int gup_flags = FOLL_FORCE;

	if (!system_supports_mte())
		return -EIO;

	if (get_user(kiov.iov_base, &uiov->iov_base) ||
	    get_user(kiov.iov_len, &uiov->iov_len))
		return -EFAULT;

	if (request == PTRACE_POKEMTETAGS)
		gup_flags |= FOLL_WRITE;

	/* align addr to the MTE tag granule */
	addr &= MTE_GRANULE_MASK;

	ret = access_remote_tags(child, addr, &kiov, gup_flags);
	if (!ret)
		ret = put_user(kiov.iov_len, &uiov->iov_len);

	return ret;
}

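/*
 * Per-CPU "mte_tcf_preferred" sysfs attribute: the tag check fault mode
 * preferred for this CPU, used by mte_update_sctlr_user() when resolving a
 * task's requested modes.
 */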
static ssize_t mte_tcf_preferred_show(struct device *dev,
				      struct device_attribute *attr, char *buf)
{
	switch (per_cpu(mte_tcf_preferred, dev->id)) {
	case MTE_CTRL_TCF_ASYNC:
		return sysfs_emit(buf, "async\n");
	case MTE_CTRL_TCF_SYNC:
		return sysfs_emit(buf, "sync\n");
	case MTE_CTRL_TCF_ASYMM:
		return sysfs_emit(buf, "asymm\n");
	default:
		return sysfs_emit(buf, "???\n");
	}
}

static ssize_t mte_tcf_preferred_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
{
	u64 tcf;

	if (sysfs_streq(buf, "async"))
		tcf = MTE_CTRL_TCF_ASYNC;
	else if (sysfs_streq(buf, "sync"))
		tcf = MTE_CTRL_TCF_SYNC;
	else if (cpus_have_cap(ARM64_MTE_ASYMM) && sysfs_streq(buf, "asymm"))
		tcf = MTE_CTRL_TCF_ASYMM;
	else
		return -EINVAL;

	device_lock(dev);
	per_cpu(mte_tcf_preferred, dev->id) = tcf;
	device_unlock(dev);

	return count;
}
static DEVICE_ATTR_RW(mte_tcf_preferred);

static int register_mte_tcf_preferred_sysctl(void)
{
	unsigned int cpu;

	if (!system_supports_mte())
		return 0;

	for_each_possible_cpu(cpu) {
		per_cpu(mte_tcf_preferred, cpu) = MTE_CTRL_TCF_ASYNC;
		device_create_file(get_cpu_device(cpu),
				   &dev_attr_mte_tcf_preferred);
	}

	return 0;
}
subsys_initcall(register_mte_tcf_preferred_sysctl);

/*
 * Return 0 on success, the number of bytes not probed otherwise.
 */
size_t mte_probe_user_range(const char __user *uaddr, size_t size)
{
	const char __user *end = uaddr + size;
	int err = 0;
	char val;

	__raw_get_user(val, uaddr, err);
	if (err)
		return size;

	uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE);
	while (uaddr < end) {
		/*
		 * A read is sufficient for mte, the caller should have probed
		 * for the pte write permission if required.
		 */
		__raw_get_user(val, uaddr, err);
		if (err)
			return end - uaddr;
		uaddr += MTE_GRANULE_SIZE;
	}
	(void)val;

	return 0;
}