iommu.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2022 Google LLC
 * Author: David Brazdil <[email protected]>
 */

#include <linux/kvm_host.h>

#include <asm/kvm_asm.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_pkvm.h>

#include <hyp/adjust_pc.h>

#include <nvhe/iommu.h>
#include <nvhe/mm.h>
#include <nvhe/modules.h>
#include <nvhe/pkvm.h>

#define DRV_ID(drv_addr) ((unsigned long)drv_addr)

enum {
	IOMMU_DRIVER_NOT_READY = 0,
	IOMMU_DRIVER_INITIALIZING,
	IOMMU_DRIVER_READY,
};
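
/*
 * Note on the driver state machine (inferred from the helpers below, not
 * normative documentation): a driver starts NOT_READY, moves to INITIALIZING
 * while __pkvm_iommu_driver_init() runs, and ends up READY on success or back
 * at NOT_READY on failure. Devices can only be registered against READY
 * drivers.
 */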

/* List of registered IOMMU drivers, protected with iommu_drv_lock. */
static LIST_HEAD(iommu_drivers);
/* IOMMU device list. Must only be accessed with host_mmu.lock held. */
static LIST_HEAD(iommu_list);

static bool iommu_finalized;
static DEFINE_HYP_SPINLOCK(iommu_registration_lock);
static DEFINE_HYP_SPINLOCK(iommu_drv_lock);

static void *iommu_mem_pool;
static size_t iommu_mem_remaining;

static void assert_host_component_locked(void)
{
	hyp_assert_lock_held(&host_mmu.lock);
}

static void host_lock_component(void)
{
	hyp_spin_lock(&host_mmu.lock);
}

static void host_unlock_component(void)
{
	hyp_spin_unlock(&host_mmu.lock);
}

/*
 * Find IOMMU driver by its ID. The input ID is treated as untrusted
 * and is properly validated.
 */
static inline struct pkvm_iommu_driver *get_driver(unsigned long id)
{
	struct pkvm_iommu_driver *drv, *ret = NULL;

	hyp_spin_lock(&iommu_drv_lock);
	list_for_each_entry(drv, &iommu_drivers, list) {
		if (DRV_ID(drv) == id) {
			ret = drv;
			break;
		}
	}
	hyp_spin_unlock(&iommu_drv_lock);
	return ret;
}

static inline bool driver_acquire_init(struct pkvm_iommu_driver *drv)
{
	return atomic_cmpxchg_acquire(&drv->state, IOMMU_DRIVER_NOT_READY,
				      IOMMU_DRIVER_INITIALIZING)
			== IOMMU_DRIVER_NOT_READY;
}

static inline void driver_release_init(struct pkvm_iommu_driver *drv,
				       bool success)
{
	atomic_set_release(&drv->state, success ? IOMMU_DRIVER_READY
						: IOMMU_DRIVER_NOT_READY);
}

static inline bool is_driver_ready(struct pkvm_iommu_driver *drv)
{
	return atomic_read(&drv->state) == IOMMU_DRIVER_READY;
}

static size_t __iommu_alloc_size(struct pkvm_iommu_driver *drv)
{
	return ALIGN(sizeof(struct pkvm_iommu) + drv->ops->data_size,
		     sizeof(unsigned long));
}

static bool validate_driver_id_unique(struct pkvm_iommu_driver *drv)
{
	struct pkvm_iommu_driver *cur;

	hyp_assert_lock_held(&iommu_drv_lock);
	list_for_each_entry(cur, &iommu_drivers, list) {
		if (DRV_ID(drv) == DRV_ID(cur))
			return false;
	}
	return true;
}

static int __pkvm_register_iommu_driver(struct pkvm_iommu_driver *drv)
{
	int ret = 0;

	if (!drv)
		return -EINVAL;

	hyp_assert_lock_held(&iommu_registration_lock);
	hyp_spin_lock(&iommu_drv_lock);
	if (validate_driver_id_unique(drv))
		list_add_tail(&drv->list, &iommu_drivers);
	else
		ret = -EEXIST;
	hyp_spin_unlock(&iommu_drv_lock);
	return ret;
}

/* Global memory pool for allocating IOMMU list entry structs. */
static inline struct pkvm_iommu *alloc_iommu(struct pkvm_iommu_driver *drv,
					     void *mem, size_t mem_size)
{
	size_t size = __iommu_alloc_size(drv);
	void *ptr;

	assert_host_component_locked();

	/*
	 * If new memory is being provided, replace the existing pool with it.
	 * Any remaining memory in the pool is discarded.
	 */
	if (mem && mem_size) {
		iommu_mem_pool = mem;
		iommu_mem_remaining = mem_size;
	}

	if (size > iommu_mem_remaining)
		return NULL;

	ptr = iommu_mem_pool;
	iommu_mem_pool += size;
	iommu_mem_remaining -= size;
	return ptr;
}

static inline void free_iommu(struct pkvm_iommu_driver *drv, struct pkvm_iommu *ptr)
{
	size_t size = __iommu_alloc_size(drv);

	assert_host_component_locked();

	if (!ptr)
		return;

	/* Only allow freeing the last allocated buffer. */
	if ((void *)ptr + size != iommu_mem_pool)
		return;

	iommu_mem_pool -= size;
	iommu_mem_remaining += size;
}
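
/*
 * Illustrative note (not in the original sources): the pool above is a simple
 * bump allocator. For example, assuming a 4 KiB donation and a driver whose
 * __iommu_alloc_size() is 128 bytes, alloc_iommu() hands out entries at
 * offsets 0, 128, 256, ... and free_iommu() can only roll back the most
 * recent allocation, so a failed registration returns exactly the bytes it
 * took and nothing else.
 */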

static bool is_overlap(phys_addr_t r1_start, size_t r1_size,
		       phys_addr_t r2_start, size_t r2_size)
{
	phys_addr_t r1_end = r1_start + r1_size;
	phys_addr_t r2_end = r2_start + r2_size;

	return (r1_start < r2_end) && (r2_start < r1_end);
}
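
/*
 * Worked example (illustrative): the ranges are half-open, so
 * [0x1000, 0x2000) and [0x2000, 0x3000) do not overlap, while
 * [0x1000, 0x2000) and [0x1800, 0x2800) do.
 */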

static bool is_mmio_range(phys_addr_t base, size_t size)
{
	struct memblock_region *reg;
	phys_addr_t limit = BIT(host_mmu.pgt.ia_bits);
	size_t i;

	/* Check against limits of host IPA space. */
	if ((base >= limit) || !size || (size > limit - base))
		return false;

	for (i = 0; i < hyp_memblock_nr; i++) {
		reg = &hyp_memory[i];
		if (is_overlap(base, size, reg->base, reg->size))
			return false;
	}
	return true;
}

static int __snapshot_host_stage2(u64 start, u64 pa_max, u32 level,
				  kvm_pte_t *ptep,
				  enum kvm_pgtable_walk_flags flags,
				  void * const arg)
{
	struct pkvm_iommu_driver * const drv = arg;
	u64 end = start + kvm_granule_size(level);
	kvm_pte_t pte = *ptep;

	/*
	 * Valid stage-2 entries are created lazily, invalid ones eagerly.
	 * Note: In the future we may need to check if [start,end) is MMIO.
	 * Note: Drivers initialize their PTs to all memory owned by the host,
	 * so we only call the driver on regions where that is not the case.
	 */
	if (pte && !kvm_pte_valid(pte))
		drv->ops->host_stage2_idmap_prepare(start, end, /*prot*/ 0);
	return 0;
}

static int snapshot_host_stage2(struct pkvm_iommu_driver * const drv)
{
	struct kvm_pgtable_walker walker = {
		.cb = __snapshot_host_stage2,
		.arg = drv,
		.flags = KVM_PGTABLE_WALK_LEAF,
	};
	struct kvm_pgtable *pgt = &host_mmu.pgt;

	if (!drv->ops->host_stage2_idmap_prepare)
		return 0;
	return kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker);
}
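
/*
 * Illustrative note: after this walk the driver's identity-map bookkeeping
 * mirrors the current host stage-2 ownership. Later changes reach the driver
 * through pkvm_iommu_host_stage2_idmap() below, which is called with
 * host_mmu.lock held.
 */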

static bool validate_against_existing_iommus(struct pkvm_iommu *dev)
{
	struct pkvm_iommu *other;

	assert_host_component_locked();

	list_for_each_entry(other, &iommu_list, list) {
		/* Device ID must be unique. */
		if (dev->id == other->id)
			return false;

		/* MMIO regions must not overlap. */
		if (is_overlap(dev->pa, dev->size, other->pa, other->size))
			return false;
	}
	return true;
}

static struct pkvm_iommu *find_iommu_by_id(unsigned long id)
{
	struct pkvm_iommu *dev;

	assert_host_component_locked();

	list_for_each_entry(dev, &iommu_list, list) {
		if (dev->id == id)
			return dev;
	}
	return NULL;
}

/*
 * Initialize EL2 IOMMU driver.
 *
 * This is a common hypercall for driver initialization. Driver-specific
 * arguments are passed in a shared memory buffer. The driver is expected to
 * initialize its page-table bookkeeping.
 */
int __pkvm_iommu_driver_init(struct pkvm_iommu_driver *drv, void *data, size_t size)
{
	const struct pkvm_iommu_ops *ops;
	int ret = 0;

	/* New driver initialization not allowed after __pkvm_iommu_finalize(). */
	hyp_spin_lock(&iommu_registration_lock);
	if (iommu_finalized) {
		ret = -EPERM;
		goto out_unlock;
	}

	ret = __pkvm_register_iommu_driver(drv);
	if (ret)
		goto out_unlock;

	if (!drv->ops) {
		ret = -EINVAL;
		goto out_unlock;
	}

	if (!driver_acquire_init(drv)) {
		ret = -EBUSY;
		goto out_unlock;
	}

	ops = drv->ops;

	/* This can change stage-2 mappings. */
	if (ops->init) {
		ret = hyp_pin_shared_mem(data, data + size);
		if (!ret) {
			ret = ops->init(data, size);
			hyp_unpin_shared_mem(data, data + size);
		}
		if (ret)
			goto out_release;
	}

	/*
	 * Walk host stage-2 and pass current mappings to the driver. Start
	 * accepting host stage-2 updates as soon as the host lock is released.
	 */
	host_lock_component();
	ret = snapshot_host_stage2(drv);
	if (!ret)
		driver_release_init(drv, /*success=*/true);
	host_unlock_component();

out_release:
	if (ret)
		driver_release_init(drv, /*success=*/false);

out_unlock:
	hyp_spin_unlock(&iommu_registration_lock);
	return ret;
}
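
/*
 * Illustrative sketch (not part of this file): a minimal EL2 driver supplies a
 * struct pkvm_iommu_ops wiring up the hooks used above and below, e.g.
 *
 *	static const struct pkvm_iommu_ops my_iommu_ops = {
 *		.init = my_init,
 *		.validate = my_validate,
 *		.host_stage2_idmap_prepare = my_idmap_prepare,
 *		.host_stage2_idmap_apply = my_idmap_apply,
 *		.host_stage2_idmap_complete = my_idmap_complete,
 *		.data_size = sizeof(struct my_iommu_data),
 *	};
 *
 * The field names are the ones dereferenced in this file; the "my_*" symbols
 * and struct my_iommu_data are hypothetical.
 */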

int __pkvm_iommu_register(unsigned long dev_id, unsigned long drv_id,
			  phys_addr_t dev_pa, size_t dev_size,
			  unsigned long parent_id, u8 flags,
			  void *kern_mem_va)
{
	struct pkvm_iommu *dev = NULL;
	struct pkvm_iommu_driver *drv;
	void *mem_va = NULL;
	int ret = 0;

	/* New device registration not allowed after __pkvm_iommu_finalize(). */
	hyp_spin_lock(&iommu_registration_lock);
	if (iommu_finalized) {
		ret = -EPERM;
		goto out_unlock;
	}

	drv = get_driver(drv_id);
	if (!drv || !is_driver_ready(drv)) {
		ret = -ENOENT;
		goto out_unlock;
	}

	if (!PAGE_ALIGNED(dev_pa) || !PAGE_ALIGNED(dev_size)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	if (!is_mmio_range(dev_pa, dev_size)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/*
	 * Accept memory donation if the host is providing new memory.
	 * Note: We do not return the memory even if there is an error later.
	 */
	if (kern_mem_va) {
		mem_va = kern_hyp_va(kern_mem_va);

		if (!PAGE_ALIGNED(mem_va)) {
			ret = -EINVAL;
			goto out_unlock;
		}

		ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(mem_va), 1);
		if (ret)
			goto out_unlock;
	}

	host_lock_component();

	/* Allocate memory for the new device entry. */
	dev = alloc_iommu(drv, mem_va, PAGE_SIZE);
	if (!dev) {
		ret = -ENOMEM;
		goto out_free;
	}

	/* Populate the new device entry. */
	*dev = (struct pkvm_iommu){
		.children = LIST_HEAD_INIT(dev->children),
		.id = dev_id,
		.ops = drv->ops,
		.pa = dev_pa,
		.va = hyp_phys_to_virt(dev_pa),
		.size = dev_size,
		.flags = flags,
	};

	if (!validate_against_existing_iommus(dev)) {
		ret = -EBUSY;
		goto out_free;
	}

	if (parent_id) {
		dev->parent = find_iommu_by_id(parent_id);
		if (!dev->parent) {
			ret = -EINVAL;
			goto out_free;
		}

		if (dev->parent->ops->validate_child) {
			ret = dev->parent->ops->validate_child(dev->parent, dev);
			if (ret)
				goto out_free;
		}
	}

	if (dev->ops->validate) {
		ret = dev->ops->validate(dev);
		if (ret)
			goto out_free;
	}

	ret = __pkvm_host_donate_hyp_locked(hyp_phys_to_pfn(dev_pa),
					    PAGE_ALIGN(dev_size) >> PAGE_SHIFT);
	if (ret)
		goto out_free;

	/* Register device and prevent host from mapping the MMIO range. */
	list_add_tail(&dev->list, &iommu_list);
	if (dev->parent)
		list_add_tail(&dev->siblings, &dev->parent->children);

out_free:
	if (ret)
		free_iommu(drv, dev);
	host_unlock_component();

out_unlock:
	hyp_spin_unlock(&iommu_registration_lock);
	return ret;
}
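
/*
 * Illustrative summary of the registration path above (not in the original
 * sources): the host donates an optional page of backing memory plus the
 * device's MMIO range to the hypervisor. From then on, host accesses to that
 * range fault and are vetted by pkvm_iommu_host_dabt_handler() instead of the
 * range being mapped back into the host stage-2.
 */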

int __pkvm_iommu_finalize(int err)
{
	int ret = 0;

	/* The err argument is not currently used at EL2. */
	WARN_ON(err);

	hyp_spin_lock(&iommu_registration_lock);
	if (!iommu_finalized)
		iommu_finalized = true;
	else
		ret = -EPERM;
	hyp_spin_unlock(&iommu_registration_lock);
	return ret;
}

int __pkvm_iommu_pm_notify(unsigned long dev_id, enum pkvm_iommu_pm_event event)
{
	struct pkvm_iommu *dev;
	int ret;

	host_lock_component();
	dev = find_iommu_by_id(dev_id);
	if (dev) {
		if (event == PKVM_IOMMU_PM_SUSPEND) {
			ret = dev->ops->suspend ? dev->ops->suspend(dev) : 0;
			if (!ret)
				dev->powered = false;
		} else if (event == PKVM_IOMMU_PM_RESUME) {
			ret = dev->ops->resume ? dev->ops->resume(dev) : 0;
			if (!ret)
				dev->powered = true;
		} else {
			ret = -EINVAL;
		}
	} else {
		ret = -ENODEV;
	}
	host_unlock_component();
	return ret;
}

bool pkvm_iommu_host_dabt_handler(struct kvm_cpu_context *host_ctxt, u32 esr,
				  phys_addr_t pa)
{
	struct pkvm_iommu *dev;

	assert_host_component_locked();

	list_for_each_entry(dev, &iommu_list, list) {
		if (pa < dev->pa || pa >= dev->pa + dev->size)
			continue;

		/* No 'powered' check - the host assumes it is powered. */
		if (!dev->ops->host_dabt_handler ||
		    !dev->ops->host_dabt_handler(dev, host_ctxt, esr, pa - dev->pa))
			return false;

		kvm_skip_host_instr();
		return true;
	}
	return false;
}

void pkvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end,
				  enum kvm_pgtable_prot prot)
{
	struct pkvm_iommu_driver *drv;
	struct pkvm_iommu *dev;

	assert_host_component_locked();

	hyp_spin_lock(&iommu_drv_lock);
	list_for_each_entry(drv, &iommu_drivers, list) {
		if (drv && is_driver_ready(drv) && drv->ops->host_stage2_idmap_prepare)
			drv->ops->host_stage2_idmap_prepare(start, end, prot);
	}
	hyp_spin_unlock(&iommu_drv_lock);

	list_for_each_entry(dev, &iommu_list, list) {
		if (dev->powered && dev->ops->host_stage2_idmap_apply)
			dev->ops->host_stage2_idmap_apply(dev, start, end);
	}

	list_for_each_entry(dev, &iommu_list, list) {
		if (dev->powered && dev->ops->host_stage2_idmap_complete)
			dev->ops->host_stage2_idmap_complete(dev);
	}
}
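
/*
 * Illustrative note on the three-phase update above (not in the original
 * sources): prepare is invoked once per ready driver to update its page-table
 * bookkeeping, apply then pushes the change to every powered device, and
 * complete lets each device finish, e.g. by waiting for the update to take
 * effect. Separating apply from complete allows that work to overlap across
 * devices.
 */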