io-pgfault.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Handle device page faults
 *
 * Copyright (C) 2020 ARM Ltd.
 */
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#include "iommu-sva.h"

/**
 * struct iopf_queue - IO Page Fault queue
 * @wq: the fault workqueue
 * @devices: devices attached to this queue
 * @lock: protects the device list
 */
struct iopf_queue {
        struct workqueue_struct *wq;
        struct list_head devices;
        struct mutex lock;
};

/**
 * struct iopf_device_param - IO Page Fault data attached to a device
 * @dev: the device that owns this param
 * @queue: IOPF queue
 * @queue_list: index into queue->devices
 * @partial: faults that are part of a Page Request Group for which the last
 *           request hasn't been submitted yet.
 */
struct iopf_device_param {
        struct device *dev;
        struct iopf_queue *queue;
        struct list_head queue_list;
        struct list_head partial;
};

struct iopf_fault {
        struct iommu_fault fault;
        struct list_head list;
};

struct iopf_group {
        struct iopf_fault last_fault;
        struct list_head faults;
        struct work_struct work;
        struct device *dev;
};

static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
                               enum iommu_page_response_code status)
{
        struct iommu_page_response resp = {
                .version = IOMMU_PAGE_RESP_VERSION_1,
                .pasid = iopf->fault.prm.pasid,
                .grpid = iopf->fault.prm.grpid,
                .code = status,
        };

        if ((iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) &&
            (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID))
                resp.flags = IOMMU_PAGE_RESP_PASID_VALID;

        return iommu_page_response(dev, &resp);
}

static void iopf_handler(struct work_struct *work)
{
        struct iopf_group *group;
        struct iommu_domain *domain;
        struct iopf_fault *iopf, *next;
        enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;

        group = container_of(work, struct iopf_group, work);
        domain = iommu_get_domain_for_dev_pasid(group->dev,
                                group->last_fault.fault.prm.pasid, 0);
        if (!domain || !domain->iopf_handler)
                status = IOMMU_PAGE_RESP_INVALID;

        list_for_each_entry_safe(iopf, next, &group->faults, list) {
                /*
                 * For the moment, errors are sticky: don't handle subsequent
                 * faults in the group if there is an error.
                 */
                if (status == IOMMU_PAGE_RESP_SUCCESS)
                        status = domain->iopf_handler(&iopf->fault,
                                                      domain->fault_data);

                if (!(iopf->fault.prm.flags &
                      IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
                        kfree(iopf);
        }

        iopf_complete_group(group->dev, &group->last_fault, status);
        kfree(group);
}

/**
 * iommu_queue_iopf - IO Page Fault handler
 * @fault: fault event
 * @cookie: struct device, passed to iommu_register_device_fault_handler.
 *
 * Add a fault to the device workqueue, to be handled by mm.
 *
 * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard
 * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
 * expect a response. It may be generated when disabling a PASID (issuing a
 * PASID stop request) by some PCI devices.
 *
 * The PASID stop request is issued by the device driver before unbind(). Once
 * it completes, no page request is generated for this PASID anymore and
 * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
 * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
 * for all outstanding page requests to come back with a response before
 * completing the PASID stop request. Others do not wait for page responses,
 * and instead issue this Stop Marker that tells us when the PASID can be
 * reallocated.
 *
 * It is safe to discard the Stop Marker because it is an optimization.
 * a. Page requests, which are posted requests, have been flushed to the IOMMU
 *    when the stop request completes.
 * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
 *    PASID.
 *
 * So even though the Stop Marker might be issued by the device *after* the
 * stop request completes, outstanding faults will have been dealt with by the
 * time the PASID is freed.
 *
 * Any valid page fault is eventually routed to an iommu domain, and the page
 * fault handler installed there gets called. Users of this handling framework
 * must guarantee that the iommu domain is only freed after the device has
 * stopped generating page faults (or the iommu hardware has been set to block
 * them) and all pending page faults have been flushed.
 *
 * Return: 0 on success and <0 on error.
 */
int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
{
        int ret;
        struct iopf_group *group;
        struct iopf_fault *iopf, *next;
        struct iopf_device_param *iopf_param;
        struct device *dev = cookie;
        struct dev_iommu *param = dev->iommu;

        lockdep_assert_held(&param->lock);

        if (fault->type != IOMMU_FAULT_PAGE_REQ)
                /* Not a recoverable page fault */
                return -EOPNOTSUPP;

        /*
         * As long as we're holding param->lock, the queue can't be unlinked
         * from the device and therefore cannot disappear.
         */
        iopf_param = param->iopf_param;
        if (!iopf_param)
                return -ENODEV;

        if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
                iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
                if (!iopf)
                        return -ENOMEM;

                iopf->fault = *fault;

                /* Non-last request of a group. Postpone until the last one */
                list_add(&iopf->list, &iopf_param->partial);

                return 0;
        }

        group = kzalloc(sizeof(*group), GFP_KERNEL);
        if (!group) {
                /*
                 * The caller will send a response to the hardware. But we do
                 * need to clean up before leaving, otherwise partial faults
                 * will be stuck.
                 */
                ret = -ENOMEM;
                goto cleanup_partial;
        }

        group->dev = dev;
        group->last_fault.fault = *fault;
        INIT_LIST_HEAD(&group->faults);
        list_add(&group->last_fault.list, &group->faults);
        INIT_WORK(&group->work, iopf_handler);

        /* See if we have partial faults for this group */
        list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
                if (iopf->fault.prm.grpid == fault->prm.grpid)
                        /* Insert *before* the last fault */
                        list_move(&iopf->list, &group->faults);
        }

        queue_work(iopf_param->queue->wq, &group->work);
        return 0;

cleanup_partial:
        list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
                if (iopf->fault.prm.grpid == fault->prm.grpid) {
                        list_del(&iopf->list);
                        kfree(iopf);
                }
        }
        return ret;
}
EXPORT_SYMBOL_GPL(iommu_queue_iopf);

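/*
 * Usage sketch (editor's illustration, not part of the upstream file): an
 * IOMMU driver that wants recoverable page faults for a device typically
 * adds the device to an IOPF queue and registers iommu_queue_iopf() as the
 * device fault handler, with the device itself as the cookie. The function
 * name below is hypothetical.
 */
static int __maybe_unused example_enable_iopf(struct device *dev,
                                              struct iopf_queue *queue)
{
        int ret;

        /* Attach the device to the shared fault queue first */
        ret = iopf_queue_add_device(queue, dev);
        if (ret)
                return ret;

        /* Route reported faults into this file's handler */
        ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
        if (ret)
                iopf_queue_remove_device(queue, dev);

        return ret;
}
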
/**
 * iopf_queue_flush_dev - Ensure that all queued faults have been processed
 * @dev: the endpoint whose faults need to be flushed.
 *
 * The IOMMU driver calls this before releasing a PASID, to ensure that all
 * pending faults for this PASID have been handled, and won't hit the address
 * space of the next process that uses this PASID. The driver must make sure
 * that no new fault is added to the queue. In particular it must flush its
 * low-level queue before calling this function.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_flush_dev(struct device *dev)
{
        int ret = 0;
        struct iopf_device_param *iopf_param;
        struct dev_iommu *param = dev->iommu;

        if (!param)
                return -ENODEV;

        mutex_lock(&param->lock);
        iopf_param = param->iopf_param;
        if (iopf_param)
                flush_workqueue(iopf_param->queue->wq);
        else
                ret = -ENODEV;
        mutex_unlock(&param->lock);

        return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);

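/*
 * Ordering sketch (editor's illustration, not part of the upstream file):
 * when tearing down a PASID, the IOMMU driver first makes sure no new faults
 * can be reported (modelled here as a hypothetical drain_hw_prq() callback)
 * and only then flushes the IOPF workqueue with iopf_queue_flush_dev(), so
 * every fault already handed to this file has completed before the PASID is
 * reused. The function and callback names are hypothetical.
 */
static void __maybe_unused example_stop_pasid(struct device *dev,
                                              void (*drain_hw_prq)(struct device *dev))
{
        /*
         * Hypothetical driver step: drain/disable the hardware page request
         * queue so nothing new reaches iommu_queue_iopf() for this device.
         */
        drain_hw_prq(dev);

        /* Wait for faults that were already queued to finish */
        iopf_queue_flush_dev(dev);
}
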
/**
 * iopf_queue_discard_partial - Remove all pending partial faults
 * @queue: the queue whose partial faults need to be discarded
 *
 * When the hardware queue overflows, last page faults in a group may have been
 * lost and the IOMMU driver calls this to discard all partial faults. The
 * driver shouldn't be adding new faults to this queue concurrently.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
        struct iopf_fault *iopf, *next;
        struct iopf_device_param *iopf_param;

        if (!queue)
                return -EINVAL;

        mutex_lock(&queue->lock);
        list_for_each_entry(iopf_param, &queue->devices, queue_list) {
                list_for_each_entry_safe(iopf, next, &iopf_param->partial,
                                         list) {
                        list_del(&iopf->list);
                        kfree(iopf);
                }
        }
        mutex_unlock(&queue->lock);
        return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);

/**
 * iopf_queue_add_device - Add producer to the fault queue
 * @queue: IOPF queue
 * @dev: device to add
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
        int ret = -EBUSY;
        struct iopf_device_param *iopf_param;
        struct dev_iommu *param = dev->iommu;

        if (!param)
                return -ENODEV;

        iopf_param = kzalloc(sizeof(*iopf_param), GFP_KERNEL);
        if (!iopf_param)
                return -ENOMEM;

        INIT_LIST_HEAD(&iopf_param->partial);
        iopf_param->queue = queue;
        iopf_param->dev = dev;

        mutex_lock(&queue->lock);
        mutex_lock(&param->lock);
        if (!param->iopf_param) {
                list_add(&iopf_param->queue_list, &queue->devices);
                param->iopf_param = iopf_param;
                ret = 0;
        }
        mutex_unlock(&param->lock);
        mutex_unlock(&queue->lock);

        if (ret)
                kfree(iopf_param);

        return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);

/**
 * iopf_queue_remove_device - Remove producer from fault queue
 * @queue: IOPF queue
 * @dev: device to remove
 *
 * Caller makes sure that no more faults are reported for this device.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
        int ret = -EINVAL;
        struct iopf_fault *iopf, *next;
        struct iopf_device_param *iopf_param;
        struct dev_iommu *param = dev->iommu;

        if (!param || !queue)
                return -EINVAL;

        mutex_lock(&queue->lock);
        mutex_lock(&param->lock);
        iopf_param = param->iopf_param;
        if (iopf_param && iopf_param->queue == queue) {
                list_del(&iopf_param->queue_list);
                param->iopf_param = NULL;
                ret = 0;
        }
        mutex_unlock(&param->lock);
        mutex_unlock(&queue->lock);
        if (ret)
                return ret;

        /* Just in case some faults are still stuck */
        list_for_each_entry_safe(iopf, next, &iopf_param->partial, list)
                kfree(iopf);

        kfree(iopf_param);

        return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);

/**
 * iopf_queue_alloc - Allocate and initialize a fault queue
 * @name: a unique string identifying the queue (for workqueue)
 *
 * Return: the queue on success and NULL on error.
 */
struct iopf_queue *iopf_queue_alloc(const char *name)
{
        struct iopf_queue *queue;

        queue = kzalloc(sizeof(*queue), GFP_KERNEL);
        if (!queue)
                return NULL;

        /*
         * The WQ is unordered because the low-level handler enqueues faults by
         * group. PRI requests within a group have to be ordered, but once
         * that's dealt with, the high-level function can handle groups out of
         * order.
         */
        queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
        if (!queue->wq) {
                kfree(queue);
                return NULL;
        }

        INIT_LIST_HEAD(&queue->devices);
        mutex_init(&queue->lock);

        return queue;
}
EXPORT_SYMBOL_GPL(iopf_queue_alloc);

/**
 * iopf_queue_free - Free IOPF queue
 * @queue: queue to free
 *
 * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
 * adding/removing devices on this queue anymore.
 */
void iopf_queue_free(struct iopf_queue *queue)
{
        struct iopf_device_param *iopf_param, *next;

        if (!queue)
                return;

        list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
                iopf_queue_remove_device(queue, iopf_param->dev);

        destroy_workqueue(queue->wq);
        kfree(queue);
}
EXPORT_SYMBOL_GPL(iopf_queue_free);

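/*
 * Teardown sketch (editor's illustration, not part of the upstream file):
 * the counterpart to the enable sketch above. A driver unregisters its fault
 * handler and detaches the device before it finally frees the queue it
 * allocated with iopf_queue_alloc(). Function names are hypothetical.
 */
static void __maybe_unused example_disable_iopf(struct device *dev,
                                                struct iopf_queue *queue)
{
        /* No faults are reported for @dev past this point */
        iommu_unregister_device_fault_handler(dev);

        /* Also frees any partial faults still pending for the device */
        iopf_queue_remove_device(queue, dev);
}

static void __maybe_unused example_teardown_iopf(struct iopf_queue *queue)
{
        /* Detaches any remaining devices, then destroys the workqueue */
        iopf_queue_free(queue);
}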