vfio_pci_rdwr.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * VFIO PCI I/O Port & MMIO access
  4. *
  5. * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
  6. * Author: Alex Williamson <[email protected]>
  7. *
  8. * Derived from original vfio:
  9. * Copyright 2010 Cisco Systems, Inc. All rights reserved.
  10. * Author: Tom Lyon, [email protected]
  11. */
  12. #include <linux/fs.h>
  13. #include <linux/pci.h>
  14. #include <linux/uaccess.h>
  15. #include <linux/io.h>
  16. #include <linux/vfio.h>
  17. #include <linux/vgaarb.h>
  18. #include "vfio_pci_priv.h"
  19. #ifdef __LITTLE_ENDIAN
  20. #define vfio_ioread64 ioread64
  21. #define vfio_iowrite64 iowrite64
  22. #define vfio_ioread32 ioread32
  23. #define vfio_iowrite32 iowrite32
  24. #define vfio_ioread16 ioread16
  25. #define vfio_iowrite16 iowrite16
  26. #else
  27. #define vfio_ioread64 ioread64be
  28. #define vfio_iowrite64 iowrite64be
  29. #define vfio_ioread32 ioread32be
  30. #define vfio_iowrite32 iowrite32be
  31. #define vfio_ioread16 ioread16be
  32. #define vfio_iowrite16 iowrite16be
  33. #endif
  34. #define vfio_ioread8 ioread8
  35. #define vfio_iowrite8 iowrite8
  36. #define VFIO_IOWRITE(size) \
  37. static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev, \
  38. bool test_mem, u##size val, void __iomem *io) \
  39. { \
  40. if (test_mem) { \
  41. down_read(&vdev->memory_lock); \
  42. if (!__vfio_pci_memory_enabled(vdev)) { \
  43. up_read(&vdev->memory_lock); \
  44. return -EIO; \
  45. } \
  46. } \
  47. \
  48. vfio_iowrite##size(val, io); \
  49. \
  50. if (test_mem) \
  51. up_read(&vdev->memory_lock); \
  52. \
  53. return 0; \
  54. }
  55. VFIO_IOWRITE(8)
  56. VFIO_IOWRITE(16)
  57. VFIO_IOWRITE(32)
  58. #ifdef iowrite64
  59. VFIO_IOWRITE(64)
  60. #endif
  61. #define VFIO_IOREAD(size) \
  62. static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev, \
  63. bool test_mem, u##size *val, void __iomem *io) \
  64. { \
  65. if (test_mem) { \
  66. down_read(&vdev->memory_lock); \
  67. if (!__vfio_pci_memory_enabled(vdev)) { \
  68. up_read(&vdev->memory_lock); \
  69. return -EIO; \
  70. } \
  71. } \
  72. \
  73. *val = vfio_ioread##size(io); \
  74. \
  75. if (test_mem) \
  76. up_read(&vdev->memory_lock); \
  77. \
  78. return 0; \
  79. }
  80. VFIO_IOREAD(8)
  81. VFIO_IOREAD(16)
  82. VFIO_IOREAD(32)
  83. /*
  84. * Read or write from an __iomem region (MMIO or I/O port) with an excluded
  85. * range which is inaccessible. The excluded range drops writes and fills
  86. * reads with -1. This is intended for handling MSI-X vector tables and
  87. * leftover space for ROM BARs.
  88. */
  89. static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
  90. void __iomem *io, char __user *buf,
  91. loff_t off, size_t count, size_t x_start,
  92. size_t x_end, bool iswrite)
  93. {
  94. ssize_t done = 0;
  95. int ret;
  96. while (count) {
  97. size_t fillable, filled;
  98. if (off < x_start)
  99. fillable = min(count, (size_t)(x_start - off));
  100. else if (off >= x_end)
  101. fillable = count;
  102. else
  103. fillable = 0;
  104. if (fillable >= 4 && !(off % 4)) {
  105. u32 val;
  106. if (iswrite) {
  107. if (copy_from_user(&val, buf, 4))
  108. return -EFAULT;
  109. ret = vfio_pci_iowrite32(vdev, test_mem,
  110. val, io + off);
  111. if (ret)
  112. return ret;
  113. } else {
  114. ret = vfio_pci_ioread32(vdev, test_mem,
  115. &val, io + off);
  116. if (ret)
  117. return ret;
  118. if (copy_to_user(buf, &val, 4))
  119. return -EFAULT;
  120. }
  121. filled = 4;
  122. } else if (fillable >= 2 && !(off % 2)) {
  123. u16 val;
  124. if (iswrite) {
  125. if (copy_from_user(&val, buf, 2))
  126. return -EFAULT;
  127. ret = vfio_pci_iowrite16(vdev, test_mem,
  128. val, io + off);
  129. if (ret)
  130. return ret;
  131. } else {
  132. ret = vfio_pci_ioread16(vdev, test_mem,
  133. &val, io + off);
  134. if (ret)
  135. return ret;
  136. if (copy_to_user(buf, &val, 2))
  137. return -EFAULT;
  138. }
  139. filled = 2;
  140. } else if (fillable) {
  141. u8 val;
  142. if (iswrite) {
  143. if (copy_from_user(&val, buf, 1))
  144. return -EFAULT;
  145. ret = vfio_pci_iowrite8(vdev, test_mem,
  146. val, io + off);
  147. if (ret)
  148. return ret;
  149. } else {
  150. ret = vfio_pci_ioread8(vdev, test_mem,
  151. &val, io + off);
  152. if (ret)
  153. return ret;
  154. if (copy_to_user(buf, &val, 1))
  155. return -EFAULT;
  156. }
  157. filled = 1;
  158. } else {
  159. /* Fill reads with -1, drop writes */
  160. filled = min(count, (size_t)(x_end - off));
  161. if (!iswrite) {
  162. u8 val = 0xFF;
  163. size_t i;
  164. for (i = 0; i < filled; i++)
  165. if (copy_to_user(buf + i, &val, 1))
  166. return -EFAULT;
  167. }
  168. }
  169. count -= filled;
  170. done += filled;
  171. off += filled;
  172. buf += filled;
  173. }
  174. return done;
  175. }
  176. static int vfio_pci_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
  177. {
  178. struct pci_dev *pdev = vdev->pdev;
  179. int ret;
  180. void __iomem *io;
  181. if (vdev->barmap[bar])
  182. return 0;
  183. ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
  184. if (ret)
  185. return ret;
  186. io = pci_iomap(pdev, bar, 0);
  187. if (!io) {
  188. pci_release_selected_regions(pdev, 1 << bar);
  189. return -ENOMEM;
  190. }
  191. vdev->barmap[bar] = io;
  192. return 0;
  193. }
  194. ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
  195. size_t count, loff_t *ppos, bool iswrite)
  196. {
  197. struct pci_dev *pdev = vdev->pdev;
  198. loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
  199. int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
  200. size_t x_start = 0, x_end = 0;
  201. resource_size_t end;
  202. void __iomem *io;
  203. struct resource *res = &vdev->pdev->resource[bar];
  204. ssize_t done;
  205. if (pci_resource_start(pdev, bar))
  206. end = pci_resource_len(pdev, bar);
  207. else if (bar == PCI_ROM_RESOURCE &&
  208. pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
  209. end = 0x20000;
  210. else
  211. return -EINVAL;
  212. if (pos >= end)
  213. return -EINVAL;
  214. count = min(count, (size_t)(end - pos));
  215. if (bar == PCI_ROM_RESOURCE) {
  216. /*
  217. * The ROM can fill less space than the BAR, so we start the
  218. * excluded range at the end of the actual ROM. This makes
  219. * filling large ROM BARs much faster.
  220. */
  221. io = pci_map_rom(pdev, &x_start);
  222. if (!io) {
  223. done = -ENOMEM;
  224. goto out;
  225. }
  226. x_end = end;
  227. } else {
  228. int ret = vfio_pci_setup_barmap(vdev, bar);
  229. if (ret) {
  230. done = ret;
  231. goto out;
  232. }
  233. io = vdev->barmap[bar];
  234. }
  235. if (bar == vdev->msix_bar) {
  236. x_start = vdev->msix_offset;
  237. x_end = vdev->msix_offset + vdev->msix_size;
  238. }
  239. done = do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
  240. count, x_start, x_end, iswrite);
  241. if (done >= 0)
  242. *ppos += done;
  243. if (bar == PCI_ROM_RESOURCE)
  244. pci_unmap_rom(pdev, io);
  245. out:
  246. return done;
  247. }
  248. #ifdef CONFIG_VFIO_PCI_VGA
  249. ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
  250. size_t count, loff_t *ppos, bool iswrite)
  251. {
  252. int ret;
  253. loff_t off, pos = *ppos & VFIO_PCI_OFFSET_MASK;
  254. void __iomem *iomem = NULL;
  255. unsigned int rsrc;
  256. bool is_ioport;
  257. ssize_t done;
  258. if (!vdev->has_vga)
  259. return -EINVAL;
  260. if (pos > 0xbfffful)
  261. return -EINVAL;
  262. switch ((u32)pos) {
  263. case 0xa0000 ... 0xbffff:
  264. count = min(count, (size_t)(0xc0000 - pos));
  265. iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
  266. off = pos - 0xa0000;
  267. rsrc = VGA_RSRC_LEGACY_MEM;
  268. is_ioport = false;
  269. break;
  270. case 0x3b0 ... 0x3bb:
  271. count = min(count, (size_t)(0x3bc - pos));
  272. iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
  273. off = pos - 0x3b0;
  274. rsrc = VGA_RSRC_LEGACY_IO;
  275. is_ioport = true;
  276. break;
  277. case 0x3c0 ... 0x3df:
  278. count = min(count, (size_t)(0x3e0 - pos));
  279. iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
  280. off = pos - 0x3c0;
  281. rsrc = VGA_RSRC_LEGACY_IO;
  282. is_ioport = true;
  283. break;
  284. default:
  285. return -EINVAL;
  286. }
  287. if (!iomem)
  288. return -ENOMEM;
  289. ret = vga_get_interruptible(vdev->pdev, rsrc);
  290. if (ret) {
  291. is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
  292. return ret;
  293. }
  294. /*
  295. * VGA MMIO is a legacy, non-BAR resource that hopefully allows
  296. * probing, so we don't currently worry about access in relation
  297. * to the memory enable bit in the command register.
  298. */
  299. done = do_io_rw(vdev, false, iomem, buf, off, count, 0, 0, iswrite);
  300. vga_put(vdev->pdev, rsrc);
  301. is_ioport ? ioport_unmap(iomem) : iounmap(iomem);
  302. if (done >= 0)
  303. *ppos += done;
  304. return done;
  305. }
  306. #endif
  307. static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
  308. bool test_mem)
  309. {
  310. switch (ioeventfd->count) {
  311. case 1:
  312. vfio_pci_iowrite8(ioeventfd->vdev, test_mem,
  313. ioeventfd->data, ioeventfd->addr);
  314. break;
  315. case 2:
  316. vfio_pci_iowrite16(ioeventfd->vdev, test_mem,
  317. ioeventfd->data, ioeventfd->addr);
  318. break;
  319. case 4:
  320. vfio_pci_iowrite32(ioeventfd->vdev, test_mem,
  321. ioeventfd->data, ioeventfd->addr);
  322. break;
  323. #ifdef iowrite64
  324. case 8:
  325. vfio_pci_iowrite64(ioeventfd->vdev, test_mem,
  326. ioeventfd->data, ioeventfd->addr);
  327. break;
  328. #endif
  329. }
  330. }
  331. static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
  332. {
  333. struct vfio_pci_ioeventfd *ioeventfd = opaque;
  334. struct vfio_pci_core_device *vdev = ioeventfd->vdev;
  335. if (ioeventfd->test_mem) {
  336. if (!down_read_trylock(&vdev->memory_lock))
  337. return 1; /* Lock contended, use thread */
  338. if (!__vfio_pci_memory_enabled(vdev)) {
  339. up_read(&vdev->memory_lock);
  340. return 0;
  341. }
  342. }
  343. vfio_pci_ioeventfd_do_write(ioeventfd, false);
  344. if (ioeventfd->test_mem)
  345. up_read(&vdev->memory_lock);
  346. return 0;
  347. }
  348. static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
  349. {
  350. struct vfio_pci_ioeventfd *ioeventfd = opaque;
  351. vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
  352. }
  353. int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
  354. uint64_t data, int count, int fd)
  355. {
  356. struct pci_dev *pdev = vdev->pdev;
  357. loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
  358. int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
  359. struct vfio_pci_ioeventfd *ioeventfd;
  360. /* Only support ioeventfds into BARs */
  361. if (bar > VFIO_PCI_BAR5_REGION_INDEX)
  362. return -EINVAL;
  363. if (pos + count > pci_resource_len(pdev, bar))
  364. return -EINVAL;
  365. /* Disallow ioeventfds working around MSI-X table writes */
  366. if (bar == vdev->msix_bar &&
  367. !(pos + count <= vdev->msix_offset ||
  368. pos >= vdev->msix_offset + vdev->msix_size))
  369. return -EINVAL;
  370. #ifndef iowrite64
  371. if (count == 8)
  372. return -EINVAL;
  373. #endif
  374. ret = vfio_pci_setup_barmap(vdev, bar);
  375. if (ret)
  376. return ret;
  377. mutex_lock(&vdev->ioeventfds_lock);
  378. list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
  379. if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
  380. ioeventfd->data == data && ioeventfd->count == count) {
  381. if (fd == -1) {
  382. vfio_virqfd_disable(&ioeventfd->virqfd);
  383. list_del(&ioeventfd->next);
  384. vdev->ioeventfds_nr--;
  385. kfree(ioeventfd);
  386. ret = 0;
  387. } else
  388. ret = -EEXIST;
  389. goto out_unlock;
  390. }
  391. }
  392. if (fd < 0) {
  393. ret = -ENODEV;
  394. goto out_unlock;
  395. }
  396. if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
  397. ret = -ENOSPC;
  398. goto out_unlock;
  399. }
  400. ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL);
  401. if (!ioeventfd) {
  402. ret = -ENOMEM;
  403. goto out_unlock;
  404. }
  405. ioeventfd->vdev = vdev;
  406. ioeventfd->addr = vdev->barmap[bar] + pos;
  407. ioeventfd->data = data;
  408. ioeventfd->pos = pos;
  409. ioeventfd->bar = bar;
  410. ioeventfd->count = count;
  411. ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;
  412. ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
  413. vfio_pci_ioeventfd_thread, NULL,
  414. &ioeventfd->virqfd, fd);
  415. if (ret) {
  416. kfree(ioeventfd);
  417. goto out_unlock;
  418. }
  419. list_add(&ioeventfd->next, &vdev->ioeventfds_list);
  420. vdev->ioeventfds_nr++;
  421. out_unlock:
  422. mutex_unlock(&vdev->ioeventfds_lock);
  423. return ret;
  424. }