vmci_guest.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware VMCI Driver
 *
 * Copyright (C) 2012 VMware, Inc. All rights reserved.
 */

#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/moduleparam.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/processor.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/io.h>
#include <linux/vmalloc.h>

#include "vmci_datagram.h"
#include "vmci_doorbell.h"
#include "vmci_context.h"
#include "vmci_driver.h"
#include "vmci_event.h"

#define PCI_DEVICE_ID_VMWARE_VMCI	0x0740
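/* Number of resources queried in the host capability check (vmci_check_host_caps()). */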
#define VMCI_UTIL_NUM_RESOURCES 1

/*
 * Datagram buffers for DMA send/receive must accommodate at least
 * a maximum sized datagram and the header.
 */
#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)

static bool vmci_disable_msi;
module_param_named(disable_msi, vmci_disable_msi, bool, 0);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

static bool vmci_disable_msix;
module_param_named(disable_msix, vmci_disable_msix, bool, 0);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");

static u32 ctx_update_sub_id = VMCI_INVALID_ID;
static u32 vm_context_id = VMCI_INVALID_ID;

struct vmci_guest_device {
	struct device *dev;	/* PCI device we are attached to */
	void __iomem *iobase;
	void __iomem *mmio_base;

	bool exclusive_vectors;

	struct wait_queue_head inout_wq;

	void *data_buffer;
	dma_addr_t data_buffer_base;
	void *tx_buffer;
	dma_addr_t tx_buffer_base;
	void *notification_bitmap;
	dma_addr_t notification_base;
};
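/* Set during probe when the device advertises VMCI_CAPS_PPN64; reported via vmci_use_ppn64(). */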
static bool use_ppn64;

bool vmci_use_ppn64(void)
{
	return use_ppn64;
}

/* vmci_dev singleton device and supporting data */
struct pci_dev *vmci_pdev;
static struct vmci_guest_device *vmci_dev_g;
static DEFINE_SPINLOCK(vmci_dev_spinlock);

static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);

bool vmci_guest_code_active(void)
{
	return atomic_read(&vmci_num_guest_devices) != 0;
}
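/*
 * Returns the context ID of this VM, querying the hypervisor with a
 * VMCI_GET_CONTEXT_ID datagram on first use and caching the result.
 */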
u32 vmci_get_vm_context_id(void)
{
	if (vm_context_id == VMCI_INVALID_ID) {
		struct vmci_datagram get_cid_msg;

		get_cid_msg.dst =
		    vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
				     VMCI_GET_CONTEXT_ID);
		get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
		get_cid_msg.payload_size = 0;
		vm_context_id = vmci_send_datagram(&get_cid_msg);
	}

	return vm_context_id;
}
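/*
 * Register accessors: use MMIO (readl/writel) when the BAR1 mapping is
 * available, otherwise fall back to port I/O through the BAR0 iobase.
 */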
static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
{
	if (dev->mmio_base != NULL)
		return readl(dev->mmio_base + reg);
	return ioread32(dev->iobase + reg);
}

static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
{
	if (dev->mmio_base != NULL)
		writel(val, dev->mmio_base + reg);
	else
		iowrite32(val, dev->iobase + reg);
}
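/*
 * Reads @size bytes of incoming datagram data into @dest, either by a
 * "rep in" from the data-in I/O port or, for MMIO devices, by setting up
 * a DMA receive described by the buffer header and waiting for the
 * device to signal completion.
 */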
static void vmci_read_data(struct vmci_guest_device *vmci_dev,
			   void *dest, size_t size)
{
	if (vmci_dev->mmio_base == NULL)
		ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
			    dest, size);
	else {
		/*
		 * For DMA datagrams, the data_buffer will contain the header on the
		 * first page, followed by the incoming datagram(s) on the following
		 * pages. The header uses an S/G element immediately following the
		 * header on the first page to point to the data area.
		 */
		struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
		struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
		size_t buffer_offset = dest - vmci_dev->data_buffer;

		buffer_header->opcode = 1;
		buffer_header->size = 1;
		buffer_header->busy = 0;
		sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
		sg_array[0].size = size;

		vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_LOW_ADDR);

		wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
	}
}
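/*
 * Sends a datagram to the device: for MMIO devices it is copied into the
 * DMA tx buffer and the transfer is kicked off by writing the buffer
 * address to VMCI_DATA_OUT_LOW_ADDR; otherwise it is written directly to
 * the data-out I/O port. Returns a VMCI_* status code.
 */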
static int vmci_write_data(struct vmci_guest_device *dev,
			   struct vmci_datagram *dg)
{
	int result;

	if (dev->mmio_base != NULL) {
		struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
		u8 *dg_out_buffer = (u8 *)(buffer_header + 1);

		if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
			return VMCI_ERROR_INVALID_ARGS;

		/*
		 * Initialize send buffer with outgoing datagram
		 * and set up header for inline data. Device will
		 * not access buffer asynchronously - only after
		 * the write to VMCI_DATA_OUT_LOW_ADDR.
		 */
		memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
		buffer_header->opcode = 0;
		buffer_header->size = VMCI_DG_SIZE(dg);
		buffer_header->busy = 1;

		vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
			       VMCI_DATA_OUT_LOW_ADDR);

		/* Caller holds a spinlock, so cannot block. */
		spin_until_cond(buffer_header->busy == 0);

		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
		if (result == VMCI_SUCCESS)
			result = (int)buffer_header->result;
	} else {
		iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
			     dg, VMCI_DG_SIZE(dg));
		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
	}

	return result;
}
/*
 * VM to hypervisor call mechanism. We use the standard VMware naming
 * convention since shared code is calling this function as well.
 */
int vmci_send_datagram(struct vmci_datagram *dg)
{
	unsigned long flags;
	int result;

	/* Check args. */
	if (dg == NULL)
		return VMCI_ERROR_INVALID_ARGS;

	/*
	 * Need to acquire spinlock on the device because the datagram
	 * data may be spread over multiple pages and the monitor may
	 * interleave device user rpc calls from multiple
	 * VCPUs. Acquiring the spinlock precludes that
	 * possibility. Disabling interrupts to avoid incoming
	 * datagrams during a "rep out" and possibly landing up in
	 * this function.
	 */
	spin_lock_irqsave(&vmci_dev_spinlock, flags);
	if (vmci_dev_g) {
		result = vmci_write_data(vmci_dev_g, dg);
	} else {
		result = VMCI_ERROR_UNAVAILABLE;
	}
	spin_unlock_irqrestore(&vmci_dev_spinlock, flags);

	return result;
}
EXPORT_SYMBOL_GPL(vmci_send_datagram);
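/*
 * Illustrative use only: a caller fills in a struct vmci_datagram (dst/src
 * handles and payload_size) and hands it to vmci_send_datagram(), as
 * vmci_get_vm_context_id() above and vmci_check_host_caps() below do:
 *
 *	struct vmci_datagram dg;
 *	int result;
 *
 *	dg.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
 *				  VMCI_GET_CONTEXT_ID);
 *	dg.src = VMCI_ANON_SRC_HANDLE;
 *	dg.payload_size = 0;
 *	result = vmci_send_datagram(&dg);
 */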
/*
 * Gets called with the new context id if the context is updated or resumed.
 */
static void vmci_guest_cid_update(u32 sub_id,
				  const struct vmci_event_data *event_data,
				  void *client_data)
{
	const struct vmci_event_payld_ctx *ev_payload =
				vmci_event_data_const_payload(event_data);

	if (sub_id != ctx_update_sub_id) {
		pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
		return;
	}

	if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
		pr_devel("Invalid event data\n");
		return;
	}

	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
		 vm_context_id, ev_payload->context_id, event_data->event);

	vm_context_id = ev_payload->context_id;
}

/*
 * Verify that the host supports the hypercalls we need. If it does not,
 * try to find fallback hypercalls and use those instead. Returns 0 if
 * required hypercalls (or fallback hypercalls) are supported by the host,
 * an error code otherwise.
 */
static int vmci_check_host_caps(struct pci_dev *pdev)
{
	bool result;
	struct vmci_resource_query_msg *msg;
	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
				VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
	struct vmci_datagram *check_msg;

	check_msg = kzalloc(msg_size, GFP_KERNEL);
	if (!check_msg) {
		dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
		return -ENOMEM;
	}

	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
					  VMCI_RESOURCES_QUERY);
	check_msg->src = VMCI_ANON_SRC_HANDLE;
	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);

	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
	msg->resources[0] = VMCI_GET_CONTEXT_ID;

	/* Checks that hypercalls are supported */
	result = vmci_send_datagram(check_msg) == 0x01;
	kfree(check_msg);

	dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
		__func__, result ? "PASSED" : "FAILED");

	/* We need the vector. There are no fallbacks. */
	return result ? 0 : -ENXIO;
}
/*
 * Reads datagrams from the device and dispatches them. For IO port
 * based access to the device, we always start reading datagrams into
 * only the first page of the datagram buffer. If the datagrams don't
 * fit into one page, we use the maximum datagram buffer size for the
 * remainder of the invocation. This is a simple heuristic for not
 * penalizing small datagrams. For DMA-based datagrams, we always
 * use the maximum datagram buffer size, since there is no performance
 * penalty for doing so.
 *
 * This function assumes that it has exclusive access to the data
 * in register(s) for the duration of the call.
 */
static void vmci_dispatch_dgs(struct vmci_guest_device *vmci_dev)
{
	u8 *dg_in_buffer = vmci_dev->data_buffer;
	struct vmci_datagram *dg;
	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	size_t current_dg_in_buffer_size;
	size_t remaining_bytes;
	bool is_io_port = vmci_dev->mmio_base == NULL;

	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);

	if (!is_io_port) {
		/* For mmio, the first page is used for the header. */
		dg_in_buffer += PAGE_SIZE;

		/*
		 * For DMA-based datagram operations, there is no performance
		 * penalty for reading the maximum buffer size.
		 */
		current_dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	} else {
		current_dg_in_buffer_size = PAGE_SIZE;
	}
	vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size);
	dg = (struct vmci_datagram *)dg_in_buffer;
	remaining_bytes = current_dg_in_buffer_size;

	/*
	 * Read through the buffer until an invalid datagram header is
	 * encountered. The exit condition for datagrams read through
	 * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram
	 * can start on any page boundary in the buffer.
	 */
	while (dg->dst.resource != VMCI_INVALID_ID ||
	       (is_io_port && remaining_bytes > PAGE_SIZE)) {
		unsigned dg_in_size;

		/*
		 * If using VMCI_DATA_IN_ADDR, skip to the next page
		 * as a datagram can start on any page boundary.
		 */
		if (dg->dst.resource == VMCI_INVALID_ID) {
			dg = (struct vmci_datagram *)roundup(
				(uintptr_t)dg + 1, PAGE_SIZE);
			remaining_bytes =
				(size_t)(dg_in_buffer +
					 current_dg_in_buffer_size -
					 (u8 *)dg);
			continue;
		}

		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);

		if (dg_in_size <= dg_in_buffer_size) {
			int result;

			/*
			 * If the remaining bytes in the datagram
			 * buffer don't contain the complete
			 * datagram, we first make sure we have enough
			 * room for it and then we read the remainder
			 * of the datagram and possibly any following
			 * datagrams.
			 */
			if (dg_in_size > remaining_bytes) {
				if (remaining_bytes !=
				    current_dg_in_buffer_size) {

					/*
					 * We move the partial
					 * datagram to the front and
					 * read the remainder of the
					 * datagram and possibly
					 * following calls into the
					 * following bytes.
					 */
					memmove(dg_in_buffer, dg_in_buffer +
						current_dg_in_buffer_size -
						remaining_bytes,
						remaining_bytes);
					dg = (struct vmci_datagram *)
					    dg_in_buffer;
				}

				if (current_dg_in_buffer_size !=
				    dg_in_buffer_size)
					current_dg_in_buffer_size =
					    dg_in_buffer_size;

				vmci_read_data(vmci_dev,
					       dg_in_buffer +
					       remaining_bytes,
					       current_dg_in_buffer_size -
					       remaining_bytes);
			}

			/*
			 * We special case event datagrams from the
			 * hypervisor.
			 */
			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
			    dg->dst.resource == VMCI_EVENT_HANDLER) {
				result = vmci_event_dispatch(dg);
			} else {
				result = vmci_datagram_invoke_guest_handler(dg);
			}
			if (result < VMCI_SUCCESS)
				dev_dbg(vmci_dev->dev,
					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
					dg->dst.resource, result);

			/* On to the next datagram. */
			dg = (struct vmci_datagram *)((u8 *)dg +
						      dg_in_size);
		} else {
			size_t bytes_to_skip;

			/*
			 * Datagram doesn't fit in datagram buffer of maximal
			 * size. We drop it.
			 */
			dev_dbg(vmci_dev->dev,
				"Failed to receive datagram (size=%u bytes)\n",
				dg_in_size);

			bytes_to_skip = dg_in_size - remaining_bytes;
			if (current_dg_in_buffer_size != dg_in_buffer_size)
				current_dg_in_buffer_size = dg_in_buffer_size;

			for (;;) {
				vmci_read_data(vmci_dev, dg_in_buffer,
					       current_dg_in_buffer_size);
				if (bytes_to_skip <= current_dg_in_buffer_size)
					break;

				bytes_to_skip -= current_dg_in_buffer_size;
			}
			dg = (struct vmci_datagram *)(dg_in_buffer +
						      bytes_to_skip);
		}

		remaining_bytes =
		    (size_t) (dg_in_buffer + current_dg_in_buffer_size -
			      (u8 *)dg);

		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
			/* Get the next batch of datagrams. */

			vmci_read_data(vmci_dev, dg_in_buffer,
				       current_dg_in_buffer_size);
			dg = (struct vmci_datagram *)dg_in_buffer;
			remaining_bytes = current_dg_in_buffer_size;
		}
	}
}
/*
 * Scans the notification bitmap for raised flags, clears them
 * and handles the notifications.
 */
static void vmci_process_bitmap(struct vmci_guest_device *dev)
{
	if (!dev->notification_bitmap) {
		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
		return;
	}

	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
}

/*
 * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
 * interrupt (vector VMCI_INTR_DATAGRAM).
 */
static irqreturn_t vmci_interrupt(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/*
	 * If we are using MSI-X with exclusive vectors then we simply call
	 * vmci_dispatch_dgs(), since we know the interrupt was meant for us.
	 * Otherwise we must read the ICR to determine what to do.
	 */
	if (dev->exclusive_vectors) {
		vmci_dispatch_dgs(dev);
	} else {
		unsigned int icr;

		/* Acknowledge interrupt and determine what needs doing. */
		icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
		if (icr == 0 || icr == ~0)
			return IRQ_NONE;

		if (icr & VMCI_ICR_DATAGRAM) {
			vmci_dispatch_dgs(dev);
			icr &= ~VMCI_ICR_DATAGRAM;
		}

		if (icr & VMCI_ICR_NOTIFICATION) {
			vmci_process_bitmap(dev);
			icr &= ~VMCI_ICR_NOTIFICATION;
		}

		if (icr & VMCI_ICR_DMA_DATAGRAM) {
			wake_up_all(&dev->inout_wq);
			icr &= ~VMCI_ICR_DMA_DATAGRAM;
		}

		if (icr != 0)
			dev_warn(dev->dev,
				 "Ignoring unknown interrupt cause (%d)\n",
				 icr);
	}

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
 * which is for the notification bitmap. Will only get called if we are
 * using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/* For MSI-X we can just assume it was meant for us. */
	vmci_process_bitmap(dev);

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
 * which is for the completion of a DMA datagram send or receive operation.
 * Will only get called if we are using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	wake_up_all(&dev->inout_wq);

	return IRQ_HANDLED;
}
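/*
 * Frees the datagram send/receive buffers: DMA-coherent buffers when the
 * device uses MMIO register access, a vmalloc'ed receive buffer otherwise.
 */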
static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
{
	if (vmci_dev->mmio_base != NULL) {
		if (vmci_dev->tx_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->tx_buffer,
					  vmci_dev->tx_buffer_base);
		if (vmci_dev->data_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->data_buffer,
					  vmci_dev->data_buffer_base);
	} else {
		vfree(vmci_dev->data_buffer);
	}
}
/*
 * Most of the initialization at module load time is done here.
 */
static int vmci_guest_probe_device(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	struct vmci_guest_device *vmci_dev;
	void __iomem *iobase = NULL;
	void __iomem *mmio_base = NULL;
	unsigned int num_irq_vectors;
	unsigned int capabilities;
	unsigned int caps_in_use;
	unsigned long cmd;
	int vmci_err;
	int error;

	dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");

	error = pcim_enable_device(pdev);
	if (error) {
		dev_err(&pdev->dev,
			"Failed to enable VMCI device: %d\n", error);
		return error;
	}

	/*
	 * The VMCI device with mmio access to registers requests 256KB
	 * for BAR1. If present, driver will use new VMCI device
	 * functionality for register access and datagram send/recv.
	 */
	if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
		dev_info(&pdev->dev, "MMIO register access is available\n");
		mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
					    VMCI_MMIO_ACCESS_SIZE);
		/* If the map fails, we fall back to IOIO access. */
		if (!mmio_base)
			dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
	}

	if (!mmio_base) {
		if (IS_ENABLED(CONFIG_ARM64)) {
			dev_err(&pdev->dev, "MMIO base is invalid\n");
			return -ENXIO;
		}
		error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
		if (error) {
			dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
			return error;
		}
		iobase = pcim_iomap_table(pdev)[0];
	}

	vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
	if (!vmci_dev) {
		dev_err(&pdev->dev,
			"Can't allocate memory for VMCI device\n");
		return -ENOMEM;
	}

	vmci_dev->dev = &pdev->dev;
	vmci_dev->exclusive_vectors = false;
	vmci_dev->iobase = iobase;
	vmci_dev->mmio_base = mmio_base;

	init_waitqueue_head(&vmci_dev->inout_wq);

	if (mmio_base != NULL) {
		vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							 &vmci_dev->tx_buffer_base,
							 GFP_KERNEL);
		if (!vmci_dev->tx_buffer) {
			dev_err(&pdev->dev,
				"Can't allocate memory for datagram tx buffer\n");
			return -ENOMEM;
		}

		vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							   &vmci_dev->data_buffer_base,
							   GFP_KERNEL);
	} else {
		vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
	}
	if (!vmci_dev->data_buffer) {
		dev_err(&pdev->dev,
			"Can't allocate memory for datagram buffer\n");
		error = -ENOMEM;
		goto err_free_data_buffers;
	}

	pci_set_master(pdev);	/* To enable queue_pair functionality. */

	/*
	 * Verify that the VMCI Device supports the capabilities that
	 * we need. If the device is missing capabilities that we would
	 * like to use, check for fallback capabilities and use those
	 * instead (so we can run a new VM on old hosts). Fail the load if
	 * a required capability is missing and there is no fallback.
	 *
	 * Right now, we need datagrams. There are no fallbacks.
	 */
	capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
		dev_err(&pdev->dev, "Device does not support datagrams\n");
		error = -ENXIO;
		goto err_free_data_buffers;
	}
	caps_in_use = VMCI_CAPS_DATAGRAM;

	/*
	 * Use 64-bit PPNs if the device supports them.
	 *
	 * There is no check for the return value of dma_set_mask_and_coherent
	 * since this driver can handle the default mask values if
	 * dma_set_mask_and_coherent fails.
	 */
	if (capabilities & VMCI_CAPS_PPN64) {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
		use_ppn64 = true;
		caps_in_use |= VMCI_CAPS_PPN64;
	} else {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
		use_ppn64 = false;
	}

	/*
	 * If the hardware supports notifications, we will use that as
	 * well.
	 */
	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
		vmci_dev->notification_bitmap = dma_alloc_coherent(
			&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
			GFP_KERNEL);
		if (!vmci_dev->notification_bitmap)
			dev_warn(&pdev->dev,
				 "Unable to allocate notification bitmap\n");
		else
			caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
	}

	if (mmio_base != NULL) {
		if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
			caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
		} else {
			dev_err(&pdev->dev,
				"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
			error = -ENXIO;
			goto err_free_notification_bitmap;
		}
	}

	dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);

	/* Let the host know which capabilities we intend to use. */
	vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);

	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
		/* Let the device know the size for pages passed down. */
		vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);

		/* Configure the high order parts of the data in/out buffers. */
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_HIGH_ADDR);
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
			       VMCI_DATA_OUT_HIGH_ADDR);
	}

	/* Set up global device so that we can start sending datagrams */
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = vmci_dev;
	vmci_pdev = pdev;
	spin_unlock_irq(&vmci_dev_spinlock);

	/*
	 * Register notification bitmap with device if that capability is
	 * used.
	 */
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) {
		unsigned long bitmap_ppn =
			vmci_dev->notification_base >> PAGE_SHIFT;
		if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
			dev_warn(&pdev->dev,
				 "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
				 bitmap_ppn);
			error = -ENXIO;
			goto err_remove_vmci_dev_g;
		}
	}

	/* Check host capabilities. */
	error = vmci_check_host_caps(pdev);
	if (error)
		goto err_remove_vmci_dev_g;

	/* Enable device. */

	/*
	 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
	 * update the internal context id when needed.
	 */
	vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
					vmci_guest_cid_update, NULL,
					&ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to subscribe to event (type=%d): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);

	/*
	 * Enable interrupts. Try MSI-X first, then MSI, and then fall back on
	 * legacy interrupts.
	 */
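	/*
	 * Devices with MMIO register access expose an extra MSI-X vector for
	 * DMA datagram completion, so they request VMCI_MAX_INTRS vectors.
	 */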
	if (vmci_dev->mmio_base != NULL)
		num_irq_vectors = VMCI_MAX_INTRS;
	else
		num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
	error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
				      PCI_IRQ_MSIX);
	if (error < 0) {
		error = pci_alloc_irq_vectors(pdev, 1, 1,
				PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
		if (error < 0)
			goto err_unsubscribe_event;
	} else {
		vmci_dev->exclusive_vectors = true;
	}

	/*
	 * Request IRQ for legacy or MSI interrupts, or for first
	 * MSI-X vector.
	 */
	error = request_threaded_irq(pci_irq_vector(pdev, 0), NULL,
				     vmci_interrupt, IRQF_SHARED,
				     KBUILD_MODNAME, vmci_dev);
	if (error) {
		dev_err(&pdev->dev, "Irq %u in use: %d\n",
			pci_irq_vector(pdev, 0), error);
		goto err_disable_msi;
	}

	/*
	 * For MSI-X with exclusive vectors we need to request an
	 * interrupt for each vector so that we get a separate
	 * interrupt handler routine. This allows us to distinguish
	 * between the vectors.
	 */
	if (vmci_dev->exclusive_vectors) {
		error = request_threaded_irq(pci_irq_vector(pdev, 1), NULL,
					     vmci_interrupt_bm, 0,
					     KBUILD_MODNAME, vmci_dev);
		if (error) {
			dev_err(&pdev->dev,
				"Failed to allocate irq %u: %d\n",
				pci_irq_vector(pdev, 1), error);
			goto err_free_irq;
		}
		if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
			error = request_threaded_irq(pci_irq_vector(pdev, 2),
						     NULL,
						     vmci_interrupt_dma_datagram,
						     0, KBUILD_MODNAME,
						     vmci_dev);
			if (error) {
				dev_err(&pdev->dev,
					"Failed to allocate irq %u: %d\n",
					pci_irq_vector(pdev, 2), error);
				goto err_free_bm_irq;
			}
		}
	}

	dev_dbg(&pdev->dev, "Registered device\n");

	atomic_inc(&vmci_num_guest_devices);

	/* Enable specific interrupt bits. */
	cmd = VMCI_IMR_DATAGRAM;
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
		cmd |= VMCI_IMR_NOTIFICATION;
	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
		cmd |= VMCI_IMR_DMA_DATAGRAM;
	vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);

	/* Enable interrupts. */
	vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);

	pci_set_drvdata(pdev, vmci_dev);

	vmci_call_vsock_callback(false);
	return 0;
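	/*
	 * Error paths: each label below releases what was set up before the
	 * corresponding failure point, ending with the datagram buffers.
	 */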
err_free_bm_irq:
	if (vmci_dev->exclusive_vectors)
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);

err_free_irq:
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);

err_disable_msi:
	pci_free_irq_vectors(pdev);

err_unsubscribe_event:
	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

err_remove_vmci_dev_g:
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_pdev = NULL;
	vmci_dev_g = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

err_free_notification_bitmap:
	if (vmci_dev->notification_bitmap) {
		vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

err_free_data_buffers:
	vmci_free_dg_buffers(vmci_dev);

	/* The rest are managed resources and will be freed by PCI core */
	return error;
}
static void vmci_guest_remove_device(struct pci_dev *pdev)
{
	struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
	int vmci_err;

	dev_dbg(&pdev->dev, "Removing device\n");

	atomic_dec(&vmci_num_guest_devices);

	vmci_qp_guest_endpoints_exit();

	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = NULL;
	vmci_pdev = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

	dev_dbg(&pdev->dev, "Resetting vmci device\n");
	vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);

	/*
	 * Free IRQ and then disable MSI/MSI-X as appropriate. For
	 * MSI-X, we might have multiple vectors, each with their own
	 * IRQ, which we must free too.
	 */
	if (vmci_dev->exclusive_vectors) {
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
		if (vmci_dev->mmio_base != NULL)
			free_irq(pci_irq_vector(pdev, 2), vmci_dev);
	}
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
	pci_free_irq_vectors(pdev);

	if (vmci_dev->notification_bitmap) {
		/*
		 * The device reset above cleared the bitmap state of the
		 * device, so we can safely free it here.
		 */
		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

	vmci_free_dg_buffers(vmci_dev);

	if (vmci_dev->mmio_base != NULL)
		pci_iounmap(pdev, vmci_dev->mmio_base);

	/* The rest are managed resources and will be freed by PCI core */
}
static const struct pci_device_id vmci_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
	{ 0 },
};
MODULE_DEVICE_TABLE(pci, vmci_ids);

static struct pci_driver vmci_guest_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= vmci_ids,
	.probe		= vmci_guest_probe_device,
	.remove		= vmci_guest_remove_device,
};

int __init vmci_guest_init(void)
{
	return pci_register_driver(&vmci_guest_driver);
}

void __exit vmci_guest_exit(void)
{
	pci_unregister_driver(&vmci_guest_driver);
}