123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502 |
- // SPDX-License-Identifier: GPL-2.0-only
- /*
- * VFIO PCI I/O Port & MMIO access
- *
- * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
- * Author: Alex Williamson <[email protected]>
- *
- * Derived from original vfio:
- * Copyright 2010 Cisco Systems, Inc. All rights reserved.
- * Author: Tom Lyon, [email protected]
- */
- #include <linux/fs.h>
- #include <linux/pci.h>
- #include <linux/uaccess.h>
- #include <linux/io.h>
- #include <linux/vfio.h>
- #include <linux/vgaarb.h>
- #include "vfio_pci_priv.h"
/*
 * Width-specific accessor aliases used by the helpers in this file.
 *
 * Multi-byte accesses map to the plain io{read,write}N() helpers when
 * __LITTLE_ENDIAN is defined and to the io{read,write}Nbe() variants
 * otherwise.  Single-byte accesses use the same helper in both cases.
 */
#if defined(__LITTLE_ENDIAN)
#define vfio_ioread16	ioread16
#define vfio_iowrite16	iowrite16
#define vfio_ioread32	ioread32
#define vfio_iowrite32	iowrite32
#define vfio_ioread64	ioread64
#define vfio_iowrite64	iowrite64
#else
#define vfio_ioread16	ioread16be
#define vfio_iowrite16	iowrite16be
#define vfio_ioread32	ioread32be
#define vfio_iowrite32	iowrite32be
#define vfio_ioread64	ioread64be
#define vfio_iowrite64	iowrite64be
#endif

#define vfio_ioread8	ioread8
#define vfio_iowrite8	iowrite8
- #define VFIO_IOWRITE(size) \
- static int vfio_pci_iowrite##size(struct vfio_pci_core_device *vdev, \
- bool test_mem, u##size val, void __iomem *io) \
- { \
- if (test_mem) { \
- down_read(&vdev->memory_lock); \
- if (!__vfio_pci_memory_enabled(vdev)) { \
- up_read(&vdev->memory_lock); \
- return -EIO; \
- } \
- } \
- \
- vfio_iowrite##size(val, io); \
- \
- if (test_mem) \
- up_read(&vdev->memory_lock); \
- \
- return 0; \
- }
- VFIO_IOWRITE(8)
- VFIO_IOWRITE(16)
- VFIO_IOWRITE(32)
- #ifdef iowrite64
- VFIO_IOWRITE(64)
- #endif
- #define VFIO_IOREAD(size) \
- static int vfio_pci_ioread##size(struct vfio_pci_core_device *vdev, \
- bool test_mem, u##size *val, void __iomem *io) \
- { \
- if (test_mem) { \
- down_read(&vdev->memory_lock); \
- if (!__vfio_pci_memory_enabled(vdev)) { \
- up_read(&vdev->memory_lock); \
- return -EIO; \
- } \
- } \
- \
- *val = vfio_ioread##size(io); \
- \
- if (test_mem) \
- up_read(&vdev->memory_lock); \
- \
- return 0; \
- }
- VFIO_IOREAD(8)
- VFIO_IOREAD(16)
- VFIO_IOREAD(32)
- /*
- * Read or write from an __iomem region (MMIO or I/O port) with an excluded
- * range which is inaccessible. The excluded range drops writes and fills
- * reads with -1. This is intended for handling MSI-X vector tables and
- * leftover space for ROM BARs.
- */
- static ssize_t do_io_rw(struct vfio_pci_core_device *vdev, bool test_mem,
- void __iomem *io, char __user *buf,
- loff_t off, size_t count, size_t x_start,
- size_t x_end, bool iswrite)
- {
- ssize_t done = 0;
- int ret;
- while (count) {
- size_t fillable, filled;
- if (off < x_start)
- fillable = min(count, (size_t)(x_start - off));
- else if (off >= x_end)
- fillable = count;
- else
- fillable = 0;
- if (fillable >= 4 && !(off % 4)) {
- u32 val;
- if (iswrite) {
- if (copy_from_user(&val, buf, 4))
- return -EFAULT;
- ret = vfio_pci_iowrite32(vdev, test_mem,
- val, io + off);
- if (ret)
- return ret;
- } else {
- ret = vfio_pci_ioread32(vdev, test_mem,
- &val, io + off);
- if (ret)
- return ret;
- if (copy_to_user(buf, &val, 4))
- return -EFAULT;
- }
- filled = 4;
- } else if (fillable >= 2 && !(off % 2)) {
- u16 val;
- if (iswrite) {
- if (copy_from_user(&val, buf, 2))
- return -EFAULT;
- ret = vfio_pci_iowrite16(vdev, test_mem,
- val, io + off);
- if (ret)
- return ret;
- } else {
- ret = vfio_pci_ioread16(vdev, test_mem,
- &val, io + off);
- if (ret)
- return ret;
- if (copy_to_user(buf, &val, 2))
- return -EFAULT;
- }
- filled = 2;
- } else if (fillable) {
- u8 val;
- if (iswrite) {
- if (copy_from_user(&val, buf, 1))
- return -EFAULT;
- ret = vfio_pci_iowrite8(vdev, test_mem,
- val, io + off);
- if (ret)
- return ret;
- } else {
- ret = vfio_pci_ioread8(vdev, test_mem,
- &val, io + off);
- if (ret)
- return ret;
- if (copy_to_user(buf, &val, 1))
- return -EFAULT;
- }
- filled = 1;
- } else {
- /* Fill reads with -1, drop writes */
- filled = min(count, (size_t)(x_end - off));
- if (!iswrite) {
- u8 val = 0xFF;
- size_t i;
- for (i = 0; i < filled; i++)
- if (copy_to_user(buf + i, &val, 1))
- return -EFAULT;
- }
- }
- count -= filled;
- done += filled;
- off += filled;
- buf += filled;
- }
- return done;
- }
- static int vfio_pci_setup_barmap(struct vfio_pci_core_device *vdev, int bar)
- {
- struct pci_dev *pdev = vdev->pdev;
- int ret;
- void __iomem *io;
- if (vdev->barmap[bar])
- return 0;
- ret = pci_request_selected_regions(pdev, 1 << bar, "vfio");
- if (ret)
- return ret;
- io = pci_iomap(pdev, bar, 0);
- if (!io) {
- pci_release_selected_regions(pdev, 1 << bar);
- return -ENOMEM;
- }
- vdev->barmap[bar] = io;
- return 0;
- }
- ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
- size_t count, loff_t *ppos, bool iswrite)
- {
- struct pci_dev *pdev = vdev->pdev;
- loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
- int bar = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
- size_t x_start = 0, x_end = 0;
- resource_size_t end;
- void __iomem *io;
- struct resource *res = &vdev->pdev->resource[bar];
- ssize_t done;
- if (pci_resource_start(pdev, bar))
- end = pci_resource_len(pdev, bar);
- else if (bar == PCI_ROM_RESOURCE &&
- pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
- end = 0x20000;
- else
- return -EINVAL;
- if (pos >= end)
- return -EINVAL;
- count = min(count, (size_t)(end - pos));
- if (bar == PCI_ROM_RESOURCE) {
- /*
- * The ROM can fill less space than the BAR, so we start the
- * excluded range at the end of the actual ROM. This makes
- * filling large ROM BARs much faster.
- */
- io = pci_map_rom(pdev, &x_start);
- if (!io) {
- done = -ENOMEM;
- goto out;
- }
- x_end = end;
- } else {
- int ret = vfio_pci_setup_barmap(vdev, bar);
- if (ret) {
- done = ret;
- goto out;
- }
- io = vdev->barmap[bar];
- }
- if (bar == vdev->msix_bar) {
- x_start = vdev->msix_offset;
- x_end = vdev->msix_offset + vdev->msix_size;
- }
- done = do_io_rw(vdev, res->flags & IORESOURCE_MEM, io, buf, pos,
- count, x_start, x_end, iswrite);
- if (done >= 0)
- *ppos += done;
- if (bar == PCI_ROM_RESOURCE)
- pci_unmap_rom(pdev, io);
- out:
- return done;
- }
#ifdef CONFIG_VFIO_PCI_VGA
/*
 * Read from or write to one of the legacy VGA ranges on behalf of user space.
 *
 * The offset decoded from *@ppos selects the range:
 *   0xa0000-0xbffff  legacy VGA memory, mapped with ioremap();
 *   0x3b0-0x3bb      legacy VGA I/O ports, mapped with ioport_map();
 *   0x3c0-0x3df      legacy VGA I/O ports, mapped with ioport_map().
 * The request is clamped to the end of the selected range, and the matching
 * VGA resource is held via vga_get_interruptible()/vga_put() around the
 * access.
 *
 * Returns the number of bytes transferred (and advances *@ppos by that
 * amount), -EINVAL if the device has no VGA or the offset is outside the
 * ranges above, -ENOMEM if the mapping fails, or the error returned by
 * vga_get_interruptible() or do_io_rw().
 */
ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
			size_t count, loff_t *ppos, bool iswrite)
{
	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	void __iomem *iomem = NULL;
	unsigned int rsrc;
	bool is_ioport;
	ssize_t done;
	loff_t off;
	u32 legacy;
	int ret;

	if (!vdev->has_vga)
		return -EINVAL;

	if (pos > 0xbfffful)
		return -EINVAL;

	legacy = (u32)pos;

	if (legacy >= 0xa0000 && legacy <= 0xbffff) {
		count = min(count, (size_t)(0xc0000 - pos));
		iomem = ioremap(0xa0000, 0xbffff - 0xa0000 + 1);
		off = pos - 0xa0000;
		rsrc = VGA_RSRC_LEGACY_MEM;
		is_ioport = false;
	} else if (legacy >= 0x3b0 && legacy <= 0x3bb) {
		count = min(count, (size_t)(0x3bc - pos));
		iomem = ioport_map(0x3b0, 0x3bb - 0x3b0 + 1);
		off = pos - 0x3b0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
	} else if (legacy >= 0x3c0 && legacy <= 0x3df) {
		count = min(count, (size_t)(0x3e0 - pos));
		iomem = ioport_map(0x3c0, 0x3df - 0x3c0 + 1);
		off = pos - 0x3c0;
		rsrc = VGA_RSRC_LEGACY_IO;
		is_ioport = true;
	} else {
		return -EINVAL;
	}

	if (!iomem)
		return -ENOMEM;

	ret = vga_get_interruptible(vdev->pdev, rsrc);
	if (ret) {
		if (is_ioport)
			ioport_unmap(iomem);
		else
			iounmap(iomem);
		return ret;
	}

	/*
	 * VGA MMIO is a legacy, non-BAR resource that hopefully allows
	 * probing, so access is not currently checked against the memory
	 * enable bit in the command register (test_mem is false).
	 */
	done = do_io_rw(vdev, false, iomem, buf, off, count, 0, 0, iswrite);

	vga_put(vdev->pdev, rsrc);

	if (is_ioport)
		ioport_unmap(iomem);
	else
		iounmap(iomem);

	if (done >= 0)
		*ppos += done;

	return done;
}
#endif
- static void vfio_pci_ioeventfd_do_write(struct vfio_pci_ioeventfd *ioeventfd,
- bool test_mem)
- {
- switch (ioeventfd->count) {
- case 1:
- vfio_pci_iowrite8(ioeventfd->vdev, test_mem,
- ioeventfd->data, ioeventfd->addr);
- break;
- case 2:
- vfio_pci_iowrite16(ioeventfd->vdev, test_mem,
- ioeventfd->data, ioeventfd->addr);
- break;
- case 4:
- vfio_pci_iowrite32(ioeventfd->vdev, test_mem,
- ioeventfd->data, ioeventfd->addr);
- break;
- #ifdef iowrite64
- case 8:
- vfio_pci_iowrite64(ioeventfd->vdev, test_mem,
- ioeventfd->data, ioeventfd->addr);
- break;
- #endif
- }
- }
- static int vfio_pci_ioeventfd_handler(void *opaque, void *unused)
- {
- struct vfio_pci_ioeventfd *ioeventfd = opaque;
- struct vfio_pci_core_device *vdev = ioeventfd->vdev;
- if (ioeventfd->test_mem) {
- if (!down_read_trylock(&vdev->memory_lock))
- return 1; /* Lock contended, use thread */
- if (!__vfio_pci_memory_enabled(vdev)) {
- up_read(&vdev->memory_lock);
- return 0;
- }
- }
- vfio_pci_ioeventfd_do_write(ioeventfd, false);
- if (ioeventfd->test_mem)
- up_read(&vdev->memory_lock);
- return 0;
- }
- static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
- {
- struct vfio_pci_ioeventfd *ioeventfd = opaque;
- vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
- }
- int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
- uint64_t data, int count, int fd)
- {
- struct pci_dev *pdev = vdev->pdev;
- loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
- int ret, bar = VFIO_PCI_OFFSET_TO_INDEX(offset);
- struct vfio_pci_ioeventfd *ioeventfd;
- /* Only support ioeventfds into BARs */
- if (bar > VFIO_PCI_BAR5_REGION_INDEX)
- return -EINVAL;
- if (pos + count > pci_resource_len(pdev, bar))
- return -EINVAL;
- /* Disallow ioeventfds working around MSI-X table writes */
- if (bar == vdev->msix_bar &&
- !(pos + count <= vdev->msix_offset ||
- pos >= vdev->msix_offset + vdev->msix_size))
- return -EINVAL;
- #ifndef iowrite64
- if (count == 8)
- return -EINVAL;
- #endif
- ret = vfio_pci_setup_barmap(vdev, bar);
- if (ret)
- return ret;
- mutex_lock(&vdev->ioeventfds_lock);
- list_for_each_entry(ioeventfd, &vdev->ioeventfds_list, next) {
- if (ioeventfd->pos == pos && ioeventfd->bar == bar &&
- ioeventfd->data == data && ioeventfd->count == count) {
- if (fd == -1) {
- vfio_virqfd_disable(&ioeventfd->virqfd);
- list_del(&ioeventfd->next);
- vdev->ioeventfds_nr--;
- kfree(ioeventfd);
- ret = 0;
- } else
- ret = -EEXIST;
- goto out_unlock;
- }
- }
- if (fd < 0) {
- ret = -ENODEV;
- goto out_unlock;
- }
- if (vdev->ioeventfds_nr >= VFIO_PCI_IOEVENTFD_MAX) {
- ret = -ENOSPC;
- goto out_unlock;
- }
- ioeventfd = kzalloc(sizeof(*ioeventfd), GFP_KERNEL);
- if (!ioeventfd) {
- ret = -ENOMEM;
- goto out_unlock;
- }
- ioeventfd->vdev = vdev;
- ioeventfd->addr = vdev->barmap[bar] + pos;
- ioeventfd->data = data;
- ioeventfd->pos = pos;
- ioeventfd->bar = bar;
- ioeventfd->count = count;
- ioeventfd->test_mem = vdev->pdev->resource[bar].flags & IORESOURCE_MEM;
- ret = vfio_virqfd_enable(ioeventfd, vfio_pci_ioeventfd_handler,
- vfio_pci_ioeventfd_thread, NULL,
- &ioeventfd->virqfd, fd);
- if (ret) {
- kfree(ioeventfd);
- goto out_unlock;
- }
- list_add(&ioeventfd->next, &vdev->ioeventfds_list);
- vdev->ioeventfds_nr++;
- out_unlock:
- mutex_unlock(&vdev->ioeventfds_lock);
- return ret;
- }
|