Diffstat (limited to 'drivers/vfio')
-rw-r--r--  drivers/vfio/Kconfig                            |  36
-rw-r--r--  drivers/vfio/Makefile                           |   6
-rw-r--r--  drivers/vfio/container.c                        | 145
-rw-r--r--  drivers/vfio/fsl-mc/vfio_fsl_mc.c               |   3
-rw-r--r--  drivers/vfio/group.c                            | 877
-rw-r--r--  drivers/vfio/iommufd.c                          | 158
-rw-r--r--  drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c  |   6
-rw-r--r--  drivers/vfio/pci/mlx5/main.c                    |   3
-rw-r--r--  drivers/vfio/pci/vfio_pci.c                     |   3
-rw-r--r--  drivers/vfio/platform/vfio_amba.c               |   3
-rw-r--r--  drivers/vfio/platform/vfio_platform.c           |   3
-rw-r--r--  drivers/vfio/vfio.h                             | 120
-rw-r--r--  drivers/vfio/vfio_main.c                        | 983
13 files changed, 1434 insertions, 912 deletions
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 86c381ceb9a1..286c1663bd75 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -2,8 +2,9 @@
menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework"
select IOMMU_API
- select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
+ depends on IOMMUFD || !IOMMUFD
select INTERVAL_TREE
+ select VFIO_CONTAINER if IOMMUFD=n
help
VFIO provides a framework for secure userspace device drivers.
See Documentation/driver-api/vfio.rst for more details.
@@ -11,6 +12,18 @@ menuconfig VFIO
If you don't know what to do here, say N.
if VFIO
+config VFIO_CONTAINER
+ bool "Support for the VFIO container /dev/vfio/vfio"
+ select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
+ default y
+ help
+ The VFIO container is the classic interface to VFIO for establishing
+ IOMMU mappings. If N is selected here then IOMMUFD must be used to
+ manage the mappings.
+
+ Unless testing IOMMUFD, say Y here.
+
+if VFIO_CONTAINER
config VFIO_IOMMU_TYPE1
tristate
default n
@@ -20,16 +33,6 @@ config VFIO_IOMMU_SPAPR_TCE
depends on SPAPR_TCE_IOMMU
default VFIO
-config VFIO_SPAPR_EEH
- tristate
- depends on EEH && VFIO_IOMMU_SPAPR_TCE
- default VFIO
-
-config VFIO_VIRQFD
- tristate
- select EVENTFD
- default n
-
config VFIO_NOIOMMU
bool "VFIO No-IOMMU support"
help
@@ -43,6 +46,17 @@ config VFIO_NOIOMMU
this mode since there is no IOMMU to provide DMA translation.
If you don't know what to do here, say N.
+endif
+
+config VFIO_SPAPR_EEH
+ tristate
+ depends on EEH && VFIO_IOMMU_SPAPR_TCE
+ default VFIO
+
+config VFIO_VIRQFD
+ tristate
+ select EVENTFD
+ default n
source "drivers/vfio/pci/Kconfig"
source "drivers/vfio/platform/Kconfig"
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index b693a1169286..3783db7e8082 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -4,8 +4,10 @@ vfio_virqfd-y := virqfd.o
obj-$(CONFIG_VFIO) += vfio.o
vfio-y += vfio_main.o \
- iova_bitmap.o \
- container.o
+ group.o \
+ iova_bitmap.o
+vfio-$(CONFIG_IOMMUFD) += iommufd.o
+vfio-$(CONFIG_VFIO_CONTAINER) += container.o
obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index d74164abbf40..b7a9560ab25e 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -188,8 +188,9 @@ void vfio_device_container_unregister(struct vfio_device *device)
device->group->container->iommu_data, device);
}
-long vfio_container_ioctl_check_extension(struct vfio_container *container,
- unsigned long arg)
+static long
+vfio_container_ioctl_check_extension(struct vfio_container *container,
+ unsigned long arg)
{
struct vfio_iommu_driver *driver;
long ret = 0;
@@ -511,14 +512,15 @@ void vfio_group_detach_container(struct vfio_group *group)
vfio_container_put(container);
}
-int vfio_device_assign_container(struct vfio_device *device)
+int vfio_group_use_container(struct vfio_group *group)
{
- struct vfio_group *group = device->group;
-
lockdep_assert_held(&group->group_lock);
- if (!group->container || !group->container->iommu_driver ||
- WARN_ON(!group->container_users))
+ /*
+ * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
+ * VFIO_SET_IOMMU hasn't been done yet.
+ */
+ if (!group->container->iommu_driver)
return -EINVAL;
if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
@@ -529,122 +531,56 @@ int vfio_device_assign_container(struct vfio_device *device)
return 0;
}
-void vfio_device_unassign_container(struct vfio_device *device)
+void vfio_group_unuse_container(struct vfio_group *group)
{
- mutex_lock(&device->group->group_lock);
- WARN_ON(device->group->container_users <= 1);
- device->group->container_users--;
- fput(device->group->opened_file);
- mutex_unlock(&device->group->group_lock);
+ lockdep_assert_held(&group->group_lock);
+
+ WARN_ON(group->container_users <= 1);
+ group->container_users--;
+ fput(group->opened_file);
}
-/*
- * Pin contiguous user pages and return their associated host pages for local
- * domain only.
- * @device [in] : device
- * @iova [in] : starting IOVA of user pages to be pinned.
- * @npage [in] : count of pages to be pinned. This count should not
- * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
- * @prot [in] : protection flags
- * @pages[out] : array of host pages
- * Return error or number of pages pinned.
- *
- * A driver may only call this function if the vfio_device was created
- * by vfio_register_emulated_iommu_dev().
- */
-int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
- int npage, int prot, struct page **pages)
+int vfio_device_container_pin_pages(struct vfio_device *device,
+ dma_addr_t iova, int npage,
+ int prot, struct page **pages)
{
- struct vfio_container *container;
- struct vfio_group *group = device->group;
- struct vfio_iommu_driver *driver;
- int ret;
-
- if (!pages || !npage || !vfio_assert_device_open(device))
- return -EINVAL;
+ struct vfio_container *container = device->group->container;
+ struct iommu_group *iommu_group = device->group->iommu_group;
+ struct vfio_iommu_driver *driver = container->iommu_driver;
if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
return -E2BIG;
- /* group->container cannot change while a vfio device is open */
- container = group->container;
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->pin_pages))
- ret = driver->ops->pin_pages(container->iommu_data,
- group->iommu_group, iova,
- npage, prot, pages);
- else
- ret = -ENOTTY;
-
- return ret;
+ if (unlikely(!driver || !driver->ops->pin_pages))
+ return -ENOTTY;
+ return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
+ npage, prot, pages);
}
-EXPORT_SYMBOL(vfio_pin_pages);
-/*
- * Unpin contiguous host pages for local domain only.
- * @device [in] : device
- * @iova [in] : starting address of user pages to be unpinned.
- * @npage [in] : count of pages to be unpinned. This count should not
- * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
- */
-void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
+void vfio_device_container_unpin_pages(struct vfio_device *device,
+ dma_addr_t iova, int npage)
{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
+ struct vfio_container *container = device->group->container;
if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
return;
- if (WARN_ON(!vfio_assert_device_open(device)))
- return;
-
- /* group->container cannot change while a vfio device is open */
- container = device->group->container;
- driver = container->iommu_driver;
-
- driver->ops->unpin_pages(container->iommu_data, iova, npage);
+ container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
+ npage);
}
-EXPORT_SYMBOL(vfio_unpin_pages);
-/*
- * This interface allows the CPUs to perform some sort of virtual DMA on
- * behalf of the device.
- *
- * CPUs read/write from/into a range of IOVAs pointing to user space memory
- * into/from a kernel buffer.
- *
- * As the read/write of user space memory is conducted via the CPUs and is
- * not a real device DMA, it is not necessary to pin the user space memory.
- *
- * @device [in] : VFIO device
- * @iova [in] : base IOVA of a user space buffer
- * @data [in] : pointer to kernel buffer
- * @len [in] : kernel buffer length
- * @write : indicate read or write
- * Return error code on failure or 0 on success.
- */
-int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
- size_t len, bool write)
+int vfio_device_container_dma_rw(struct vfio_device *device,
+ dma_addr_t iova, void *data,
+ size_t len, bool write)
{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
- int ret = 0;
-
- if (!data || len <= 0 || !vfio_assert_device_open(device))
- return -EINVAL;
-
- /* group->container cannot change while a vfio device is open */
- container = device->group->container;
- driver = container->iommu_driver;
+ struct vfio_container *container = device->group->container;
+ struct vfio_iommu_driver *driver = container->iommu_driver;
- if (likely(driver && driver->ops->dma_rw))
- ret = driver->ops->dma_rw(container->iommu_data,
- iova, data, len, write);
- else
- ret = -ENOTTY;
- return ret;
+ if (unlikely(!driver || !driver->ops->dma_rw))
+ return -ENOTTY;
+ return driver->ops->dma_rw(container->iommu_data, iova, data, len,
+ write);
}
-EXPORT_SYMBOL(vfio_dma_rw);
int __init vfio_container_init(void)
{
@@ -678,3 +614,6 @@ void vfio_container_cleanup(void)
misc_deregister(&vfio_dev);
mutex_destroy(&vfio.iommu_drivers_lock);
}
+
+MODULE_ALIAS_MISCDEV(VFIO_MINOR);
+MODULE_ALIAS("devname:vfio/vfio");
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index b16874e913e4..5cd4bb476440 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -592,6 +592,9 @@ static const struct vfio_device_ops vfio_fsl_mc_ops = {
.read = vfio_fsl_mc_read,
.write = vfio_fsl_mc_write,
.mmap = vfio_fsl_mc_mmap,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
};
static struct fsl_mc_driver vfio_fsl_mc_driver = {
diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
new file mode 100644
index 000000000000..c5d8bf10495e
--- /dev/null
+++ b/drivers/vfio/group.c
@@ -0,0 +1,877 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VFIO core
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
+ * Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * Derived from original vfio:
+ * Copyright 2010 Cisco Systems, Inc. All rights reserved.
+ * Author: Tom Lyon, pugs@cisco.com
+ */
+
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+#include <linux/anon_inodes.h>
+#include "vfio.h"
+
+static struct vfio {
+ struct class *class;
+ struct list_head group_list;
+ struct mutex group_lock; /* locks group_list */
+ struct ida group_ida;
+ dev_t group_devt;
+} vfio;
+
+static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
+ char *buf)
+{
+ struct vfio_device *it, *device = ERR_PTR(-ENODEV);
+
+ mutex_lock(&group->device_lock);
+ list_for_each_entry(it, &group->device_list, group_next) {
+ int ret;
+
+ if (it->ops->match) {
+ ret = it->ops->match(it, buf);
+ if (ret < 0) {
+ device = ERR_PTR(ret);
+ break;
+ }
+ } else {
+ ret = !strcmp(dev_name(it->dev), buf);
+ }
+
+ if (ret && vfio_device_try_get_registration(it)) {
+ device = it;
+ break;
+ }
+ }
+ mutex_unlock(&group->device_lock);
+
+ return device;
+}
+
+/*
+ * VFIO Group fd, /dev/vfio/$GROUP
+ */
+static bool vfio_group_has_iommu(struct vfio_group *group)
+{
+ lockdep_assert_held(&group->group_lock);
+ /*
+ * There can only be users if there is a container, and if there is a
+ * container there must be users.
+ */
+ WARN_ON(!group->container != !group->container_users);
+
+ return group->container || group->iommufd;
+}
+
+/*
+ * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
+ * if there was no container to unset. Since the ioctl is called on
+ * the group, we know the group still exists; therefore the only valid
+ * transition here is 1->0.
+ */
+static int vfio_group_ioctl_unset_container(struct vfio_group *group)
+{
+ int ret = 0;
+
+ mutex_lock(&group->group_lock);
+ if (!vfio_group_has_iommu(group)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ if (group->container) {
+ if (group->container_users != 1) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ vfio_group_detach_container(group);
+ }
+ if (group->iommufd) {
+ iommufd_ctx_put(group->iommufd);
+ group->iommufd = NULL;
+ }
+
+out_unlock:
+ mutex_unlock(&group->group_lock);
+ return ret;
+}
+
+static int vfio_group_ioctl_set_container(struct vfio_group *group,
+ int __user *arg)
+{
+ struct vfio_container *container;
+ struct iommufd_ctx *iommufd;
+ struct fd f;
+ int ret;
+ int fd;
+
+ if (get_user(fd, arg))
+ return -EFAULT;
+
+ f = fdget(fd);
+ if (!f.file)
+ return -EBADF;
+
+ mutex_lock(&group->group_lock);
+ if (vfio_group_has_iommu(group)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ if (!group->iommu_group) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+
+ container = vfio_container_from_file(f.file);
+ if (container) {
+ ret = vfio_container_attach_group(container, group);
+ goto out_unlock;
+ }
+
+ iommufd = iommufd_ctx_from_file(f.file);
+ if (!IS_ERR(iommufd)) {
+ u32 ioas_id;
+
+ ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
+ if (ret) {
+ iommufd_ctx_put(iommufd);
+ goto out_unlock;
+ }
+
+ group->iommufd = iommufd;
+ goto out_unlock;
+ }
+
+ /* The FD passed is not recognized. */
+ ret = -EBADFD;
+
+out_unlock:
+ mutex_unlock(&group->group_lock);
+ fdput(f);
+ return ret;
+}
+
+static int vfio_device_group_open(struct vfio_device *device)
+{
+ int ret;
+
+ mutex_lock(&device->group->group_lock);
+ if (!vfio_group_has_iommu(device->group)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ /*
+ * Here we pass the KVM pointer with the group under the lock. If the
+ * device driver will use it, it must obtain a reference and release it
+ * during close_device.
+ */
+ ret = vfio_device_open(device, device->group->iommufd,
+ device->group->kvm);
+
+out_unlock:
+ mutex_unlock(&device->group->group_lock);
+ return ret;
+}
+
+void vfio_device_group_close(struct vfio_device *device)
+{
+ mutex_lock(&device->group->group_lock);
+ vfio_device_close(device, device->group->iommufd);
+ mutex_unlock(&device->group->group_lock);
+}
+
+static struct file *vfio_device_open_file(struct vfio_device *device)
+{
+ struct file *filep;
+ int ret;
+
+ ret = vfio_device_group_open(device);
+ if (ret)
+ goto err_out;
+
+ /*
+ * We can't use anon_inode_getfd() because we need to modify
+ * the f_mode flags directly to allow more than just ioctls
+ */
+ filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
+ device, O_RDWR);
+ if (IS_ERR(filep)) {
+ ret = PTR_ERR(filep);
+ goto err_close_device;
+ }
+
+ /*
+ * TODO: add an anon_inode interface to do this.
+ * Appears to be missing by lack of need rather than
+ * explicitly prevented. Now there's need.
+ */
+ filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
+
+ if (device->group->type == VFIO_NO_IOMMU)
+ dev_warn(device->dev, "vfio-noiommu device opened by user "
+ "(%s:%d)\n", current->comm, task_pid_nr(current));
+ /*
+ * On success the ref of device is moved to the file and
+ * put in vfio_device_fops_release()
+ */
+ return filep;
+
+err_close_device:
+ vfio_device_group_close(device);
+err_out:
+ return ERR_PTR(ret);
+}
+
+static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
+ char __user *arg)
+{
+ struct vfio_device *device;
+ struct file *filep;
+ char *buf;
+ int fdno;
+ int ret;
+
+ buf = strndup_user(arg, PAGE_SIZE);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+
+ device = vfio_device_get_from_name(group, buf);
+ kfree(buf);
+ if (IS_ERR(device))
+ return PTR_ERR(device);
+
+ fdno = get_unused_fd_flags(O_CLOEXEC);
+ if (fdno < 0) {
+ ret = fdno;
+ goto err_put_device;
+ }
+
+ filep = vfio_device_open_file(device);
+ if (IS_ERR(filep)) {
+ ret = PTR_ERR(filep);
+ goto err_put_fdno;
+ }
+
+ fd_install(fdno, filep);
+ return fdno;
+
+err_put_fdno:
+ put_unused_fd(fdno);
+err_put_device:
+ vfio_device_put_registration(device);
+ return ret;
+}
+
+static int vfio_group_ioctl_get_status(struct vfio_group *group,
+ struct vfio_group_status __user *arg)
+{
+ unsigned long minsz = offsetofend(struct vfio_group_status, flags);
+ struct vfio_group_status status;
+
+ if (copy_from_user(&status, arg, minsz))
+ return -EFAULT;
+
+ if (status.argsz < minsz)
+ return -EINVAL;
+
+ status.flags = 0;
+
+ mutex_lock(&group->group_lock);
+ if (!group->iommu_group) {
+ mutex_unlock(&group->group_lock);
+ return -ENODEV;
+ }
+
+ /*
+ * With the container FD the iommu_group_claim_dma_owner() is done
+ * during SET_CONTAINER, but for IOMMUFD this is done during
+ * VFIO_GROUP_GET_DEVICE_FD. This means that with iommufd
+ * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due
+ * to viability.
+ */
+ if (vfio_group_has_iommu(group))
+ status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
+ VFIO_GROUP_FLAGS_VIABLE;
+ else if (!iommu_group_dma_owner_claimed(group->iommu_group))
+ status.flags |= VFIO_GROUP_FLAGS_VIABLE;
+ mutex_unlock(&group->group_lock);
+
+ if (copy_to_user(arg, &status, minsz))
+ return -EFAULT;
+ return 0;
+}
+
+static long vfio_group_fops_unl_ioctl(struct file *filep,
+ unsigned int cmd, unsigned long arg)
+{
+ struct vfio_group *group = filep->private_data;
+ void __user *uarg = (void __user *)arg;
+
+ switch (cmd) {
+ case VFIO_GROUP_GET_DEVICE_FD:
+ return vfio_group_ioctl_get_device_fd(group, uarg);
+ case VFIO_GROUP_GET_STATUS:
+ return vfio_group_ioctl_get_status(group, uarg);
+ case VFIO_GROUP_SET_CONTAINER:
+ return vfio_group_ioctl_set_container(group, uarg);
+ case VFIO_GROUP_UNSET_CONTAINER:
+ return vfio_group_ioctl_unset_container(group);
+ default:
+ return -ENOTTY;
+ }
+}
+
+static int vfio_group_fops_open(struct inode *inode, struct file *filep)
+{
+ struct vfio_group *group =
+ container_of(inode->i_cdev, struct vfio_group, cdev);
+ int ret;
+
+ mutex_lock(&group->group_lock);
+
+ /*
+ * drivers can be zero if this races with vfio_device_remove_group();
+ * it will be stable at 0 under the group_lock.
+ */
+ if (refcount_read(&group->drivers) == 0) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+
+ if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ /*
+ * Do we need multiple instances of the group open? Seems not.
+ */
+ if (group->opened_file) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ group->opened_file = filep;
+ filep->private_data = group;
+ ret = 0;
+out_unlock:
+ mutex_unlock(&group->group_lock);
+ return ret;
+}
+
+static int vfio_group_fops_release(struct inode *inode, struct file *filep)
+{
+ struct vfio_group *group = filep->private_data;
+
+ filep->private_data = NULL;
+
+ mutex_lock(&group->group_lock);
+ /*
+ * Device FDs hold a group file reference, therefore the group release
+ * is only called when there are no open devices.
+ */
+ WARN_ON(group->notifier.head);
+ if (group->container)
+ vfio_group_detach_container(group);
+ if (group->iommufd) {
+ iommufd_ctx_put(group->iommufd);
+ group->iommufd = NULL;
+ }
+ group->opened_file = NULL;
+ mutex_unlock(&group->group_lock);
+ return 0;
+}
+
+static const struct file_operations vfio_group_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = vfio_group_fops_unl_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+ .open = vfio_group_fops_open,
+ .release = vfio_group_fops_release,
+};
+
+/*
+ * Group objects - create, release, get, put, search
+ */
+static struct vfio_group *
+vfio_group_find_from_iommu(struct iommu_group *iommu_group)
+{
+ struct vfio_group *group;
+
+ lockdep_assert_held(&vfio.group_lock);
+
+ /*
+ * group->iommu_group from the vfio.group_list cannot be NULL
+ * under the vfio.group_lock.
+ */
+ list_for_each_entry(group, &vfio.group_list, vfio_next) {
+ if (group->iommu_group == iommu_group)
+ return group;
+ }
+ return NULL;
+}
+
+static void vfio_group_release(struct device *dev)
+{
+ struct vfio_group *group = container_of(dev, struct vfio_group, dev);
+
+ mutex_destroy(&group->device_lock);
+ mutex_destroy(&group->group_lock);
+ WARN_ON(group->iommu_group);
+ ida_free(&vfio.group_ida, MINOR(group->dev.devt));
+ kfree(group);
+}
+
+static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
+ enum vfio_group_type type)
+{
+ struct vfio_group *group;
+ int minor;
+
+ group = kzalloc(sizeof(*group), GFP_KERNEL);
+ if (!group)
+ return ERR_PTR(-ENOMEM);
+
+ minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
+ if (minor < 0) {
+ kfree(group);
+ return ERR_PTR(minor);
+ }
+
+ device_initialize(&group->dev);
+ group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
+ group->dev.class = vfio.class;
+ group->dev.release = vfio_group_release;
+ cdev_init(&group->cdev, &vfio_group_fops);
+ group->cdev.owner = THIS_MODULE;
+
+ refcount_set(&group->drivers, 1);
+ mutex_init(&group->group_lock);
+ INIT_LIST_HEAD(&group->device_list);
+ mutex_init(&group->device_lock);
+ group->iommu_group = iommu_group;
+ /* put in vfio_group_release() */
+ iommu_group_ref_get(iommu_group);
+ group->type = type;
+ BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
+
+ return group;
+}
+
+static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
+ enum vfio_group_type type)
+{
+ struct vfio_group *group;
+ struct vfio_group *ret;
+ int err;
+
+ lockdep_assert_held(&vfio.group_lock);
+
+ group = vfio_group_alloc(iommu_group, type);
+ if (IS_ERR(group))
+ return group;
+
+ err = dev_set_name(&group->dev, "%s%d",
+ group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
+ iommu_group_id(iommu_group));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto err_put;
+ }
+
+ err = cdev_device_add(&group->cdev, &group->dev);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto err_put;
+ }
+
+ list_add(&group->vfio_next, &vfio.group_list);
+
+ return group;
+
+err_put:
+ put_device(&group->dev);
+ return ret;
+}
+
+static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
+ enum vfio_group_type type)
+{
+ struct iommu_group *iommu_group;
+ struct vfio_group *group;
+ int ret;
+
+ iommu_group = iommu_group_alloc();
+ if (IS_ERR(iommu_group))
+ return ERR_CAST(iommu_group);
+
+ ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
+ if (ret)
+ goto out_put_group;
+ ret = iommu_group_add_device(iommu_group, dev);
+ if (ret)
+ goto out_put_group;
+
+ mutex_lock(&vfio.group_lock);
+ group = vfio_create_group(iommu_group, type);
+ mutex_unlock(&vfio.group_lock);
+ if (IS_ERR(group)) {
+ ret = PTR_ERR(group);
+ goto out_remove_device;
+ }
+ iommu_group_put(iommu_group);
+ return group;
+
+out_remove_device:
+ iommu_group_remove_device(dev);
+out_put_group:
+ iommu_group_put(iommu_group);
+ return ERR_PTR(ret);
+}
+
+static bool vfio_group_has_device(struct vfio_group *group, struct device *dev)
+{
+ struct vfio_device *device;
+
+ mutex_lock(&group->device_lock);
+ list_for_each_entry(device, &group->device_list, group_next) {
+ if (device->dev == dev) {
+ mutex_unlock(&group->device_lock);
+ return true;
+ }
+ }
+ mutex_unlock(&group->device_lock);
+ return false;
+}
+
+static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
+{
+ struct iommu_group *iommu_group;
+ struct vfio_group *group;
+
+ iommu_group = iommu_group_get(dev);
+ if (!iommu_group && vfio_noiommu) {
+ /*
+ * With noiommu enabled, create an IOMMU group for devices that
+ * don't already have one, implying no IOMMU hardware/driver
+ * exists. Taint the kernel because we're about to give a DMA
+ * capable device to a user without IOMMU protection.
+ */
+ group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
+ if (!IS_ERR(group)) {
+ add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+ dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
+ }
+ return group;
+ }
+
+ if (!iommu_group)
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
+ * restore cache coherency. It has to be checked here because it is only
+ * valid for cases where we are using iommu groups.
+ */
+ if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
+ iommu_group_put(iommu_group);
+ return ERR_PTR(-EINVAL);
+ }
+
+ mutex_lock(&vfio.group_lock);
+ group = vfio_group_find_from_iommu(iommu_group);
+ if (group) {
+ if (WARN_ON(vfio_group_has_device(group, dev)))
+ group = ERR_PTR(-EINVAL);
+ else
+ refcount_inc(&group->drivers);
+ } else {
+ group = vfio_create_group(iommu_group, VFIO_IOMMU);
+ }
+ mutex_unlock(&vfio.group_lock);
+
+ /* The vfio_group holds a reference to the iommu_group */
+ iommu_group_put(iommu_group);
+ return group;
+}
+
+int vfio_device_set_group(struct vfio_device *device,
+ enum vfio_group_type type)
+{
+ struct vfio_group *group;
+
+ if (type == VFIO_IOMMU)
+ group = vfio_group_find_or_alloc(device->dev);
+ else
+ group = vfio_noiommu_group_alloc(device->dev, type);
+
+ if (IS_ERR(group))
+ return PTR_ERR(group);
+
+ /* Our reference on group is moved to the device */
+ device->group = group;
+ return 0;
+}
+
+void vfio_device_remove_group(struct vfio_device *device)
+{
+ struct vfio_group *group = device->group;
+ struct iommu_group *iommu_group;
+
+ if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
+ iommu_group_remove_device(device->dev);
+
+ /* Pairs with vfio_create_group() / vfio_group_find_or_alloc() */
+ if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
+ return;
+ list_del(&group->vfio_next);
+
+ /*
+ * We could concurrently probe another driver in the group that might
+ * race vfio_device_remove_group() with vfio_group_find_or_alloc(), so we
+ * have to ensure that the sysfs is all cleaned up under the lock, otherwise
+ * cdev_device_add() will fail due to the name already existing.
+ */
+ cdev_device_del(&group->cdev, &group->dev);
+
+ mutex_lock(&group->group_lock);
+ /*
+ * These data structures all have paired operations that can only be
+ * undone when the caller holds a live reference on the device. Since
+ * all pairs must be undone, these WARN_ONs indicate some caller did not
+ * properly hold the group reference.
+ */
+ WARN_ON(!list_empty(&group->device_list));
+ WARN_ON(group->notifier.head);
+
+ /*
+ * Revoke all users of group->iommu_group. At this point we know there
+ * are no devices active because we are unplugging the last one. Setting
+ * iommu_group to NULL blocks all new users.
+ */
+ if (group->container)
+ vfio_group_detach_container(group);
+ iommu_group = group->iommu_group;
+ group->iommu_group = NULL;
+ mutex_unlock(&group->group_lock);
+ mutex_unlock(&vfio.group_lock);
+
+ iommu_group_put(iommu_group);
+ put_device(&group->dev);
+}
+
+void vfio_device_group_register(struct vfio_device *device)
+{
+ mutex_lock(&device->group->device_lock);
+ list_add(&device->group_next, &device->group->device_list);
+ mutex_unlock(&device->group->device_lock);
+}
+
+void vfio_device_group_unregister(struct vfio_device *device)
+{
+ mutex_lock(&device->group->device_lock);
+ list_del(&device->group_next);
+ mutex_unlock(&device->group->device_lock);
+}
+
+int vfio_device_group_use_iommu(struct vfio_device *device)
+{
+ struct vfio_group *group = device->group;
+ int ret = 0;
+
+ lockdep_assert_held(&group->group_lock);
+
+ if (WARN_ON(!group->container))
+ return -EINVAL;
+
+ ret = vfio_group_use_container(group);
+ if (ret)
+ return ret;
+ vfio_device_container_register(device);
+ return 0;
+}
+
+void vfio_device_group_unuse_iommu(struct vfio_device *device)
+{
+ struct vfio_group *group = device->group;
+
+ lockdep_assert_held(&group->group_lock);
+
+ if (WARN_ON(!group->container))
+ return;
+
+ vfio_device_container_unregister(device);
+ vfio_group_unuse_container(group);
+}
+
+bool vfio_device_has_container(struct vfio_device *device)
+{
+ return device->group->container;
+}
+
+/**
+ * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
+ * @file: VFIO group file
+ *
+ * The returned iommu_group is valid as long as a ref is held on the file. This
+ * returns a reference on the group. This function is deprecated; only the
+ * SPAPR path in KVM should call it.
+ */
+struct iommu_group *vfio_file_iommu_group(struct file *file)
+{
+ struct vfio_group *group = file->private_data;
+ struct iommu_group *iommu_group = NULL;
+
+ if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
+ return NULL;
+
+ if (!vfio_file_is_group(file))
+ return NULL;
+
+ mutex_lock(&group->group_lock);
+ if (group->iommu_group) {
+ iommu_group = group->iommu_group;
+ iommu_group_ref_get(iommu_group);
+ }
+ mutex_unlock(&group->group_lock);
+ return iommu_group;
+}
+EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
+
+/**
+ * vfio_file_is_group - True if the file is usable with VFIO APIs
+ * @file: VFIO group file
+ */
+bool vfio_file_is_group(struct file *file)
+{
+ return file->f_op == &vfio_group_fops;
+}
+EXPORT_SYMBOL_GPL(vfio_file_is_group);
+
+/**
+ * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
+ * is always CPU cache coherent
+ * @file: VFIO group file
+ *
+ * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
+ * bit in DMA transactions. A return of false indicates that the user has
+ * rights to access additional instructions such as wbinvd on x86.
+ */
+bool vfio_file_enforced_coherent(struct file *file)
+{
+ struct vfio_group *group = file->private_data;
+ struct vfio_device *device;
+ bool ret = true;
+
+ if (!vfio_file_is_group(file))
+ return true;
+
+ /*
+ * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then
+ * any domain later attached to it will also not support it. If the cap
+ * is set then the iommu_domain eventually attached to the device/group
+ * must use a domain with enforce_cache_coherency().
+ */
+ mutex_lock(&group->device_lock);
+ list_for_each_entry(device, &group->device_list, group_next) {
+ if (!device_iommu_capable(device->dev,
+ IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) {
+ ret = false;
+ break;
+ }
+ }
+ mutex_unlock(&group->device_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
+
+/**
+ * vfio_file_set_kvm - Link a kvm with VFIO drivers
+ * @file: VFIO group file
+ * @kvm: KVM to link
+ *
+ * When a VFIO device is first opened the KVM will be available in
+ * device->kvm if one was associated with the group.
+ */
+void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
+{
+ struct vfio_group *group = file->private_data;
+
+ if (!vfio_file_is_group(file))
+ return;
+
+ mutex_lock(&group->group_lock);
+ group->kvm = kvm;
+ mutex_unlock(&group->group_lock);
+}
+EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
+
+/**
+ * vfio_file_has_dev - True if the VFIO file is a handle for a device
+ * @file: VFIO file to check
+ * @device: Device that must be part of the file
+ *
+ * Returns true if given file has permission to manipulate the given device.
+ */
+bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
+{
+ struct vfio_group *group = file->private_data;
+
+ if (!vfio_file_is_group(file))
+ return false;
+
+ return group == device->group;
+}
+EXPORT_SYMBOL_GPL(vfio_file_has_dev);
+
+static char *vfio_devnode(struct device *dev, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
+}
+
+int __init vfio_group_init(void)
+{
+ int ret;
+
+ ida_init(&vfio.group_ida);
+ mutex_init(&vfio.group_lock);
+ INIT_LIST_HEAD(&vfio.group_list);
+
+ ret = vfio_container_init();
+ if (ret)
+ return ret;
+
+ /* /dev/vfio/$GROUP */
+ vfio.class = class_create(THIS_MODULE, "vfio");
+ if (IS_ERR(vfio.class)) {
+ ret = PTR_ERR(vfio.class);
+ goto err_group_class;
+ }
+
+ vfio.class->devnode = vfio_devnode;
+
+ ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
+ if (ret)
+ goto err_alloc_chrdev;
+ return 0;
+
+err_alloc_chrdev:
+ class_destroy(vfio.class);
+ vfio.class = NULL;
+err_group_class:
+ vfio_container_cleanup();
+ return ret;
+}
+
+void vfio_group_cleanup(void)
+{
+ WARN_ON(!list_empty(&vfio.group_list));
+ ida_destroy(&vfio.group_ida);
+ unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
+ class_destroy(vfio.class);
+ vfio.class = NULL;
+ vfio_container_cleanup();
+}
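vfio_group_ioctl_set_container() above now accepts either a legacy container
FD or an iommufd FD. A userspace sketch of the iommufd-backed compat flow
(hypothetical group number and device name; error handling omitted):

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/vfio.h>

	int demo_open_device(void)
	{
		int iommufd = open("/dev/iommu", O_RDWR);
		int group = open("/dev/vfio/26", O_RDWR);

		/* An iommufd FD takes the iommufd_ctx_from_file() branch and
		 * resolves the compat IOAS via iommufd_vfio_compat_ioas_id(). */
		ioctl(group, VFIO_GROUP_SET_CONTAINER, &iommufd);

		/* Triggers vfio_device_group_open() -> vfio_device_open() with
		 * group->iommufd set, so the device binds through iommufd. */
		return ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:00:03.0");
	}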
diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
new file mode 100644
index 000000000000..4f82a6fa7c6c
--- /dev/null
+++ b/drivers/vfio/iommufd.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+
+#include "vfio.h"
+
+MODULE_IMPORT_NS(IOMMUFD);
+MODULE_IMPORT_NS(IOMMUFD_VFIO);
+
+int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
+{
+ u32 ioas_id;
+ u32 device_id;
+ int ret;
+
+ lockdep_assert_held(&vdev->dev_set->lock);
+
+ /*
+ * If the driver doesn't provide this op then it means the device does
+ * not do DMA at all. So nothing to do.
+ */
+ if (!vdev->ops->bind_iommufd)
+ return 0;
+
+ ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id);
+ if (ret)
+ return ret;
+
+ ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id);
+ if (ret)
+ goto err_unbind;
+ ret = vdev->ops->attach_ioas(vdev, &ioas_id);
+ if (ret)
+ goto err_unbind;
+
+ /*
+ * The legacy path has no way to return the device id or the selected
+ * pt_id
+ */
+ return 0;
+
+err_unbind:
+ if (vdev->ops->unbind_iommufd)
+ vdev->ops->unbind_iommufd(vdev);
+ return ret;
+}
+
+void vfio_iommufd_unbind(struct vfio_device *vdev)
+{
+ lockdep_assert_held(&vdev->dev_set->lock);
+
+ if (vdev->ops->unbind_iommufd)
+ vdev->ops->unbind_iommufd(vdev);
+}
+
+/*
+ * The physical standard ops mean that the iommufd_device is bound to the
+ * physical device vdev->dev that was provided to vfio_init_group_dev(). Drivers
+ * using this ops set should call vfio_register_group_dev()
+ */
+int vfio_iommufd_physical_bind(struct vfio_device *vdev,
+ struct iommufd_ctx *ictx, u32 *out_device_id)
+{
+ struct iommufd_device *idev;
+
+ idev = iommufd_device_bind(ictx, vdev->dev, out_device_id);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+ vdev->iommufd_device = idev;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_bind);
+
+void vfio_iommufd_physical_unbind(struct vfio_device *vdev)
+{
+ lockdep_assert_held(&vdev->dev_set->lock);
+
+ if (vdev->iommufd_attached) {
+ iommufd_device_detach(vdev->iommufd_device);
+ vdev->iommufd_attached = false;
+ }
+ iommufd_device_unbind(vdev->iommufd_device);
+ vdev->iommufd_device = NULL;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_unbind);
+
+int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id)
+{
+ int rc;
+
+ rc = iommufd_device_attach(vdev->iommufd_device, pt_id);
+ if (rc)
+ return rc;
+ vdev->iommufd_attached = true;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas);
+
+/*
+ * The emulated standard ops mean that vfio_device is going to use the
+ * "mdev path" and will call vfio_pin_pages()/vfio_dma_rw(). Drivers using this
+ * ops set should call vfio_register_emulated_iommu_dev().
+ */
+
+static void vfio_emulated_unmap(void *data, unsigned long iova,
+ unsigned long length)
+{
+ struct vfio_device *vdev = data;
+
+ vdev->ops->dma_unmap(vdev, iova, length);
+}
+
+static const struct iommufd_access_ops vfio_user_ops = {
+ .needs_pin_pages = 1,
+ .unmap = vfio_emulated_unmap,
+};
+
+int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
+ struct iommufd_ctx *ictx, u32 *out_device_id)
+{
+ lockdep_assert_held(&vdev->dev_set->lock);
+
+ vdev->iommufd_ictx = ictx;
+ iommufd_ctx_get(ictx);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_bind);
+
+void vfio_iommufd_emulated_unbind(struct vfio_device *vdev)
+{
+ lockdep_assert_held(&vdev->dev_set->lock);
+
+ if (vdev->iommufd_access) {
+ iommufd_access_destroy(vdev->iommufd_access);
+ vdev->iommufd_access = NULL;
+ }
+ iommufd_ctx_put(vdev->iommufd_ictx);
+ vdev->iommufd_ictx = NULL;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_unbind);
+
+int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id)
+{
+ struct iommufd_access *user;
+
+ lockdep_assert_held(&vdev->dev_set->lock);
+
+ user = iommufd_access_create(vdev->iommufd_ictx, *pt_id, &vfio_user_ops,
+ vdev);
+ if (IS_ERR(user))
+ return PTR_ERR(user);
+ vdev->iommufd_access = user;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_attach_ioas);
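The hunks below wire the physical helpers into the PCI, platform, AMBA and
fsl-mc drivers. An emulated (mdev-style) driver would use the emulated helpers
instead; a hypothetical sketch (driver name and unmap body are placeholders):

	static void demo_mdev_dma_unmap(struct vfio_device *vdev, u64 iova,
					u64 length)
	{
		/* Invalidate any pins inside [iova, iova + length). */
	}

	static const struct vfio_device_ops demo_mdev_ops = {
		.name		= "demo-mdev",
		/* Required: vfio_user_ops forwards unmaps to ->dma_unmap(). */
		.dma_unmap	= demo_mdev_dma_unmap,
		.bind_iommufd	= vfio_iommufd_emulated_bind,
		.unbind_iommufd	= vfio_iommufd_emulated_unbind,
		.attach_ioas	= vfio_iommufd_emulated_attach_ioas,
	};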
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 39eeca18a0f7..40019b11c5a9 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -1246,6 +1246,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = {
.mmap = hisi_acc_vfio_pci_mmap,
.request = vfio_pci_core_request,
.match = vfio_pci_core_match,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
};
static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
@@ -1261,6 +1264,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
.mmap = vfio_pci_core_mmap,
.request = vfio_pci_core_request,
.match = vfio_pci_core_match,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
};
static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index fd6ccb8454a2..32d1f38d351e 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -623,6 +623,9 @@ static const struct vfio_device_ops mlx5vf_pci_ops = {
.mmap = vfio_pci_core_mmap,
.request = vfio_pci_core_request,
.match = vfio_pci_core_match,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
};
static int mlx5vf_pci_probe(struct pci_dev *pdev,
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 1d4919edfbde..29091ee2e984 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -138,6 +138,9 @@ static const struct vfio_device_ops vfio_pci_ops = {
.mmap = vfio_pci_core_mmap,
.request = vfio_pci_core_request,
.match = vfio_pci_core_match,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
};
static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c
index eaea63e5294c..5a046098d0bd 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -117,6 +117,9 @@ static const struct vfio_device_ops vfio_amba_ops = {
.read = vfio_platform_read,
.write = vfio_platform_write,
.mmap = vfio_platform_mmap,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
};
static const struct amba_id pl330_ids[] = {
diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c
index 82cedcebfd90..b87c3b708783 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -106,6 +106,9 @@ static const struct vfio_device_ops vfio_platform_ops = {
.read = vfio_platform_read,
.write = vfio_platform_write,
.mmap = vfio_platform_mmap,
+ .bind_iommufd = vfio_iommufd_physical_bind,
+ .unbind_iommufd = vfio_iommufd_physical_unbind,
+ .attach_ioas = vfio_iommufd_physical_attach_ioas,
};
static struct platform_driver vfio_platform_driver = {
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index bcad54bbab08..2e05418fd18d 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -6,14 +6,25 @@
#ifndef __VFIO_VFIO_H__
#define __VFIO_VFIO_H__
+#include <linux/file.h>
#include <linux/device.h>
#include <linux/cdev.h>
#include <linux/module.h>
+struct iommufd_ctx;
struct iommu_group;
struct vfio_device;
struct vfio_container;
+void vfio_device_put_registration(struct vfio_device *device);
+bool vfio_device_try_get_registration(struct vfio_device *device);
+int vfio_device_open(struct vfio_device *device,
+ struct iommufd_ctx *iommufd, struct kvm *kvm);
+void vfio_device_close(struct vfio_device *device,
+ struct iommufd_ctx *iommufd);
+
+extern const struct file_operations vfio_device_fops;
+
enum vfio_group_type {
/*
* Physical device with IOMMU backing.
@@ -54,14 +65,30 @@ struct vfio_group {
struct list_head device_list;
struct mutex device_lock;
struct list_head vfio_next;
+#if IS_ENABLED(CONFIG_VFIO_CONTAINER)
struct list_head container_next;
+#endif
enum vfio_group_type type;
struct mutex group_lock;
struct kvm *kvm;
struct file *opened_file;
struct blocking_notifier_head notifier;
+ struct iommufd_ctx *iommufd;
};
+int vfio_device_set_group(struct vfio_device *device,
+ enum vfio_group_type type);
+void vfio_device_remove_group(struct vfio_device *device);
+void vfio_device_group_register(struct vfio_device *device);
+void vfio_device_group_unregister(struct vfio_device *device);
+int vfio_device_group_use_iommu(struct vfio_device *device);
+void vfio_device_group_unuse_iommu(struct vfio_device *device);
+void vfio_device_group_close(struct vfio_device *device);
+bool vfio_device_has_container(struct vfio_device *device);
+int __init vfio_group_init(void);
+void vfio_group_cleanup(void);
+
+#if IS_ENABLED(CONFIG_VFIO_CONTAINER)
/* events for the backend driver notify callback */
enum vfio_iommu_notify_type {
VFIO_IOMMU_CONTAINER_CLOSE = 0,
@@ -109,20 +136,101 @@ struct vfio_iommu_driver {
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops);
-bool vfio_assert_device_open(struct vfio_device *device);
-
struct vfio_container *vfio_container_from_file(struct file *filep);
-int vfio_device_assign_container(struct vfio_device *device);
-void vfio_device_unassign_container(struct vfio_device *device);
+int vfio_group_use_container(struct vfio_group *group);
+void vfio_group_unuse_container(struct vfio_group *group);
int vfio_container_attach_group(struct vfio_container *container,
struct vfio_group *group);
void vfio_group_detach_container(struct vfio_group *group);
void vfio_device_container_register(struct vfio_device *device);
void vfio_device_container_unregister(struct vfio_device *device);
-long vfio_container_ioctl_check_extension(struct vfio_container *container,
- unsigned long arg);
+int vfio_device_container_pin_pages(struct vfio_device *device,
+ dma_addr_t iova, int npage,
+ int prot, struct page **pages);
+void vfio_device_container_unpin_pages(struct vfio_device *device,
+ dma_addr_t iova, int npage);
+int vfio_device_container_dma_rw(struct vfio_device *device,
+ dma_addr_t iova, void *data,
+ size_t len, bool write);
+
int __init vfio_container_init(void);
void vfio_container_cleanup(void);
+#else
+static inline struct vfio_container *
+vfio_container_from_file(struct file *filep)
+{
+ return NULL;
+}
+
+static inline int vfio_group_use_container(struct vfio_group *group)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void vfio_group_unuse_container(struct vfio_group *group)
+{
+}
+
+static inline int vfio_container_attach_group(struct vfio_container *container,
+ struct vfio_group *group)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void vfio_group_detach_container(struct vfio_group *group)
+{
+}
+
+static inline void vfio_device_container_register(struct vfio_device *device)
+{
+}
+
+static inline void vfio_device_container_unregister(struct vfio_device *device)
+{
+}
+
+static inline int vfio_device_container_pin_pages(struct vfio_device *device,
+ dma_addr_t iova, int npage,
+ int prot, struct page **pages)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void vfio_device_container_unpin_pages(struct vfio_device *device,
+ dma_addr_t iova, int npage)
+{
+}
+
+static inline int vfio_device_container_dma_rw(struct vfio_device *device,
+ dma_addr_t iova, void *data,
+ size_t len, bool write)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int vfio_container_init(void)
+{
+ return 0;
+}
+static inline void vfio_container_cleanup(void)
+{
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IOMMUFD)
+int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx);
+void vfio_iommufd_unbind(struct vfio_device *device);
+#else
+static inline int vfio_iommufd_bind(struct vfio_device *device,
+ struct iommufd_ctx *ictx)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void vfio_iommufd_unbind(struct vfio_device *device)
+{
+}
+#endif
#ifdef CONFIG_VFIO_NOIOMMU
extern bool vfio_noiommu __read_mostly;
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 6e8804fe0095..e21ff965141e 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -13,8 +13,6 @@
#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
-#include <linux/file.h>
-#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
@@ -35,6 +33,7 @@
#include <linux/pm_runtime.h>
#include <linux/interval_tree.h>
#include <linux/iova_bitmap.h>
+#include <linux/iommufd.h>
#include "vfio.h"
#define DRIVER_VERSION "0.3"
@@ -42,17 +41,11 @@
#define DRIVER_DESC "VFIO - User Level meta-driver"
static struct vfio {
- struct class *class;
- struct list_head group_list;
- struct mutex group_lock; /* locks group_list */
- struct ida group_ida;
- dev_t group_devt;
struct class *device_class;
struct ida device_ida;
} vfio;
static DEFINE_XARRAY(vfio_device_set_xa);
-static const struct file_operations vfio_group_fops;
int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
@@ -139,207 +132,20 @@ unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set)
EXPORT_SYMBOL_GPL(vfio_device_set_open_count);
/*
- * Group objects - create, release, get, put, search
- */
-static struct vfio_group *
-__vfio_group_get_from_iommu(struct iommu_group *iommu_group)
-{
- struct vfio_group *group;
-
- /*
- * group->iommu_group from the vfio.group_list cannot be NULL
- * under the vfio.group_lock.
- */
- list_for_each_entry(group, &vfio.group_list, vfio_next) {
- if (group->iommu_group == iommu_group) {
- refcount_inc(&group->drivers);
- return group;
- }
- }
- return NULL;
-}
-
-static struct vfio_group *
-vfio_group_get_from_iommu(struct iommu_group *iommu_group)
-{
- struct vfio_group *group;
-
- mutex_lock(&vfio.group_lock);
- group = __vfio_group_get_from_iommu(iommu_group);
- mutex_unlock(&vfio.group_lock);
- return group;
-}
-
-static void vfio_group_release(struct device *dev)
-{
- struct vfio_group *group = container_of(dev, struct vfio_group, dev);
-
- mutex_destroy(&group->device_lock);
- mutex_destroy(&group->group_lock);
- WARN_ON(group->iommu_group);
- ida_free(&vfio.group_ida, MINOR(group->dev.devt));
- kfree(group);
-}
-
-static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
- enum vfio_group_type type)
-{
- struct vfio_group *group;
- int minor;
-
- group = kzalloc(sizeof(*group), GFP_KERNEL);
- if (!group)
- return ERR_PTR(-ENOMEM);
-
- minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
- if (minor < 0) {
- kfree(group);
- return ERR_PTR(minor);
- }
-
- device_initialize(&group->dev);
- group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
- group->dev.class = vfio.class;
- group->dev.release = vfio_group_release;
- cdev_init(&group->cdev, &vfio_group_fops);
- group->cdev.owner = THIS_MODULE;
-
- refcount_set(&group->drivers, 1);
- mutex_init(&group->group_lock);
- INIT_LIST_HEAD(&group->device_list);
- mutex_init(&group->device_lock);
- group->iommu_group = iommu_group;
- /* put in vfio_group_release() */
- iommu_group_ref_get(iommu_group);
- group->type = type;
- BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
-
- return group;
-}
-
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
- enum vfio_group_type type)
-{
- struct vfio_group *group;
- struct vfio_group *ret;
- int err;
-
- group = vfio_group_alloc(iommu_group, type);
- if (IS_ERR(group))
- return group;
-
- err = dev_set_name(&group->dev, "%s%d",
- group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
- iommu_group_id(iommu_group));
- if (err) {
- ret = ERR_PTR(err);
- goto err_put;
- }
-
- mutex_lock(&vfio.group_lock);
-
- /* Did we race creating this group? */
- ret = __vfio_group_get_from_iommu(iommu_group);
- if (ret)
- goto err_unlock;
-
- err = cdev_device_add(&group->cdev, &group->dev);
- if (err) {
- ret = ERR_PTR(err);
- goto err_unlock;
- }
-
- list_add(&group->vfio_next, &vfio.group_list);
-
- mutex_unlock(&vfio.group_lock);
- return group;
-
-err_unlock:
- mutex_unlock(&vfio.group_lock);
-err_put:
- put_device(&group->dev);
- return ret;
-}
-
-static void vfio_device_remove_group(struct vfio_device *device)
-{
- struct vfio_group *group = device->group;
- struct iommu_group *iommu_group;
-
- if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
- iommu_group_remove_device(device->dev);
-
- /* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */
- if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
- return;
- list_del(&group->vfio_next);
-
- /*
- * We could concurrently probe another driver in the group that might
- * race vfio_device_remove_group() with vfio_get_group(), so we have to
- * ensure that the sysfs is all cleaned up under lock otherwise the
- * cdev_device_add() will fail due to the name aready existing.
- */
- cdev_device_del(&group->cdev, &group->dev);
-
- mutex_lock(&group->group_lock);
- /*
- * These data structures all have paired operations that can only be
- * undone when the caller holds a live reference on the device. Since
- * all pairs must be undone these WARN_ON's indicate some caller did not
- * properly hold the group reference.
- */
- WARN_ON(!list_empty(&group->device_list));
- WARN_ON(group->notifier.head);
-
- /*
- * Revoke all users of group->iommu_group. At this point we know there
- * are no devices active because we are unplugging the last one. Setting
- * iommu_group to NULL blocks all new users.
- */
- if (group->container)
- vfio_group_detach_container(group);
- iommu_group = group->iommu_group;
- group->iommu_group = NULL;
- mutex_unlock(&group->group_lock);
- mutex_unlock(&vfio.group_lock);
-
- iommu_group_put(iommu_group);
- put_device(&group->dev);
-}
-
-/*
* Device objects - create, release, get, put, search
*/
/* Device reference always implies a group reference */
-static void vfio_device_put_registration(struct vfio_device *device)
+void vfio_device_put_registration(struct vfio_device *device)
{
if (refcount_dec_and_test(&device->refcount))
complete(&device->comp);
}
-static bool vfio_device_try_get_registration(struct vfio_device *device)
+bool vfio_device_try_get_registration(struct vfio_device *device)
{
return refcount_inc_not_zero(&device->refcount);
}
-static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
- struct device *dev)
-{
- struct vfio_device *device;
-
- mutex_lock(&group->device_lock);
- list_for_each_entry(device, &group->device_list, group_next) {
- if (device->dev == dev &&
- vfio_device_try_get_registration(device)) {
- mutex_unlock(&group->device_lock);
- return device;
- }
- }
- mutex_unlock(&group->device_lock);
- return NULL;
-}
-
/*
* VFIO driver API
*/
@@ -448,94 +254,15 @@ void vfio_free_device(struct vfio_device *device)
}
EXPORT_SYMBOL_GPL(vfio_free_device);
-static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
- enum vfio_group_type type)
-{
- struct iommu_group *iommu_group;
- struct vfio_group *group;
- int ret;
-
- iommu_group = iommu_group_alloc();
- if (IS_ERR(iommu_group))
- return ERR_CAST(iommu_group);
-
- ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
- if (ret)
- goto out_put_group;
- ret = iommu_group_add_device(iommu_group, dev);
- if (ret)
- goto out_put_group;
-
- group = vfio_create_group(iommu_group, type);
- if (IS_ERR(group)) {
- ret = PTR_ERR(group);
- goto out_remove_device;
- }
- iommu_group_put(iommu_group);
- return group;
-
-out_remove_device:
- iommu_group_remove_device(dev);
-out_put_group:
- iommu_group_put(iommu_group);
- return ERR_PTR(ret);
-}
-
-static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
-{
- struct iommu_group *iommu_group;
- struct vfio_group *group;
-
- iommu_group = iommu_group_get(dev);
- if (!iommu_group && vfio_noiommu) {
- /*
- * With noiommu enabled, create an IOMMU group for devices that
- * don't already have one, implying no IOMMU hardware/driver
- * exists. Taint the kernel because we're about to give a DMA
- * capable device to a user without IOMMU protection.
- */
- group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
- if (!IS_ERR(group)) {
- add_taint(TAINT_USER, LOCKDEP_STILL_OK);
- dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
- }
- return group;
- }
-
- if (!iommu_group)
- return ERR_PTR(-EINVAL);
-
- /*
- * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
- * restore cache coherency. It has to be checked here because it is only
- * valid for cases where we are using iommu groups.
- */
- if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
- iommu_group_put(iommu_group);
- return ERR_PTR(-EINVAL);
- }
-
- group = vfio_group_get_from_iommu(iommu_group);
- if (!group)
- group = vfio_create_group(iommu_group, VFIO_IOMMU);
-
- /* The vfio_group holds a reference to the iommu_group */
- iommu_group_put(iommu_group);
- return group;
-}
-
static int __vfio_register_dev(struct vfio_device *device,
- struct vfio_group *group)
+ enum vfio_group_type type)
{
- struct vfio_device *existing_device;
int ret;
- /*
- * In all cases group is the output of one of the group allocation
- * functions and we have group->drivers incremented for us.
- */
- if (IS_ERR(group))
- return PTR_ERR(group);
+ if (WARN_ON(device->ops->bind_iommufd &&
+ (!device->ops->unbind_iommufd ||
+ !device->ops->attach_ioas)))
+ return -EINVAL;
/*
* If the driver doesn't specify a set then the device is added to a
@@ -544,25 +271,13 @@ static int __vfio_register_dev(struct vfio_device *device,
if (!device->dev_set)
vfio_assign_device_set(device, device);
- existing_device = vfio_group_get_device(group, device->dev);
- if (existing_device) {
- /*
- * group->iommu_group is non-NULL because we hold the drivers
- * refcount.
- */
- dev_WARN(device->dev, "Device already exists on group %d\n",
- iommu_group_id(group->iommu_group));
- vfio_device_put_registration(existing_device);
- ret = -EBUSY;
- goto err_out;
- }
-
- /* Our reference on group is moved to the device */
- device->group = group;
-
ret = dev_set_name(&device->device, "vfio%d", device->index);
if (ret)
- goto err_out;
+ return ret;
+
+ ret = vfio_device_set_group(device, type);
+ if (ret)
+ return ret;
ret = device_add(&device->device);
if (ret)
@@ -571,9 +286,7 @@ static int __vfio_register_dev(struct vfio_device *device,
/* Refcounting can't start until the driver calls register */
refcount_set(&device->refcount, 1);
- mutex_lock(&group->device_lock);
- list_add(&device->group_next, &group->device_list);
- mutex_unlock(&group->device_lock);
+ vfio_device_group_register(device);
return 0;
err_out:
@@ -583,8 +296,7 @@ err_out:
int vfio_register_group_dev(struct vfio_device *device)
{
- return __vfio_register_dev(device,
- vfio_group_find_or_alloc(device->dev));
+ return __vfio_register_dev(device, VFIO_IOMMU);
}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);
@@ -594,46 +306,15 @@ EXPORT_SYMBOL_GPL(vfio_register_group_dev);
*/
int vfio_register_emulated_iommu_dev(struct vfio_device *device)
{
- return __vfio_register_dev(device,
- vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
+ return __vfio_register_dev(device, VFIO_EMULATED_IOMMU);
}
EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
-static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
- char *buf)
-{
- struct vfio_device *it, *device = ERR_PTR(-ENODEV);
-
- mutex_lock(&group->device_lock);
- list_for_each_entry(it, &group->device_list, group_next) {
- int ret;
-
- if (it->ops->match) {
- ret = it->ops->match(it, buf);
- if (ret < 0) {
- device = ERR_PTR(ret);
- break;
- }
- } else {
- ret = !strcmp(dev_name(it->dev), buf);
- }
-
- if (ret && vfio_device_try_get_registration(it)) {
- device = it;
- break;
- }
- }
- mutex_unlock(&group->device_lock);
-
- return device;
-}
-
/*
* Decrement the device reference count and wait for the device to be
* removed. Open file descriptors for the device... */
void vfio_unregister_group_dev(struct vfio_device *device)
{
- struct vfio_group *group = device->group;
unsigned int i = 0;
bool interrupted = false;
long rc;
@@ -661,333 +342,101 @@ void vfio_unregister_group_dev(struct vfio_device *device)
}
}
- mutex_lock(&group->device_lock);
- list_del(&device->group_next);
- mutex_unlock(&group->device_lock);
+ vfio_device_group_unregister(device);
/* Balances device_add in register path */
device_del(&device->device);
+ /* Balances vfio_device_set_group in register path */
vfio_device_remove_group(device);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
-/*
- * VFIO Group fd, /dev/vfio/$GROUP
- */
-/*
- * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
- * if there was no container to unset. Since the ioctl is called on
- * the group, we know it still exists, therefore the only valid
- * transition here is 1->0.
- */
-static int vfio_group_ioctl_unset_container(struct vfio_group *group)
+/* true if the vfio_device has open_device() called but not close_device() */
+static bool vfio_assert_device_open(struct vfio_device *device)
{
- int ret = 0;
-
- mutex_lock(&group->group_lock);
- if (!group->container) {
- ret = -EINVAL;
- goto out_unlock;
- }
- if (group->container_users != 1) {
- ret = -EBUSY;
- goto out_unlock;
- }
- vfio_group_detach_container(group);
-
-out_unlock:
- mutex_unlock(&group->group_lock);
- return ret;
+ return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}
-static int vfio_group_ioctl_set_container(struct vfio_group *group,
- int __user *arg)
+static int vfio_device_first_open(struct vfio_device *device,
+ struct iommufd_ctx *iommufd, struct kvm *kvm)
{
- struct vfio_container *container;
- struct fd f;
int ret;
- int fd;
- if (get_user(fd, arg))
- return -EFAULT;
+ lockdep_assert_held(&device->dev_set->lock);
- f = fdget(fd);
- if (!f.file)
- return -EBADF;
+ if (!try_module_get(device->dev->driver->owner))
+ return -ENODEV;
- mutex_lock(&group->group_lock);
- if (group->container || WARN_ON(group->container_users)) {
- ret = -EINVAL;
- goto out_unlock;
- }
- if (!group->iommu_group) {
- ret = -ENODEV;
- goto out_unlock;
- }
+ if (iommufd)
+ ret = vfio_iommufd_bind(device, iommufd);
+ else
+ ret = vfio_device_group_use_iommu(device);
+ if (ret)
+ goto err_module_put;
- container = vfio_container_from_file(f.file);
- ret = -EINVAL;
- if (container) {
- ret = vfio_container_attach_group(container, group);
- goto out_unlock;
+ device->kvm = kvm;
+ if (device->ops->open_device) {
+ ret = device->ops->open_device(device);
+ if (ret)
+ goto err_unuse_iommu;
}
+ return 0;
-out_unlock:
- mutex_unlock(&group->group_lock);
- fdput(f);
+err_unuse_iommu:
+ device->kvm = NULL;
+ if (iommufd)
+ vfio_iommufd_unbind(device);
+ else
+ vfio_device_group_unuse_iommu(device);
+err_module_put:
+ module_put(device->dev->driver->owner);
return ret;
}
-static const struct file_operations vfio_device_fops;
-
-/* true if the vfio_device has open_device() called but not close_device() */
-bool vfio_assert_device_open(struct vfio_device *device)
+static void vfio_device_last_close(struct vfio_device *device,
+ struct iommufd_ctx *iommufd)
{
- return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
+ lockdep_assert_held(&device->dev_set->lock);
+
+ if (device->ops->close_device)
+ device->ops->close_device(device);
+ device->kvm = NULL;
+ if (iommufd)
+ vfio_iommufd_unbind(device);
+ else
+ vfio_device_group_unuse_iommu(device);
+ module_put(device->dev->driver->owner);
}
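vfio_device_last_close() unwinds vfio_device_first_open() in strict reverse order: driver close_device(), drop the KVM pointer, unbind from iommufd or release the container IOMMU, then put the module reference. A hedged sketch of the driver hooks these paths invoke (the sample_* names and hardware helpers are hypothetical):

/* open_device() runs once on the 0->1 open_count transition, after the
 * IOMMU/iommufd binding succeeded; close_device() runs once on 1->0,
 * before the binding is torn down. device->kvm is valid only between
 * the two calls.
 */
static int sample_open_device(struct vfio_device *vdev)
{
	struct sample_device *sdev =
		container_of(vdev, struct sample_device, vdev);

	return sample_hw_enable(sdev);	/* hypothetical */
}

static void sample_close_device(struct vfio_device *vdev)
{
	struct sample_device *sdev =
		container_of(vdev, struct sample_device, vdev);

	sample_hw_disable(sdev);	/* hypothetical */
}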
-static struct file *vfio_device_open(struct vfio_device *device)
+int vfio_device_open(struct vfio_device *device,
+ struct iommufd_ctx *iommufd, struct kvm *kvm)
{
- struct file *filep;
- int ret;
-
- mutex_lock(&device->group->group_lock);
- ret = vfio_device_assign_container(device);
- mutex_unlock(&device->group->group_lock);
- if (ret)
- return ERR_PTR(ret);
-
- if (!try_module_get(device->dev->driver->owner)) {
- ret = -ENODEV;
- goto err_unassign_container;
- }
+ int ret = 0;
mutex_lock(&device->dev_set->lock);
device->open_count++;
if (device->open_count == 1) {
- /*
- * Here we pass the KVM pointer with the group under the read
- * lock. If the device driver will use it, it must obtain a
- * reference and release it during close_device.
- */
- mutex_lock(&device->group->group_lock);
- device->kvm = device->group->kvm;
-
- if (device->ops->open_device) {
- ret = device->ops->open_device(device);
- if (ret)
- goto err_undo_count;
- }
- vfio_device_container_register(device);
- mutex_unlock(&device->group->group_lock);
- }
- mutex_unlock(&device->dev_set->lock);
-
- /*
- * We can't use anon_inode_getfd() because we need to modify
- * the f_mode flags directly to allow more than just ioctls
- */
- filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
- device, O_RDWR);
- if (IS_ERR(filep)) {
- ret = PTR_ERR(filep);
- goto err_close_device;
- }
-
- /*
- * TODO: add an anon_inode interface to do this.
- * Appears to be missing by lack of need rather than
- * explicitly prevented. Now there's need.
- */
- filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
-
- if (device->group->type == VFIO_NO_IOMMU)
- dev_warn(device->dev, "vfio-noiommu device opened by user "
- "(%s:%d)\n", current->comm, task_pid_nr(current));
- /*
- * On success the ref of device is moved to the file and
- * put in vfio_device_fops_release()
- */
- return filep;
-
-err_close_device:
- mutex_lock(&device->dev_set->lock);
- mutex_lock(&device->group->group_lock);
- if (device->open_count == 1) {
- if (device->ops->close_device)
- device->ops->close_device(device);
-
- vfio_device_container_unregister(device);
+ ret = vfio_device_first_open(device, iommufd, kvm);
+ if (ret)
+ device->open_count--;
}
-err_undo_count:
- mutex_unlock(&device->group->group_lock);
- device->open_count--;
- if (device->open_count == 0 && device->kvm)
- device->kvm = NULL;
mutex_unlock(&device->dev_set->lock);
- module_put(device->dev->driver->owner);
-err_unassign_container:
- vfio_device_unassign_container(device);
- return ERR_PTR(ret);
-}
-
-static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
- char __user *arg)
-{
- struct vfio_device *device;
- struct file *filep;
- char *buf;
- int fdno;
- int ret;
-
- buf = strndup_user(arg, PAGE_SIZE);
- if (IS_ERR(buf))
- return PTR_ERR(buf);
-
- device = vfio_device_get_from_name(group, buf);
- kfree(buf);
- if (IS_ERR(device))
- return PTR_ERR(device);
-
- fdno = get_unused_fd_flags(O_CLOEXEC);
- if (fdno < 0) {
- ret = fdno;
- goto err_put_device;
- }
-
- filep = vfio_device_open(device);
- if (IS_ERR(filep)) {
- ret = PTR_ERR(filep);
- goto err_put_fdno;
- }
-
- fd_install(fdno, filep);
- return fdno;
-
-err_put_fdno:
- put_unused_fd(fdno);
-err_put_device:
- vfio_device_put_registration(device);
- return ret;
-}
-
-static int vfio_group_ioctl_get_status(struct vfio_group *group,
- struct vfio_group_status __user *arg)
-{
- unsigned long minsz = offsetofend(struct vfio_group_status, flags);
- struct vfio_group_status status;
-
- if (copy_from_user(&status, arg, minsz))
- return -EFAULT;
-
- if (status.argsz < minsz)
- return -EINVAL;
-
- status.flags = 0;
-
- mutex_lock(&group->group_lock);
- if (!group->iommu_group) {
- mutex_unlock(&group->group_lock);
- return -ENODEV;
- }
-
- if (group->container)
- status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
- VFIO_GROUP_FLAGS_VIABLE;
- else if (!iommu_group_dma_owner_claimed(group->iommu_group))
- status.flags |= VFIO_GROUP_FLAGS_VIABLE;
- mutex_unlock(&group->group_lock);
-
- if (copy_to_user(arg, &status, minsz))
- return -EFAULT;
- return 0;
-}
-
-static long vfio_group_fops_unl_ioctl(struct file *filep,
- unsigned int cmd, unsigned long arg)
-{
- struct vfio_group *group = filep->private_data;
- void __user *uarg = (void __user *)arg;
-
- switch (cmd) {
- case VFIO_GROUP_GET_DEVICE_FD:
- return vfio_group_ioctl_get_device_fd(group, uarg);
- case VFIO_GROUP_GET_STATUS:
- return vfio_group_ioctl_get_status(group, uarg);
- case VFIO_GROUP_SET_CONTAINER:
- return vfio_group_ioctl_set_container(group, uarg);
- case VFIO_GROUP_UNSET_CONTAINER:
- return vfio_group_ioctl_unset_container(group);
- default:
- return -ENOTTY;
- }
-}
-
-static int vfio_group_fops_open(struct inode *inode, struct file *filep)
-{
- struct vfio_group *group =
- container_of(inode->i_cdev, struct vfio_group, cdev);
- int ret;
-
- mutex_lock(&group->group_lock);
- /*
- * drivers can be zero if this races with vfio_device_remove_group(), it
- * will be stable at 0 under the group rwsem
- */
- if (refcount_read(&group->drivers) == 0) {
- ret = -ENODEV;
- goto out_unlock;
- }
-
- if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
- ret = -EPERM;
- goto out_unlock;
- }
-
- /*
- * Do we need multiple instances of the group open? Seems not.
- */
- if (group->opened_file) {
- ret = -EBUSY;
- goto out_unlock;
- }
- group->opened_file = filep;
- filep->private_data = group;
- ret = 0;
-out_unlock:
- mutex_unlock(&group->group_lock);
return ret;
}
-static int vfio_group_fops_release(struct inode *inode, struct file *filep)
+void vfio_device_close(struct vfio_device *device,
+ struct iommufd_ctx *iommufd)
{
- struct vfio_group *group = filep->private_data;
-
- filep->private_data = NULL;
-
- mutex_lock(&group->group_lock);
- /*
- * Device FDs hold a group file reference, therefore the group release
- * is only called when there are no open devices.
- */
- WARN_ON(group->notifier.head);
- if (group->container)
- vfio_group_detach_container(group);
- group->opened_file = NULL;
- mutex_unlock(&group->group_lock);
- return 0;
+ mutex_lock(&device->dev_set->lock);
+ vfio_assert_device_open(device);
+ if (device->open_count == 1)
+ vfio_device_last_close(device, iommufd);
+ device->open_count--;
+ mutex_unlock(&device->dev_set->lock);
}
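Callers must pair vfio_device_open() and vfio_device_close() with the same iommufd context so the matching unbind path runs on the final close; only the 0->1 and 1->0 transitions of open_count do real work. A sketch of the pairing, assuming a caller like the group open path in group.c (sample_use_device() is hypothetical):

/* Sketch of a caller; in this series the real caller is the group
 * code. Passing the same @iommufd to open and close selects the same
 * bind/unbind path.
 */
static int sample_use_device(struct vfio_device *device,
			     struct iommufd_ctx *iommufd, struct kvm *kvm)
{
	int ret;

	ret = vfio_device_open(device, iommufd, kvm);
	if (ret)
		return ret;

	/* ... device is usable here ... */

	vfio_device_close(device, iommufd);
	return 0;
}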
-static const struct file_operations vfio_group_fops = {
- .owner = THIS_MODULE,
- .unlocked_ioctl = vfio_group_fops_unl_ioctl,
- .compat_ioctl = compat_ptr_ioctl,
- .open = vfio_group_fops_open,
- .release = vfio_group_fops_release,
-};
-
/*
* Wrapper around pm_runtime_resume_and_get().
* Return error code on failure or 0 on success.
@@ -1028,24 +477,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_device *device = filep->private_data;
- mutex_lock(&device->dev_set->lock);
- vfio_assert_device_open(device);
- mutex_lock(&device->group->group_lock);
- if (device->open_count == 1) {
- if (device->ops->close_device)
- device->ops->close_device(device);
-
- vfio_device_container_unregister(device);
- }
- mutex_unlock(&device->group->group_lock);
- device->open_count--;
- if (device->open_count == 0)
- device->kvm = NULL;
- mutex_unlock(&device->dev_set->lock);
-
- module_put(device->dev->driver->owner);
-
- vfio_device_unassign_container(device);
+ vfio_device_group_close(device);
vfio_device_put_registration(device);
@@ -1568,7 +1000,7 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
return device->ops->mmap(device, vma);
}
-static const struct file_operations vfio_device_fops = {
+const struct file_operations vfio_device_fops = {
.owner = THIS_MODULE,
.release = vfio_device_fops_release,
.read = vfio_device_fops_read,
@@ -1578,118 +1010,6 @@ static const struct file_operations vfio_device_fops = {
.mmap = vfio_device_fops_mmap,
};
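Dropping the static qualifier from vfio_device_fops lets the group code now living in group.c keep building the device FD the same way. A sketch mirroring the open path removed above (the helper name is hypothetical):

/* Sketch: group.c wraps the shared fops in an anon inode file, as the
 * removed vfio_device_open() did here.
 */
static struct file *sample_device_file(struct vfio_device *device)
{
	struct file *filep;

	/* f_mode must be tweaked afterwards, so not anon_inode_getfd() */
	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
				   device, O_RDWR);
	if (IS_ERR(filep))
		return filep;

	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
	return filep;
}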
-/**
- * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
- * @file: VFIO group file
- *
- * The returned iommu_group is valid as long as a ref is held on the file. This
- * returns a reference on the group. This function is deprecated, only the SPAPR
- * path in kvm should call it.
- */
-struct iommu_group *vfio_file_iommu_group(struct file *file)
-{
- struct vfio_group *group = file->private_data;
- struct iommu_group *iommu_group = NULL;
-
- if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
- return NULL;
-
- if (!vfio_file_is_group(file))
- return NULL;
-
- mutex_lock(&group->group_lock);
- if (group->iommu_group) {
- iommu_group = group->iommu_group;
- iommu_group_ref_get(iommu_group);
- }
- mutex_unlock(&group->group_lock);
- return iommu_group;
-}
-EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
-
-/**
- * vfio_file_is_group - True if the file is usable with VFIO APIs
- * @file: VFIO group file
- */
-bool vfio_file_is_group(struct file *file)
-{
- return file->f_op == &vfio_group_fops;
-}
-EXPORT_SYMBOL_GPL(vfio_file_is_group);
-
-/**
- * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
- * is always CPU cache coherent
- * @file: VFIO group file
- *
- * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
- * bit in DMA transactions. A return of false indicates that the user has
- * rights to access additional instructions such as wbinvd on x86.
- */
-bool vfio_file_enforced_coherent(struct file *file)
-{
- struct vfio_group *group = file->private_data;
- bool ret;
-
- if (!vfio_file_is_group(file))
- return true;
-
- mutex_lock(&group->group_lock);
- if (group->container) {
- ret = vfio_container_ioctl_check_extension(group->container,
- VFIO_DMA_CC_IOMMU);
- } else {
- /*
- * Since the coherency state is determined only once a container
- * is attached, the user must do so before they can prove they
- * have permission.
- */
- ret = true;
- }
- mutex_unlock(&group->group_lock);
- return ret;
-}
-EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
-
-/**
- * vfio_file_set_kvm - Link a kvm with VFIO drivers
- * @file: VFIO group file
- * @kvm: KVM to link
- *
- * When a VFIO device is first opened the KVM will be available in
- * device->kvm if one was associated with the group.
- */
-void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
-{
- struct vfio_group *group = file->private_data;
-
- if (!vfio_file_is_group(file))
- return;
-
- mutex_lock(&group->group_lock);
- group->kvm = kvm;
- mutex_unlock(&group->group_lock);
-}
-EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
-
-/**
- * vfio_file_has_dev - True if the VFIO file is a handle for device
- * @file: VFIO file to check
- * @device: Device that must be part of the file
- *
- * Returns true if the given file has permission to manipulate the given device.
- */
-bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
-{
- struct vfio_group *group = file->private_data;
-
- if (!vfio_file_is_group(file))
- return false;
-
- return group == device->group;
-}
-EXPORT_SYMBOL_GPL(vfio_file_has_dev);
-
/*
* Sub-module support
*/
@@ -1810,35 +1130,136 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
/*
- * Module/class support
+ * Pin contiguous user pages and return their associated host pages for local
+ * domain only.
+ * @device [in] : device
+ * @iova [in] : starting IOVA of user pages to be pinned.
+ * @npage [in] : count of pages to be pinned. This count should not
+ * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
+ * @prot [in] : protection flags
+ * @pages[out] : array of host pages
+ * Return error or number of pages pinned.
+ *
+ * A driver may only call this function if the vfio_device was created
+ * by vfio_register_emulated_iommu_dev(); see vfio_device_container_pin_pages().
+ */
+int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
+ int npage, int prot, struct page **pages)
+{
+ /* group->container cannot change while a vfio device is open */
+ if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device)))
+ return -EINVAL;
+ if (vfio_device_has_container(device))
+ return vfio_device_container_pin_pages(device, iova,
+ npage, prot, pages);
+ if (device->iommufd_access) {
+ int ret;
+
+ if (iova > ULONG_MAX)
+ return -EINVAL;
+ /*
+ * VFIO ignores the sub page offset, npages is from the start of
+ * a PAGE_SIZE chunk of IOVA. The caller is expected to recover
+ * the sub page offset by doing:
+ * pages[0] + (iova % PAGE_SIZE)
+ */
+ ret = iommufd_access_pin_pages(
+ device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE),
+ npage * PAGE_SIZE, pages,
+ (prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0);
+ if (ret)
+ return ret;
+ return npage;
+ }
+ return -EINVAL;
+}
+EXPORT_SYMBOL(vfio_pin_pages);
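Because the iommufd path pins whole PAGE_SIZE chunks, the caller recovers the sub-page offset itself, as the comment above describes. A hedged usage sketch for an emulated-IOMMU driver, also showing the matching unpin (sample_peek_byte() is hypothetical):

/* Pin the single page backing @iova, read one byte through it, unpin.
 * Assumes the device was registered via vfio_register_emulated_iommu_dev()
 * and is currently open.
 */
static int sample_peek_byte(struct vfio_device *device, dma_addr_t iova,
			    u8 *val)
{
	struct page *page;
	void *vaddr;
	int ret;

	ret = vfio_pin_pages(device, iova, 1, IOMMU_READ, &page);
	if (ret != 1)
		return ret < 0 ? ret : -EFAULT;

	vaddr = kmap_local_page(page);
	*val = *((u8 *)vaddr + (iova & ~PAGE_MASK));	/* sub-page offset */
	kunmap_local(vaddr);

	vfio_unpin_pages(device, iova, 1);
	return 0;
}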
+
+/*
+ * Unpin contiguous host pages for local domain only.
+ * @device [in] : device
+ * @iova [in] : starting IOVA of user pages to be unpinned.
+ * @npage [in] : count of pages to be unpinned. This count should not
+ * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
+ */
+void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
+{
+ if (WARN_ON(!vfio_assert_device_open(device)))
+ return;
+
+ if (vfio_device_has_container(device)) {
+ vfio_device_container_unpin_pages(device, iova, npage);
+ return;
+ }
+ if (device->iommufd_access) {
+ if (WARN_ON(iova > ULONG_MAX))
+ return;
+ iommufd_access_unpin_pages(device->iommufd_access,
+ ALIGN_DOWN(iova, PAGE_SIZE),
+ npage * PAGE_SIZE);
+ return;
+ }
+}
+EXPORT_SYMBOL(vfio_unpin_pages);
+
+/*
+ * This interface allows the CPUs to perform virtual DMA on behalf of
+ * the device.
+ *
+ * CPUs read/write from/into a range of IOVAs pointing to user space memory
+ * into/from a kernel buffer.
+ *
+ * As the read/write of user space memory is conducted via the CPUs and is
+ * not a real device DMA, it is not necessary to pin the user space memory.
+ *
+ * @device [in] : VFIO device
+ * @iova [in] : base IOVA of a user space buffer
+ * @data [in] : pointer to kernel buffer
+ * @len [in] : kernel buffer length
+ * @write [in] : true to write @data to the IOVA range, false to read
+ * Return error code on failure or 0 on success.
*/
-static char *vfio_devnode(struct device *dev, umode_t *mode)
+int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
+ size_t len, bool write)
{
- return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
+ if (!data || len <= 0 || !vfio_assert_device_open(device))
+ return -EINVAL;
+
+ if (vfio_device_has_container(device))
+ return vfio_device_container_dma_rw(device, iova,
+ data, len, write);
+
+ if (device->iommufd_access) {
+ unsigned int flags = 0;
+
+ if (iova > ULONG_MAX)
+ return -EINVAL;
+
+ /* VFIO historically tries to auto-detect a kthread */
+ if (!current->mm)
+ flags |= IOMMUFD_ACCESS_RW_KTHREAD;
+ if (write)
+ flags |= IOMMUFD_ACCESS_RW_WRITE;
+ return iommufd_access_rw(device->iommufd_access, iova, data,
+ len, flags);
+ }
+ return -EINVAL;
}
+EXPORT_SYMBOL(vfio_dma_rw);
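A typical use is a read-modify-write of a descriptor the guest placed at a known IOVA, done entirely through the CPU with no pinning. A hedged sketch (struct sample_desc and the DONE flag are hypothetical):

/* Read a guest descriptor at @iova, set a completion flag, write it
 * back; vfio_dma_rw() routes through the container or iommufd access.
 */
static int sample_complete_desc(struct vfio_device *device, dma_addr_t iova,
				struct sample_desc *desc)
{
	int ret;

	ret = vfio_dma_rw(device, iova, desc, sizeof(*desc), false);
	if (ret)
		return ret;

	desc->flags |= SAMPLE_DESC_DONE;	/* hypothetical */

	return vfio_dma_rw(device, iova, desc, sizeof(*desc), true);
}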
+/*
+ * Module/class support
+ */
static int __init vfio_init(void)
{
int ret;
- ida_init(&vfio.group_ida);
ida_init(&vfio.device_ida);
- mutex_init(&vfio.group_lock);
- INIT_LIST_HEAD(&vfio.group_list);
- ret = vfio_container_init();
+ ret = vfio_group_init();
if (ret)
return ret;
- /* /dev/vfio/$GROUP */
- vfio.class = class_create(THIS_MODULE, "vfio");
- if (IS_ERR(vfio.class)) {
- ret = PTR_ERR(vfio.class);
- goto err_group_class;
- }
-
- vfio.class->devnode = vfio_devnode;
-
/* /sys/class/vfio-dev/vfioX */
vfio.device_class = class_create(THIS_MODULE, "vfio-dev");
if (IS_ERR(vfio.device_class)) {
@@ -1846,36 +1267,20 @@ static int __init vfio_init(void)
goto err_dev_class;
}
- ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
- if (ret)
- goto err_alloc_chrdev;
-
pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
return 0;
-err_alloc_chrdev:
- class_destroy(vfio.device_class);
- vfio.device_class = NULL;
err_dev_class:
- class_destroy(vfio.class);
- vfio.class = NULL;
-err_group_class:
- vfio_container_cleanup();
+ vfio_group_cleanup();
return ret;
}
static void __exit vfio_cleanup(void)
{
- WARN_ON(!list_empty(&vfio.group_list));
-
ida_destroy(&vfio.device_ida);
- ida_destroy(&vfio.group_ida);
- unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
class_destroy(vfio.device_class);
vfio.device_class = NULL;
- class_destroy(vfio.class);
- vfio_container_cleanup();
- vfio.class = NULL;
+ vfio_group_cleanup();
xa_destroy(&vfio_device_set_xa);
}
@@ -1886,6 +1291,4 @@ MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_ALIAS_MISCDEV(VFIO_MINOR);
-MODULE_ALIAS("devname:vfio/vfio");
MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");