diff options
Diffstat (limited to 'drivers/iommu/amd')
-rw-r--r-- | drivers/iommu/amd/init.c | 7 | ||||
-rw-r--r-- | drivers/iommu/amd/iommu.c | 124 | ||||
-rw-r--r-- | drivers/iommu/amd/iommu_v2.c | 13 |
3 files changed, 117 insertions, 27 deletions
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 6e12a615117b..239556c1f698 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1849,8 +1849,13 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (ret) return ret; - if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) + if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { + if (!amd_iommu_unmap_flush) + pr_info("IOMMU batching is disabled due to virtualization\n"); + amd_iommu_np_cache = true; + amd_iommu_unmap_flush = true; + } init_iommu_perf_ctr(iommu); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 52fe2326042a..7dedbea9c67f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -425,9 +425,11 @@ static void amd_iommu_report_rmp_hw_error(volatile u32 *event) if (pdev) dev_data = dev_iommu_priv_get(&pdev->dev); - if (dev_data && __ratelimit(&dev_data->rs)) { - pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n", - vmg_tag, spa, flags); + if (dev_data) { + if (__ratelimit(&dev_data->rs)) { + pci_err(pdev, "Event logged [RMP_HW_ERROR vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n", + vmg_tag, spa, flags); + } } else { pr_err_ratelimited("Event logged [RMP_HW_ERROR device=%02x:%02x.%x, vmg_tag=0x%04x, spa=0x%llx, flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), @@ -456,9 +458,11 @@ static void amd_iommu_report_rmp_fault(volatile u32 *event) if (pdev) dev_data = dev_iommu_priv_get(&pdev->dev); - if (dev_data && __ratelimit(&dev_data->rs)) { - pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n", - vmg_tag, gpa, flags_rmp, flags); + if (dev_data) { + if (__ratelimit(&dev_data->rs)) { + pci_err(pdev, "Event logged [RMP_PAGE_FAULT vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n", + vmg_tag, gpa, flags_rmp, flags); + } } else { pr_err_ratelimited("Event logged [RMP_PAGE_FAULT device=%02x:%02x.%x, vmg_tag=0x%04x, gpa=0x%llx, flags_rmp=0x%04x, flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), @@ -480,11 +484,13 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, if (pdev) dev_data = dev_iommu_priv_get(&pdev->dev); - if (dev_data && __ratelimit(&dev_data->rs)) { - pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n", - domain_id, address, flags); - } else if (printk_ratelimit()) { - pr_err("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", + if (dev_data) { + if (__ratelimit(&dev_data->rs)) { + pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n", + domain_id, address, flags); + } + } else { + pr_err_ratelimited("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), domain_id, address, flags); } @@ -1261,15 +1267,52 @@ static void __domain_flush_pages(struct protection_domain *domain, } static void domain_flush_pages(struct protection_domain *domain, - u64 address, size_t size) + u64 address, size_t size, int pde) { - __domain_flush_pages(domain, address, size, 0); + if (likely(!amd_iommu_np_cache)) { + __domain_flush_pages(domain, address, size, pde); + return; + } + + /* + * When NpCache is on, we infer that we run in a VM and use a vIOMMU. + * In such setups it is best to avoid flushes of ranges which are not + * naturally aligned, since it would lead to flushes of unmodified + * PTEs. Such flushes would require the hypervisor to do more work than + * necessary. Therefore, perform repeated flushes of aligned ranges + * until you cover the range. Each iteration flushes the smaller + * between the natural alignment of the address that we flush and the + * greatest naturally aligned region that fits in the range. + */ + while (size != 0) { + int addr_alignment = __ffs(address); + int size_alignment = __fls(size); + int min_alignment; + size_t flush_size; + + /* + * size is always non-zero, but address might be zero, causing + * addr_alignment to be negative. As the casting of the + * argument in __ffs(address) to long might trim the high bits + * of the address on x86-32, cast to long when doing the check. + */ + if (likely((unsigned long)address != 0)) + min_alignment = min(addr_alignment, size_alignment); + else + min_alignment = size_alignment; + + flush_size = 1ul << min_alignment; + + __domain_flush_pages(domain, address, flush_size, pde); + address += flush_size; + size -= flush_size; + } } /* Flush the whole IO/TLB for a given protection domain - including PDE */ void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain) { - __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); + domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } void amd_iommu_domain_flush_complete(struct protection_domain *domain) @@ -1296,7 +1339,7 @@ static void domain_flush_np_cache(struct protection_domain *domain, unsigned long flags; spin_lock_irqsave(&domain->lock, flags); - domain_flush_pages(domain, iova, size); + domain_flush_pages(domain, iova, size, 1); amd_iommu_domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } @@ -2016,6 +2059,16 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, return ret; } +static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom, + unsigned long iova, size_t size) +{ + struct protection_domain *domain = to_pdomain(dom); + struct io_pgtable_ops *ops = &domain->iop.iop.ops; + + if (ops->map) + domain_flush_np_cache(domain, iova, size); +} + static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, phys_addr_t paddr, size_t page_size, int iommu_prot, gfp_t gfp) @@ -2034,26 +2087,50 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, if (iommu_prot & IOMMU_WRITE) prot |= IOMMU_PROT_IW; - if (ops->map) { + if (ops->map) ret = ops->map(ops, iova, paddr, page_size, prot, gfp); - domain_flush_np_cache(domain, iova, page_size); - } return ret; } +static void amd_iommu_iotlb_gather_add_page(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather, + unsigned long iova, size_t size) +{ + /* + * AMD's IOMMU can flush as many pages as necessary in a single flush. + * Unless we run in a virtual machine, which can be inferred according + * to whether "non-present cache" is on, it is probably best to prefer + * (potentially) too extensive TLB flushing (i.e., more misses) over + * mutliple TLB flushes (i.e., more flushes). For virtual machines the + * hypervisor needs to synchronize the host IOMMU PTEs with those of + * the guest, and the trade-off is different: unnecessary TLB flushes + * should be avoided. + */ + if (amd_iommu_np_cache && + iommu_iotlb_gather_is_disjoint(gather, iova, size)) + iommu_iotlb_sync(domain, gather); + + iommu_iotlb_gather_add_range(gather, iova, size); +} + static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, size_t page_size, struct iommu_iotlb_gather *gather) { struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; + size_t r; if ((amd_iommu_pgtable == AMD_IOMMU_V1) && (domain->iop.mode == PAGE_MODE_NONE)) return 0; - return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0; + r = (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0; + + amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size); + + return r; } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, @@ -2156,7 +2233,13 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) static void amd_iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { - amd_iommu_flush_iotlb_all(domain); + struct protection_domain *dom = to_pdomain(domain); + unsigned long flags; + + spin_lock_irqsave(&dom->lock, flags); + domain_flush_pages(dom, gather->start, gather->end - gather->start, 1); + amd_iommu_domain_flush_complete(dom); + spin_unlock_irqrestore(&dom->lock, flags); } static int amd_iommu_def_domain_type(struct device *dev) @@ -2185,6 +2268,7 @@ const struct iommu_ops amd_iommu_ops = { .attach_dev = amd_iommu_attach_device, .detach_dev = amd_iommu_detach_device, .map = amd_iommu_map, + .iotlb_sync_map = amd_iommu_iotlb_sync_map, .unmap = amd_iommu_unmap, .iova_to_phys = amd_iommu_iova_to_phys, .probe_device = amd_iommu_probe_device, diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index f8d4ad421e07..a9e568276c99 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) "AMD-Vi: " fmt +#include <linux/refcount.h> #include <linux/mmu_notifier.h> #include <linux/amd-iommu.h> #include <linux/mm_types.h> @@ -33,7 +34,7 @@ struct pri_queue { struct pasid_state { struct list_head list; /* For global state-list */ - atomic_t count; /* Reference count */ + refcount_t count; /* Reference count */ unsigned mmu_notifier_count; /* Counting nested mmu_notifier calls */ struct mm_struct *mm; /* mm_struct for the faults */ @@ -242,7 +243,7 @@ static struct pasid_state *get_pasid_state(struct device_state *dev_state, ret = *ptr; if (ret) - atomic_inc(&ret->count); + refcount_inc(&ret->count); out_unlock: spin_unlock_irqrestore(&dev_state->lock, flags); @@ -257,14 +258,14 @@ static void free_pasid_state(struct pasid_state *pasid_state) static void put_pasid_state(struct pasid_state *pasid_state) { - if (atomic_dec_and_test(&pasid_state->count)) + if (refcount_dec_and_test(&pasid_state->count)) wake_up(&pasid_state->wq); } static void put_pasid_state_wait(struct pasid_state *pasid_state) { - atomic_dec(&pasid_state->count); - wait_event(pasid_state->wq, !atomic_read(&pasid_state->count)); + refcount_dec(&pasid_state->count); + wait_event(pasid_state->wq, !refcount_read(&pasid_state->count)); free_pasid_state(pasid_state); } @@ -624,7 +625,7 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, goto out; - atomic_set(&pasid_state->count, 1); + refcount_set(&pasid_state->count, 1); init_waitqueue_head(&pasid_state->wq); spin_lock_init(&pasid_state->lock); |