author     Linus Torvalds <torvalds@linux-foundation.org>  2022-12-13 09:05:19 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2022-12-13 09:05:19 -0800
commit     e529d3507a93d3c9528580081bbaf931a50de154
tree       c363cf495fdbec199a8a9860d8e663389ae8cdd4
parent     6a24711d5c0bc8fb0fc49def433ab89ecbedf095
parent     ffcb754584603adf7039d7972564fbf6febdc542
Merge tag 'dma-mapping-6.2-2022-12-13' of git://git.infradead.org/users/hch/dma-mapping
Pull dma-mapping updates from Christoph Hellwig:
- reduce the swiotlb buffer size on allocation failure (Alexey
Kardashevskiy)
- clean up passing of bogus GFP flags to the dma-coherent allocator
(Christoph Hellwig)
* tag 'dma-mapping-6.2-2022-12-13' of git://git.infradead.org/users/hch/dma-mapping:
dma-mapping: reject __GFP_COMP in dma_alloc_attrs
ALSA: memalloc: don't pass bogus GFP_ flags to dma_alloc_*
s390/ism: don't pass bogus GFP_ flags to dma_alloc_coherent
cnic: don't pass bogus GFP_ flags to dma_alloc_coherent
RDMA/qib: don't pass bogus GFP_ flags to dma_alloc_coherent
RDMA/hfi1: don't pass bogus GFP_ flags to dma_alloc_coherent
media: videobuf-dma-contig: use dma_mmap_coherent
swiotlb: reduce the swiotlb buffer size on allocation failure
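
The core API change in this pull is the __GFP_COMP rejection added to dma_alloc_attrs() (see the kernel/dma/mapping.c hunk below); the driver patches are the matching cleanups that stop passing the flag. As a caller-side illustration, here is a minimal sketch of what a driver allocation looks like after this series — the helper name my_ring_alloc is hypothetical and not part of the patches:

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/gfp.h>

/* Hypothetical helper, for illustration only. */
static void *my_ring_alloc(struct device *dev, size_t size,
			   dma_addr_t *dma_handle)
{
	/*
	 * dma_alloc_attrs() now does WARN_ON_ONCE(flag & __GFP_COMP)
	 * and returns NULL, so GFP_KERNEL | __GFP_COMP (as cnic and
	 * qib used to pass) would fail here; plain GFP_KERNEL is the
	 * supported form.
	 */
	return dma_alloc_coherent(dev, size, dma_handle, GFP_KERNEL);
}

The driver patches in the series are exactly this substitution: dropping __GFP_COMP (and the GFP_USER/gfp_flags plumbing built around it) at each dma_alloc_coherent() call site.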
-rw-r--r--  arch/arm/mm/dma-mapping.c                      | 17
-rw-r--r--  drivers/infiniband/hw/hfi1/init.c              | 21
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba6120.c        |  2
-rw-r--r--  drivers/infiniband/hw/qib/qib_init.c           | 21
-rw-r--r--  drivers/iommu/dma-iommu.c                      |  3
-rw-r--r--  drivers/media/v4l2-core/videobuf-dma-contig.c  | 22
-rw-r--r--  drivers/net/ethernet/broadcom/cnic.c           |  6
-rw-r--r--  drivers/s390/net/ism_drv.c                     |  3
-rw-r--r--  kernel/dma/mapping.c                           |  8
-rw-r--r--  kernel/dma/swiotlb.c                           | 63
-rw-r--r--  sound/core/memalloc.c                          |  5
11 files changed, 69 insertions(+), 102 deletions(-)
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index d7909091cf97..c135f6e37a00 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -564,14 +564,6 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 	if (mask < 0xffffffffULL)
 		gfp |= GFP_DMA;
 
-	/*
-	 * Following is a work-around (a.k.a. hack) to prevent pages
-	 * with __GFP_COMP being passed to split_page() which cannot
-	 * handle them. The real problem is that this flag probably
-	 * should be 0 on ARM as it is not supported on this
-	 * platform; see CONFIG_HUGETLBFS.
-	 */
-	gfp &= ~(__GFP_COMP);
 	args.gfp = gfp;
 
 	*handle = DMA_MAPPING_ERROR;
@@ -1093,15 +1085,6 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size,
 		return __iommu_alloc_simple(dev, size, gfp, handle,
 					    coherent_flag, attrs);
 
-	/*
-	 * Following is a work-around (a.k.a. hack) to prevent pages
-	 * with __GFP_COMP being passed to split_page() which cannot
-	 * handle them. The real problem is that this flag probably
-	 * should be 0 on ARM as it is not supported on this
-	 * platform; see CONFIG_HUGETLBFS.
-	 */
-	gfp &= ~(__GFP_COMP);
-
 	pages = __iommu_alloc_buffer(dev, size, gfp, attrs, coherent_flag);
 	if (!pages)
 		return NULL;
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 436372b31431..24c0f0d257fc 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1761,17 +1761,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 	unsigned amt;
 
 	if (!rcd->rcvhdrq) {
-		gfp_t gfp_flags;
-
 		amt = rcvhdrq_size(rcd);
 
-		if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
-			gfp_flags = GFP_KERNEL;
-		else
-			gfp_flags = GFP_USER;
 		rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
 						  &rcd->rcvhdrq_dma,
-						  gfp_flags | __GFP_COMP);
+						  GFP_KERNEL);
 
 		if (!rcd->rcvhdrq) {
 			dd_dev_err(dd,
@@ -1785,7 +1779,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
 			rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
 								    PAGE_SIZE,
 								    &rcd->rcvhdrqtailaddr_dma,
-								    gfp_flags);
+								    GFP_KERNEL);
 			if (!rcd->rcvhdrtail_kvaddr)
 				goto bail_free;
 		}
@@ -1821,20 +1815,11 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 {
 	struct hfi1_devdata *dd = rcd->dd;
 	u32 max_entries, egrtop, alloced_bytes = 0;
-	gfp_t gfp_flags;
 	u16 order, idx = 0;
 	int ret = 0;
 	u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu);
 
 	/*
-	 * GFP_USER, but without GFP_FS, so buffer cache can be
-	 * coalesced (we hope); otherwise, even at order 4,
-	 * heavy filesystem activity makes these fail, and we can
-	 * use compound pages.
-	 */
-	gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
-
-	/*
 	 * The minimum size of the eager buffers is a groups of MTU-sized
 	 * buffers.
 	 * The global eager_buffer_size parameter is checked against the
@@ -1864,7 +1849,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
 			dma_alloc_coherent(&dd->pcidev->dev,
 					   rcd->egrbufs.rcvtid_size,
 					   &rcd->egrbufs.buffers[idx].dma,
-					   gfp_flags);
+					   GFP_KERNEL);
 		if (rcd->egrbufs.buffers[idx].addr) {
 			rcd->egrbufs.buffers[idx].len =
 				rcd->egrbufs.rcvtid_size;
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index aea571943768..07386117f21a 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -2075,7 +2075,7 @@ static void alloc_dummy_hdrq(struct qib_devdata *dd)
 	dd->cspec->dummy_hdrq = dma_alloc_coherent(&dd->pcidev->dev,
 					dd->rcd[0]->rcvhdrq_size,
 					&dd->cspec->dummy_hdrq_phys,
-					GFP_ATOMIC | __GFP_COMP);
+					GFP_ATOMIC);
 	if (!dd->cspec->dummy_hdrq) {
 		qib_devinfo(dd->pcidev, "Couldn't allocate dummy hdrq\n");
 		/* fallback to just 0'ing */
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 45211008449f..33667becd52b 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1546,18 +1546,14 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
 
 	if (!rcd->rcvhdrq) {
 		dma_addr_t phys_hdrqtail;
-		gfp_t gfp_flags;
 
 		amt = ALIGN(dd->rcvhdrcnt * dd->rcvhdrentsize *
 			    sizeof(u32), PAGE_SIZE);
-		gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
-			GFP_USER : GFP_KERNEL;
 
 		old_node_id = dev_to_node(&dd->pcidev->dev);
 		set_dev_node(&dd->pcidev->dev, rcd->node_id);
-		rcd->rcvhdrq = dma_alloc_coherent(
-			&dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
-			gfp_flags | __GFP_COMP);
+		rcd->rcvhdrq = dma_alloc_coherent(&dd->pcidev->dev, amt,
+				&rcd->rcvhdrq_phys, GFP_KERNEL);
 		set_dev_node(&dd->pcidev->dev, old_node_id);
 
 		if (!rcd->rcvhdrq) {
@@ -1577,7 +1573,7 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
 			set_dev_node(&dd->pcidev->dev, rcd->node_id);
 			rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
 				&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
-				gfp_flags);
+				GFP_KERNEL);
 			set_dev_node(&dd->pcidev->dev, old_node_id);
 			if (!rcd->rcvhdrtail_kvaddr)
 				goto bail_free;
@@ -1621,17 +1617,8 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
 	struct qib_devdata *dd = rcd->dd;
 	unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
 	size_t size;
-	gfp_t gfp_flags;
 	int old_node_id;
 
-	/*
-	 * GFP_USER, but without GFP_FS, so buffer cache can be
-	 * coalesced (we hope); otherwise, even at order 4,
-	 * heavy filesystem activity makes these fail, and we can
-	 * use compound pages.
-	 */
-	gfp_flags = __GFP_RECLAIM | __GFP_IO | __GFP_COMP;
-
 	egrcnt = rcd->rcvegrcnt;
 	egroff = rcd->rcvegr_tid_base;
 	egrsize = dd->rcvegrbufsize;
@@ -1663,7 +1650,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
 		rcd->rcvegrbuf[e] =
 			dma_alloc_coherent(&dd->pcidev->dev, size,
 					   &rcd->rcvegrbuf_phys[e],
-					   gfp_flags);
+					   GFP_KERNEL);
 		set_dev_node(&dd->pcidev->dev, old_node_id);
 		if (!rcd->rcvegrbuf[e])
 			goto bail_rcvegrbuf_phys;
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9297b741f5e8..f798c44e0903 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -744,9 +744,6 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev,
 	/* IOMMU can map any pages, so himem can also be used here */
 	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
 
-	/* It makes no sense to muck about with huge pages */
-	gfp &= ~__GFP_COMP;
-
 	while (count) {
 		struct page *page = NULL;
 		unsigned int order_size;
diff --git a/drivers/media/v4l2-core/videobuf-dma-contig.c b/drivers/media/v4l2-core/videobuf-dma-contig.c
index 52312ce2ba05..f2c439359557 100644
--- a/drivers/media/v4l2-core/videobuf-dma-contig.c
+++ b/drivers/media/v4l2-core/videobuf-dma-contig.c
@@ -36,12 +36,11 @@ struct videobuf_dma_contig_memory {
 
 static int __videobuf_dc_alloc(struct device *dev,
 			       struct videobuf_dma_contig_memory *mem,
-			       unsigned long size, gfp_t flags)
+			       unsigned long size)
 {
 	mem->size = size;
-	mem->vaddr = dma_alloc_coherent(dev, mem->size,
-					&mem->dma_handle, flags);
-
+	mem->vaddr = dma_alloc_coherent(dev, mem->size, &mem->dma_handle,
+					GFP_KERNEL);
 	if (!mem->vaddr) {
 		dev_err(dev, "memory alloc size %ld failed\n", mem->size);
 		return -ENOMEM;
@@ -258,8 +257,7 @@ static int __videobuf_iolock(struct videobuf_queue *q,
 			return videobuf_dma_contig_user_get(mem, vb);
 
 		/* allocate memory for the read() method */
-		if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(vb->size),
-					GFP_KERNEL))
+		if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(vb->size)))
 			return -ENOMEM;
 		break;
 	case V4L2_MEMORY_OVERLAY:
@@ -295,22 +293,18 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
 	BUG_ON(!mem);
 	MAGIC_CHECK(mem->magic, MAGIC_DC_MEM);
 
-	if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(buf->bsize),
-				GFP_KERNEL | __GFP_COMP))
+	if (__videobuf_dc_alloc(q->dev, mem, PAGE_ALIGN(buf->bsize)))
 		goto error;
 
-	/* Try to remap memory */
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
 	/* the "vm_pgoff" is just used in v4l2 to find the
 	 * corresponding buffer data structure which is allocated
 	 * earlier and it does not mean the offset from the physical
 	 * buffer start address as usual. So set it to 0 to pass
-	 * the sanity check in vm_iomap_memory().
+	 * the sanity check in dma_mmap_coherent().
 	 */
 	vma->vm_pgoff = 0;
-
-	retval = vm_iomap_memory(vma, mem->dma_handle, mem->size);
+	retval = dma_mmap_coherent(q->dev, vma, mem->vaddr, mem->dma_handle,
+				   mem->size);
 	if (retval) {
 		dev_err(q->dev, "mmap: remap failed with error %d. ",
 			retval);
diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 74bc053a2078..7926aaef8f0c 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -1027,16 +1027,14 @@ static int __cnic_alloc_uio_rings(struct cnic_uio_dev *udev, int pages)
 
 	udev->l2_ring_size = pages * CNIC_PAGE_SIZE;
 	udev->l2_ring = dma_alloc_coherent(&udev->pdev->dev, udev->l2_ring_size,
-					   &udev->l2_ring_map,
-					   GFP_KERNEL | __GFP_COMP);
+					   &udev->l2_ring_map, GFP_KERNEL);
 	if (!udev->l2_ring)
 		return -ENOMEM;
 
 	udev->l2_buf_size = (cp->l2_rx_ring_size + 1) * cp->l2_single_buf_size;
 	udev->l2_buf_size = CNIC_PAGE_ALIGN(udev->l2_buf_size);
 	udev->l2_buf = dma_alloc_coherent(&udev->pdev->dev, udev->l2_buf_size,
-					  &udev->l2_buf_map,
-					  GFP_KERNEL | __GFP_COMP);
+					  &udev->l2_buf_map, GFP_KERNEL);
 	if (!udev->l2_buf) {
 		__cnic_free_uio_rings(udev);
 		return -ENOMEM;
diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
index d34bb6ec1490..dfd401d9e362 100644
--- a/drivers/s390/net/ism_drv.c
+++ b/drivers/s390/net/ism_drv.c
@@ -243,7 +243,8 @@ static int ism_alloc_dmb(struct ism_dev *ism, struct smcd_dmb *dmb)
 	dmb->cpu_addr = dma_alloc_coherent(&ism->pdev->dev, dmb->dmb_len,
 					   &dmb->dma_addr,
-					   GFP_KERNEL | __GFP_NOWARN | __GFP_NOMEMALLOC | __GFP_COMP | __GFP_NORETRY);
+					   GFP_KERNEL | __GFP_NOWARN |
+					   __GFP_NOMEMALLOC | __GFP_NORETRY);
 	if (!dmb->cpu_addr)
 		clear_bit(dmb->sba_idx, ism->sba_bitmap);
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 33437d620644..c026a5a5e046 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -498,6 +498,14 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
 
 	WARN_ON_ONCE(!dev->coherent_dma_mask);
 
+	/*
+	 * DMA allocations can never be turned back into a page pointer, so
+	 * requesting compound pages doesn't make sense (and can't even be
+	 * supported at all by various backends).
+	 */
+	if (WARN_ON_ONCE(flag & __GFP_COMP))
+		return NULL;
+
 	if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
 		return cpu_addr;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 339a990554e7..a34c38bbe28f 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -300,6 +300,37 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
 	return;
 }
 
+static void *swiotlb_memblock_alloc(unsigned long nslabs, unsigned int flags,
+		int (*remap)(void *tlb, unsigned long nslabs))
+{
+	size_t bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
+	void *tlb;
+
+	/*
+	 * By default allocate the bounce buffer memory from low memory, but
+	 * allow to pick a location everywhere for hypervisors with guest
+	 * memory encryption.
+	 */
+	if (flags & SWIOTLB_ANY)
+		tlb = memblock_alloc(bytes, PAGE_SIZE);
+	else
+		tlb = memblock_alloc_low(bytes, PAGE_SIZE);
+
+	if (!tlb) {
+		pr_warn("%s: Failed to allocate %zu bytes tlb structure\n",
+			__func__, bytes);
+		return NULL;
+	}
+
+	if (remap && remap(tlb, nslabs) < 0) {
+		memblock_free(tlb, PAGE_ALIGN(bytes));
+		pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes);
+		return NULL;
+	}
+
+	return tlb;
+}
+
 /*
  * Statically reserve bounce buffer space and initialize bounce buffer data
  * structures for the software IO TLB used to implement the DMA API.
@@ -310,7 +341,6 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
 	struct io_tlb_mem *mem = &io_tlb_default_mem;
 	unsigned long nslabs;
 	size_t alloc_size;
-	size_t bytes;
 	void *tlb;
 
 	if (!addressing_limit && !swiotlb_force_bounce)
@@ -326,31 +356,16 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
 		swiotlb_adjust_nareas(num_possible_cpus());
 
 	nslabs = default_nslabs;
-	/*
-	 * By default allocate the bounce buffer memory from low memory, but
-	 * allow to pick a location everywhere for hypervisors with guest
-	 * memory encryption.
-	 */
-retry:
-	bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
-	if (flags & SWIOTLB_ANY)
-		tlb = memblock_alloc(bytes, PAGE_SIZE);
-	else
-		tlb = memblock_alloc_low(bytes, PAGE_SIZE);
-	if (!tlb) {
-		pr_warn("%s: failed to allocate tlb structure\n", __func__);
-		return;
-	}
-
-	if (remap && remap(tlb, nslabs) < 0) {
-		memblock_free(tlb, PAGE_ALIGN(bytes));
-
+	while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) {
+		if (nslabs <= IO_TLB_MIN_SLABS)
+			return;
 		nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
-		if (nslabs >= IO_TLB_MIN_SLABS)
-			goto retry;
+	}
 
-		pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes);
-		return;
+	if (default_nslabs != nslabs) {
+		pr_info("SWIOTLB bounce buffer size adjusted %lu -> %lu slabs",
+			default_nslabs, nslabs);
+		default_nslabs = nslabs;
 	}
 
 	alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));
diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
index ba095558b6d1..34250e6022ff 100644
--- a/sound/core/memalloc.c
+++ b/sound/core/memalloc.c
@@ -21,7 +21,6 @@
 
 #define DEFAULT_GFP \
 	(GFP_KERNEL | \
-	 __GFP_COMP |    /* compound page lets parts be mapped */ \
 	 __GFP_RETRY_MAYFAIL | /* don't trigger OOM-killer */ \
 	 __GFP_NOWARN)   /* no stack trace print - this call is non-critical */
 
@@ -543,7 +542,7 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size)
 	void *p;
 
 	sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir,
-				      DEFAULT_GFP, 0);
+				      DEFAULT_GFP | __GFP_COMP, 0);
 #ifdef CONFIG_SND_DMA_SGBUF
 	if (!sgt && !get_dma_ops(dmab->dev.dev)) {
 		if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
@@ -811,7 +810,7 @@ static void *snd_dma_noncoherent_alloc(struct snd_dma_buffer *dmab, size_t size)
 	void *p;
 
 	p = dma_alloc_noncoherent(dmab->dev.dev, size, &dmab->addr,
-				  dmab->dev.dir, DEFAULT_GFP);
+				  dmab->dev.dir, DEFAULT_GFP | __GFP_COMP);
 	if (p)
 		dmab->dev.need_sync = dma_need_sync(dmab->dev.dev, dmab->addr);
 	return p;
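
For reference, the new swiotlb fallback can be reasoned about in isolation: on each failed attempt, swiotlb_init_remap() halves nslabs, rounds it up to a multiple of IO_TLB_SEGSIZE, and retries until it hits the IO_TLB_MIN_SLABS floor. The following standalone C sketch (mine, not kernel code; constants copied from the kernel headers) prints the sequence of sizes the kernel would try if every memblock allocation failed, starting from the 64 MiB default:

#include <stdio.h>

#define IO_TLB_SHIFT     11                             /* one slab = 2 KiB */
#define IO_TLB_SEGSIZE   128UL
#define IO_TLB_MIN_SLABS ((1UL << 20) >> IO_TLB_SHIFT)  /* 1 MiB floor */

/* Round x up to a multiple of a (a power of two), like the kernel's ALIGN(). */
static unsigned long align_up(unsigned long x, unsigned long a)
{
	return (x + a - 1) & ~(a - 1);
}

int main(void)
{
	unsigned long nslabs = (64UL << 20) >> IO_TLB_SHIFT; /* 64 MiB default */

	for (;;) {
		printf("try %5lu slabs (%6lu KiB)\n",
		       nslabs, nslabs << (IO_TLB_SHIFT - 10));
		if (nslabs <= IO_TLB_MIN_SLABS)
			break;	/* below this floor the kernel gives up */
		nslabs = align_up(nslabs >> 1, IO_TLB_SEGSIZE);
	}
	return 0;
}

This tries 32768, 16384, ..., 1024, and finally 512 slabs (1 MiB) before giving up, matching the while loop in the patched swiotlb_init_remap().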